Hugonweb | CMSSW Parallel Analysis

To make any python config that uses input files run data-parallel, insert the following lines for the input source and output module (after the process has been defined). Each job reads its index from the $PBS_ARRAYID environment variable and processes its own block of events.

import os
# job index from the Torque job array (defaults to "1" when run interactively)
runNEnv = os.getenv("PBS_ARRAYID", "1")

#   name of the output file
theFileName = 'MyOutput'+str(runNEnv)+'.root'
#   number of events per job
nEvents = 1000

process.maxEvents = cms.untracked.PSet( input = cms.untracked.int32(nEvents) )

print("job number: " + str(runNEnv))
print("out file name:" + str(theFileName))

# list of input files; fill it in by hand or with the loop shown below
myFileNames = cms.untracked.vstring()
myFileNames.extend([

])

process.source = cms.Source("PoolSource",
    fileNames = myFileNames,
    # PBS_ARRAYID starts at 1 (see the Torque section below),
    # so job N skips the first (N-1)*nEvents events
    skipEvents = cms.untracked.uint32(nEvents*(int(runNEnv)-1))
)

process.FEVT = cms.OutputModule("PoolOutputModule",
    fileName = cms.untracked.string(theFileName),
    outputCommands = cms.untracked.vstring(
        "keep *"
    )
)
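
If the config does not already run an output module, it also has to be attached to an EndPath so the selected events are actually written out. A minimal sketch, assuming no EndPath exists yet in the config (the label outpath is arbitrary):

# attach the output module to an EndPath so events are written to theFileName
process.outpath = cms.EndPath(process.FEVT)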

To fill the list from sequentially numbered input files, with a check that each file exists and can be opened by ROOT:

import ROOT

for i in range(0, 1000):
    fileNameString = '/blah/blah-run'+str(1000+i)+'.root'
    try:
        # make sure the file exists and is readable
        dummyFile = open(fileNameString, "rb")
        dummyFile.close()
        # make sure ROOT can actually open it (GetSize() returns -1 on failure)
        dummyRootFile = ROOT.TFile(fileNameString)
        if dummyRootFile.GetSize() != -1:
            myFileNames.extend(["file:"+fileNameString])
        else:
            print("Warning: ROOT could not open " + fileNameString)
        dummyRootFile.Close()
    except IOError:
        print("Warning: could not open " + fileNameString)

Torque Multiple Jobs

Add this to the Torque submission file:

#Multiple job submission:
#each job will have an environment variable called $PBS_ARRAYID
#set to one of the numbers in the range below
#PBS -t 1-10
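
For reference, a complete submission file might look like the sketch below. The CMSSW area path and the config file name (MyConfig_cfg.py) are placeholders for your own setup, and the environment setup via scramv1 runtime assumes a standard CMSSW installation.

#!/bin/bash
#PBS -t 1-10
# set up the CMSSW runtime environment (path is a placeholder)
cd /path/to/CMSSW_X_Y_Z/src
eval `scramv1 runtime -sh`
# run from the directory the job was submitted from
cd $PBS_O_WORKDIR
# $PBS_ARRAYID is already in the environment, so the config above picks it up via os.getenv
cmsRun MyConfig_cfg.py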