To make any Python config that reads input files run data-parallel, add the following lines to set up the input source and the output module.
import os
import FWCore.ParameterSet.Config as cms

# 'process' is the cms.Process object already defined in your config.
# PBS_ARRAYID identifies this job within the array (see the torque directive below)
runNEnv = os.getenv("PBS_ARRAYID", "1")
# name of the output file
theFileName = 'MyOutput' + runNEnv + '.root'
# number of events per job
nEvents = 1000
process.maxEvents = cms.untracked.PSet( input = cms.untracked.int32(nEvents) )
print("job number: " + runNEnv)
print("out file name: " + theFileName)

myFileNames = cms.untracked.vstring()
myFileNames.extend([
])
process.source = cms.Source("PoolSource",
    fileNames = myFileNames,
    # with array IDs starting at 1 (see '#PBS -t 1-10' below), job N skips
    # the (N-1)*nEvents events already covered by the preceding jobs
    skipEvents = cms.untracked.uint32(nEvents * (int(runNEnv) - 1))
)
process.FEVT = cms.OutputModule("PoolOutputModule",
    fileName = cms.untracked.string(theFileName),
    outputCommands = cms.untracked.vstring(
        "keep *"
    )
)
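To sanity-check the bookkeeping, here is a small standalone sketch (plain Python, no CMSSW required) of how ten array jobs with 1000 events each partition the event stream; the numbers match the example above and the '#PBS -t 1-10' directive below:

# standalone sketch: how the ten array jobs split up the events
nEvents = 1000
for arrayId in range(1, 11):
    skip = nEvents * (arrayId - 1)  # value that goes into skipEvents
    print("job %d: skip %d, process events %d-%d, write MyOutput%d.root"
          % (arrayId, skip, skip, skip + nEvents - 1, arrayId))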
For numbered input files, fill the list with a loop that checks each file exists and is readable by ROOT:
import ROOT

for i in range(0, 1000):
    fileNameString = '/blah/blah-run' + str(1000 + i) + '.root'
    try:
        # check that the file exists and is readable
        dummyFile = open(fileNameString, "rb")
        dummyFile.close()
        # check that ROOT can open it; GetSize() returns -1 on failure
        dummyRootFile = ROOT.TFile(fileNameString)
        if dummyRootFile.GetSize() != -1:
            myFileNames.extend(["file:" + fileNameString])
        else:
            print("Warning: ROOT could not open " + fileNameString)
        dummyRootFile.Close()
    except IOError:
        print("Warning: Could not open " + fileNameString)
Finally, add this to the torque submission file:
# Multiple job submission: each job in the array gets a variable
# called $PBS_ARRAYID whose value is one of the numbers in the range below
#PBS -t 1-10
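For reference, a minimal complete submission file might look like the sketch below; 'myConfig.py' and the walltime request are placeholders to adapt to your own config and site:

#!/bin/bash
#PBS -t 1-10
#PBS -l walltime=02:00:00
# run from the directory the job was submitted from
cd $PBS_O_WORKDIR
cmsRun myConfig.py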