Skip to content

Commit

Permalink
cms-2016-simulated-datasets: updates as in #182
Browse files Browse the repository at this point in the history
  • Loading branch information
Kati Lassila-Perini committed Oct 24, 2023
1 parent cdd814a commit 98fa015
Show file tree
Hide file tree
Showing 7 changed files with 68 additions and 15 deletions.
2 changes: 1 addition & 1 deletion cms-2016-simulated-datasets/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ To build sample records (with a limited number of datasets in the input file) do
$ python3 ./code/interface.py --create-das-json-store --ignore-eos-store DATASET_LIST

$ auth-get-sso-cookie -u https://cms-pdmv.cern.ch/mcm -o cookies.txt
$ python3 ./code/interface.py --create-mcm-json-store --ignore-eos-store DATASET_LIST
$ python3 ./code/interface.py --create-mcm-store --ignore-eos-store DATASET_LIST

$ openssl pkcs12 -in myCert.p12 -nocerts -nodes -out userkey.nodes.pem # if not present
$ python3 ./code/interface.py --get-conf-files --ignore-eos-store DATASET_LIST
Expand Down
7 changes: 4 additions & 3 deletions cms-2016-simulated-datasets/code/config_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@ def main(eos_dir,

conffile_ids = []
for dataset_full_name in eos_datasets:
for conffile_id in get_conffile_ids_all_chain_steps(dataset_full_name, mcm_dir):
if conffile_id not in conffile_ids:
conffile_ids.append(conffile_id)
if dataset_full_name.endswith('MINIAODSIM') == 0:
for conffile_id in get_conffile_ids_all_chain_steps(dataset_full_name, mcm_dir):
if conffile_id not in conffile_ids:
conffile_ids.append(conffile_id)

if not os.path.exists(conf_dir):
os.makedirs(conf_dir, exist_ok=True)
Expand Down
4 changes: 3 additions & 1 deletion cms-2016-simulated-datasets/code/das_json_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@


def get_parent_dataset(dataset, das_dir):
"Return parent dataset to the given dataset or an empty string if no parent found. Not used for 2016"
"Return parent dataset to the given dataset or an empty string if no parent found."
parent_dataset = ''

filepath = das_dir + '/parent/' + dataset.replace('/', '@') + '.json'
Expand Down Expand Up @@ -74,6 +74,7 @@ def create(dataset, das_dir):

result_file = dataset.replace('/', '@') + ".json"
mydasgoclient(dataset, "dataset", das_dir, result_file)
mydasgoclient(dataset, "parent", das_dir, result_file)
mydasgoclient(dataset, "config", das_dir, result_file)
mydasgoclient(dataset, "release", das_dir, result_file)

Expand All @@ -86,6 +87,7 @@ def main(das_dir,

# create dirs for dataset, parent, config and release
for path in [das_dir + '/dataset',
das_dir + '/parent',
das_dir + '/config',
das_dir + '/release']:
if not os.path.exists(path):
Expand Down
52 changes: 46 additions & 6 deletions cms-2016-simulated-datasets/code/dataset_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,9 +158,25 @@ def get_globaltag_from_conffile(afile, conf_dir):
return globaltag


def get_all_generator_text(dataset, das_dir, mcm_dir, conf_dir, recid):
#def get_all_generator_text(dataset, das_dir, mcm_dir, conf_dir, recid):
def get_all_generator_text(dataset, das_dir, mcm_dir, conf_dir, recid_info):
"""Return DICT with all information about the generator steps."""

# For MiniAODSIM, find the corresponding Nano and use that information
# Might be best done when querying the McM

if dataset.endswith('MINIAODSIM'):
nano_found=0
dataset_first_name = get_from_deep_json(get_mcm_dict(dataset, mcm_dir), 'dataset_name')
for x in os.listdir(mcm_dir + '/chain'):
if x.startswith('@'+dataset_first_name):
dataset = x.replace('@', '/')
nano_found=1

if nano_found==0:
print("A corresponding NANOAODSIM was not found for dataset: " + dataset)

recid = recid_info[dataset]
info = {}
info["description"] = "<p>These data were generated in several steps (see also <a href=\"/docs/cms-mc-production-overview\">CMS Monte Carlo production overview</a>):</p>"
info["steps"] = []
Expand Down Expand Up @@ -341,11 +357,18 @@ def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm
rec['license'] = {}
rec['license']['attribution'] = 'CC0'

rec['methodology'] = get_all_generator_text(dataset_full_name, das_dir, mcm_dir, conffiles_dir, recid_info[dataset_full_name])
rec['methodology'] = get_all_generator_text(dataset_full_name, das_dir, mcm_dir, conffiles_dir, recid_info)

# For Mini, get the pileup from the corresponding Nano
dataset_name_for_nano = dataset_full_name
if dataset_full_name.endswith('MINIAODSIM'):
dataset_first_name = get_from_deep_json(get_mcm_dict(dataset_full_name, mcm_dir), 'dataset_name')
for x in os.listdir(mcm_dir + '/chain'):
if x.startswith('@'+dataset_first_name):
dataset_name_for_nano = x.replace('@', '/')

pileup_dataset_name= ''
pileup_dataset_name= get_pileup_from_mcm(dataset_full_name, mcm_dir)
pileup_dataset_name= get_pileup_from_mcm(dataset_name_for_nano, mcm_dir)

pileup_dataset_recid = {
'/MinBias_TuneZ2_7TeV-pythia6/Summer11Leg-START53_LV4-v1/GEN-SIM': 36, # 2011
Expand Down Expand Up @@ -375,9 +398,26 @@ def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm

rec['recid'] = str(recid_info[dataset_full_name])

# rec['relations'] = []
# rec['relations']['title'] = '' # FIXME, 2016 Nano are children of 2016 Mini
# rec['relations']['type'] = 'isChildOf'
if dataset_full_name.endswith('NANOAODSIM'):
# Query from mcm dict fails for an example dataset because Mini is v1 in mcm and v2 in dataset list
# Get it from das instead
#dataset_name_for_mini = get_from_deep_json(get_mcm_dict(dataset_full_name, mcm_dir), 'input_dataset')
dataset_name_for_mini = get_parent_dataset(dataset_full_name, das_dir)
relations_description = 'The corresponding MINIAODSIM dataset:'
relations_recid = str(recid_info[dataset_name_for_mini])
relations_type = 'isParentOf'
else:
relations_description = 'The corresponding NANOAODSIM dataset:'
relations_recid = str(recid_info[dataset_name_for_nano])
relations_type = 'isChildOf'

rec['relations'] = [
{
'description': relations_description,
'recid': relations_recid,
'type': relations_type
}
]

rec['run_period'] = run_period

Expand Down
8 changes: 6 additions & 2 deletions cms-2016-simulated-datasets/code/mcm_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ def mcm_downloader(dataset, mcm_dir):
print("==> " + dataset + "\n==> Already exist. Skipping...")
return

cmd = "curl -s -k https://cms-pdmv.cern.ch/mcm/public/restapi/requests/"
#cmd = "curl -s -k https://cms-pdmv.cern.ch/mcm/public/restapi/requests/"
cmd = "curl -s -k https://cms-pdmv-prod.web.cern.ch/mcm/public/restapi/requests/"

mcm_dict = subprocess.run(cmd + "produces" + dataset,
shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
Expand Down Expand Up @@ -52,6 +53,8 @@ def mcm_downloader(dataset, mcm_dir):
### New 2016
# create a directory with the dataset name under mcm_dir + "/chain"
# create dirs
if dataset.endswith('MINIAODSIM'):
return
path = mcm_dir + "/chain/" + dataset.replace('/', '@')
os.makedirs(path, exist_ok=True)

Expand All @@ -66,7 +69,8 @@ def mcm_downloader(dataset, mcm_dir):
# commands line: curl -L -s -b cookies.txt https://cms-pdmv.cern.ch/mcm/restapi/chained_requests/get/<chain_prepid> | jq .results.chain
# FIXME: change shell jq to deep json query
# REQUIRES: run on command line first: auth-get-sso-cookie -u https://cms-pdmv.cern.ch/mcm -o cookies.txt
chaincmd = "curl -L -s -b cookies.txt https://cms-pdmv.cern.ch/mcm/restapi/chained_requests/"
# chaincmd = "curl -L -s -b cookies.txt https://cms-pdmv.cern.ch/mcm/restapi/chained_requests/"
chaincmd = "curl -L -s -b cookies.txt https://cms-pdmv-prod.web.cern.ch/mcm/restapi/chained_requests/"
mcm_chain_prepids = subprocess.run(chaincmd + "get/" + chain_prepid + " | jq .results.chain",
shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
mcm_chain_prepids_out = str(mcm_chain_prepids.stdout.decode("utf-8"))
Expand Down
3 changes: 3 additions & 0 deletions cms-2016-simulated-datasets/inputs/CMS-2016-mc-datasets.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
/ADDmonoPhoton_MD-1_d-3_TuneCP5_13TeV-pythia8/RunIISummer20UL16NanoAODv9-106X_mcRun2_asymptotic_v17-v2/NANOAODSIM
/BBH_HToJPsiG_JPsiToMuMu_TuneCP5_13TeV-madgraph-pythia8/RunIISummer20UL16NanoAODv9-106X_mcRun2_asymptotic_v17-v1/NANOAODSIM
/WminusJetsToTauNu_TauToMu_TuneCP5_13TeV-powhegMiNNLO-pythia8-photos/RunIISummer20UL16NanoAODv9-106X_mcRun2_asymptotic_v17-v1/NANOAODSIM
/ADDmonoPhoton_MD-1_d-3_TuneCP5_13TeV-pythia8/RunIISummer20UL16MiniAODv2-106X_mcRun2_asymptotic_v17-v2/MINIAODSIM
/BBH_HToJPsiG_JPsiToMuMu_TuneCP5_13TeV-madgraph-pythia8/RunIISummer20UL16MiniAODv2-106X_mcRun2_asymptotic_v17-v1/MINIAODSIM
/WminusJetsToTauNu_TauToMu_TuneCP5_13TeV-powhegMiNNLO-pythia8-photos/RunIISummer20UL16MiniAODv2-106X_mcRun2_asymptotic_v17-v1/MINIAODSIM
7 changes: 5 additions & 2 deletions cms-2016-simulated-datasets/inputs/recid_info.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
RECID_INFO ={
"/ADDmonoPhoton_MD-1_d-3_TuneCP5_13TeV-pythia8/RunIISummer20UL16NanoAODv9-106X_mcRun2_asymptotic_v17-v2/NANOAODSIM": 30000,
"/BBH_HToJPsiG_JPsiToMuMu_TuneCP5_13TeV-madgraph-pythia8/RunIISummer20UL16MiniAODv2-106X_mcRun2_asymptotic_v17-v1/MINIAODSIM": 30001,
"/WminusJetsToTauNu_TauToMu_TuneCP5_13TeV-powhegMiNNLO-pythia8-photos/RunIISummer20UL16MiniAODv2-106X_mcRun2_asymptotic_v17-v1/MINIAODSIM": 30002
"/BBH_HToJPsiG_JPsiToMuMu_TuneCP5_13TeV-madgraph-pythia8/RunIISummer20UL16NanoAODv9-106X_mcRun2_asymptotic_v17-v1/NANOAODSIM": 30001,
"/WminusJetsToTauNu_TauToMu_TuneCP5_13TeV-powhegMiNNLO-pythia8-photos/RunIISummer20UL16NanoAODv9-106X_mcRun2_asymptotic_v17-v1/NANOAODSIM": 30002,
"/ADDmonoPhoton_MD-1_d-3_TuneCP5_13TeV-pythia8/RunIISummer20UL16MiniAODv2-106X_mcRun2_asymptotic_v17-v2/MINIAODSIM": 30003,
"/BBH_HToJPsiG_JPsiToMuMu_TuneCP5_13TeV-madgraph-pythia8/RunIISummer20UL16MiniAODv2-106X_mcRun2_asymptotic_v17-v1/MINIAODSIM": 30004,
"/WminusJetsToTauNu_TauToMu_TuneCP5_13TeV-powhegMiNNLO-pythia8-photos/RunIISummer20UL16MiniAODv2-106X_mcRun2_asymptotic_v17-v1/MINIAODSIM": 30005
}

0 comments on commit 98fa015

Please sign in to comment.