Package madgraph :: Package madevent :: Module gen_ximprove
[hide private]
[frames] | [no frames]

Source Code for Module madgraph.madevent.gen_ximprove

   1  ################################################################################ 
   2  # 
   3  # Copyright (c) 2014 The MadGraph5_aMC@NLO Development team and Contributors 
   4  # 
   5  # This file is a part of the MadGraph5_aMC@NLO project, an application which  
   6  # automatically generates Feynman diagrams and matrix elements for arbitrary 
   7  # high-energy processes in the Standard Model and beyond. 
   8  # 
   9  # It is subject to the MadGraph5_aMC@NLO license which should accompany this  
  10  # distribution. 
  11  # 
  12  # For more information, visit madgraph.phys.ucl.ac.be and amcatnlo.web.cern.ch 
  13  # 
  14  ################################################################################ 
  15  """ A python file to replace the fortran script gen_ximprove. 
  16      This script analyses the result of the survey/ previous refine and  
  17      creates the jobs for the following script. 
  18  """ 
  19  from __future__ import division 
  20   
  21  import collections 
  22  import os 
  23  import glob 
  24  import logging 
  25  import math 
  26  import re 
  27  import subprocess 
  28  import shutil 
  29  import stat 
  30  import sys 
  31   
  32  try: 
  33      import madgraph 
  34  except ImportError: 
  35      MADEVENT = True 
  36      import internal.sum_html as sum_html 
  37      import internal.banner as bannermod 
  38      import internal.misc as misc 
  39      import internal.files as files 
  40      import internal.cluster as cluster 
  41      import internal.combine_grid as combine_grid 
  42      import internal.combine_runs as combine_runs 
  43      import internal.lhe_parser as lhe_parser 
  44  else: 
  45      MADEVENT= False 
  46      import madgraph.madevent.sum_html as sum_html 
  47      import madgraph.various.banner as bannermod 
  48      import madgraph.various.misc as misc 
  49      import madgraph.iolibs.files as files 
  50      import madgraph.various.cluster as cluster 
  51      import madgraph.madevent.combine_grid as combine_grid 
  52      import madgraph.madevent.combine_runs as combine_runs 
  53      import madgraph.various.lhe_parser as lhe_parser 
  54   
  55  logger = logging.getLogger('madgraph.madevent.gen_ximprove') 
  56  pjoin = os.path.join 
class gensym(object):
    """a class to call the fortran gensym executable and handle it's output
    in order to create the various job that are needed for the survey"""

    #convenient shortcut for the formatting of variable
    @ staticmethod
    def format_variable(*args):
        # Delegate to the banner module so user-supplied values are coerced
        # with the same rules as the run_card entries.
        return bannermod.ConfigFile.format_variable(*args)

    # Class-level defaults; __init__ may overwrite them per instance from the
    # run_card or the cluster configuration.
    combining_job = 2 # number of channel by ajob
    splitted_grid = False # False, or an int: number of sub-grids per channel
    min_iterations = 3 # minimal number of survey iterations per channel
    mode= "survey" # tag passed to the grid writer (survey vs refine)
    def __init__(self, cmd, opt=None):
        """Store the madevent interface `cmd` and configure how the survey
        will be split (sub-grids) and grouped (channels per job), from the
        process characteristics, the cluster type and the run_card."""

        # Some classes in a cooperative MRO accept (cmd, opt); plain object
        # does not -- swallow the TypeError in that case.
        try:
            super(gensym, self).__init__(cmd, opt)
        except TypeError:
            pass

        # Run statistics, a dictionary of RunStatistics(), with
        self.run_statistics = {}

        self.cmd = cmd
        self.run_card = cmd.run_card
        self.me_dir = cmd.me_dir


        # dictionary to keep track of the precision when combining iteration
        self.cross = collections.defaultdict(int)
        self.abscross = collections.defaultdict(int)
        self.sigma = collections.defaultdict(int)
        self.chi2 = collections.defaultdict(int)

        self.splitted_grid = False
        if self.cmd.proc_characteristics['loop_induced']:
            # loop-induced processes are expensive: split the grid creation
            nexternal = self.cmd.proc_characteristics['nexternal']
            self.splitted_grid = max(2, (nexternal-2)**2)
            if hasattr(self.cmd, "opts") and self.cmd.opts['accuracy'] == 0.1:
                self.cmd.opts['accuracy'] = 0.02

        # on a multicore machine, match the number of sub-grids to the cores
        if isinstance(cmd.cluster, cluster.MultiCore) and self.splitted_grid > 1:
            self.splitted_grid = int(cmd.cluster.nb_core**0.5)
            if self.splitted_grid == 1 and cmd.cluster.nb_core >1:
                self.splitted_grid = 2

        #if the user defines it in the run_card:
        if self.run_card['survey_splitting'] != -1:
            self.splitted_grid = self.run_card['survey_splitting']
        if self.run_card['survey_nchannel_per_job'] != -1:
            self.combining_job = self.run_card['survey_nchannel_per_job']

        self.splitted_Pdir = {}
        # callables so that subclasses/strategies can override them per Pdir
        self.splitted_for_dir = lambda x,y: self.splitted_grid
        self.combining_job_for_Pdir = lambda x: self.combining_job
        self.lastoffset = {}
117 - def launch(self, to_submit=True, clean=True):
118 """ """ 119 120 self.subproc = [l.strip() for l in open(pjoin(self.me_dir,'SubProcesses', 121 'subproc.mg'))] 122 subproc = self.subproc 123 124 P_zero_result = [] # check the number of times where they are no phase-space 125 126 nb_tot_proc = len(subproc) 127 job_list = {} 128 for nb_proc,subdir in enumerate(subproc): 129 self.cmd.update_status('Compiling for process %s/%s. <br> (previous processes already running)' % \ 130 (nb_proc+1,nb_tot_proc), level=None) 131 132 subdir = subdir.strip() 133 Pdir = pjoin(self.me_dir, 'SubProcesses',subdir) 134 logger.info(' %s ' % subdir) 135 136 # clean previous run 137 if clean: 138 for match in misc.glob('*ajob*', Pdir): 139 if os.path.basename(match)[:4] in ['ajob', 'wait', 'run.', 'done']: 140 os.remove(match) 141 for match in misc.glob('G*', Pdir): 142 if os.path.exists(pjoin(match,'results.dat')): 143 os.remove(pjoin(match, 'results.dat')) 144 if os.path.exists(pjoin(match, 'ftn25')): 145 os.remove(pjoin(match, 'ftn25')) 146 147 #compile gensym 148 self.cmd.compile(['gensym'], cwd=Pdir) 149 if not os.path.exists(pjoin(Pdir, 'gensym')): 150 raise Exception, 'Error make gensym not successful' 151 152 # Launch gensym 153 p = misc.Popen(['./gensym'], stdout=subprocess.PIPE, 154 stderr=subprocess.STDOUT, cwd=Pdir) 155 #sym_input = "%(points)d %(iterations)d %(accuracy)f \n" % self.opts 156 (stdout, _) = p.communicate('') 157 158 if os.path.exists(pjoin(self.me_dir,'error')): 159 files.mv(pjoin(self.me_dir,'error'), pjoin(Pdir,'ajob.no_ps.log')) 160 P_zero_result.append(subdir) 161 continue 162 163 jobs = stdout.split() 164 job_list[Pdir] = jobs 165 try: 166 # check that all input are valid 167 [float(s) for s in jobs] 168 except Exception: 169 logger.debug("unformated string found in gensym. 
Please check:\n %s" % stdout) 170 done=False 171 job_list[Pdir] = [] 172 lines = stdout.split('\n') 173 for l in lines: 174 try: 175 [float(s) for s in l.split()] 176 except: 177 continue 178 else: 179 if done: 180 raise Exception, 'Parsing error in gensym: %s' % stdout 181 job_list[Pdir] = l.split() 182 done = True 183 if not done: 184 raise Exception, 'Parsing error in gensym: %s' % stdout 185 186 self.cmd.compile(['madevent'], cwd=Pdir) 187 if to_submit: 188 self.submit_to_cluster(job_list) 189 job_list = {} 190 191 return job_list, P_zero_result
192
193 - def resubmit(self, min_precision=1.0, resubmit_zero=False):
194 """collect the result of the current run and relaunch each channel 195 not completed or optionally a completed one with a precision worse than 196 a threshold (and/or the zero result channel)""" 197 198 job_list, P_zero_result = self.launch(to_submit=False, clean=False) 199 200 for P , jobs in dict(job_list).items(): 201 misc.sprint(jobs) 202 to_resub = [] 203 for job in jobs: 204 if os.path.exists(pjoin(P, 'G%s' % job)) and os.path.exists(pjoin(P, 'G%s' % job, 'results.dat')): 205 one_result = sum_html.OneResult(job) 206 try: 207 one_result.read_results(pjoin(P, 'G%s' % job, 'results.dat')) 208 except: 209 to_resub.append(job) 210 if one_result.xsec == 0: 211 if resubmit_zero: 212 to_resub.append(job) 213 elif max(one_result.xerru, one_result.xerrc)/one_result.xsec > min_precision: 214 to_resub.append(job) 215 else: 216 to_resub.append(job) 217 if to_resub: 218 for G in to_resub: 219 try: 220 shutil.rmtree(pjoin(P, 'G%s' % G)) 221 except Exception, error: 222 misc.sprint(error) 223 pass 224 misc.sprint(to_resub) 225 self.submit_to_cluster({P: to_resub})
226 227 228 229 230 231 232 233 234 235 236
237 - def submit_to_cluster(self, job_list):
238 """ """ 239 240 if self.run_card['job_strategy'] > 0: 241 if len(job_list) >1: 242 for path, dirs in job_list.items(): 243 self.submit_to_cluster({path:dirs}) 244 return 245 path, value = job_list.items()[0] 246 nexternal = self.cmd.proc_characteristics['nexternal'] 247 current = open(pjoin(path, "nexternal.inc")).read() 248 ext = re.search(r"PARAMETER \(NEXTERNAL=(\d+)\)", current).group(1) 249 250 if self.run_card['job_strategy'] == 2: 251 self.splitted_grid = 2 252 if nexternal == int(ext): 253 to_split = 2 254 else: 255 to_split = 0 256 if hasattr(self, 'splitted_Pdir'): 257 self.splitted_Pdir[path] = to_split 258 else: 259 self.splitted_Pdir = {path: to_split} 260 self.splitted_for_dir = lambda x,y : self.splitted_Pdir[x] 261 elif self.run_card['job_strategy'] == 1: 262 if nexternal == int(ext): 263 combine = 1 264 else: 265 combine = self.combining_job 266 if hasattr(self, 'splitted_Pdir'): 267 self.splitted_Pdir[path] = combine 268 else: 269 self.splitted_Pdir = {path: combine} 270 self.combining_job_for_Pdir = lambda x : self.splitted_Pdir[x] 271 272 if not self.splitted_grid: 273 return self.submit_to_cluster_no_splitting(job_list) 274 elif self.cmd.cluster_mode == 0: 275 return self.submit_to_cluster_no_splitting(job_list) 276 elif self.cmd.cluster_mode == 2 and self.cmd.options['nb_core'] == 1: 277 return self.submit_to_cluster_no_splitting(job_list) 278 else: 279 return self.submit_to_cluster_splitted(job_list)
280 281
282 - def submit_to_cluster_no_splitting(self, job_list):
283 """submit the survey without the parralelization. 284 This is the old mode which is still usefull in single core""" 285 286 # write the template file for the parameter file 287 self.write_parameter(parralelization=False, Pdirs=job_list.keys()) 288 289 290 # launch the job with the appropriate grouping 291 for Pdir, jobs in job_list.items(): 292 jobs = list(jobs) 293 i=0 294 while jobs: 295 i+=1 296 to_submit = ['0'] # the first entry is actually the offset 297 for _ in range(self.combining_job_for_Pdir(Pdir)): 298 if jobs: 299 to_submit.append(jobs.pop(0)) 300 301 self.cmd.launch_job(pjoin(self.me_dir, 'SubProcesses', 'survey.sh'), 302 argument=to_submit, 303 cwd=pjoin(self.me_dir,'SubProcesses' , Pdir))
304 305
306 - def create_resubmit_one_iter(self, Pdir, G, submit_ps, nb_job, step=0):
307 """prepare the input_file for submitting the channel""" 308 309 310 if 'SubProcesses' not in Pdir: 311 Pdir = pjoin(self.me_dir, 'SubProcesses', Pdir) 312 313 #keep track of how many job are sended 314 self.splitted_Pdir[(Pdir, G)] = int(nb_job) 315 316 317 # 1. write the new input_app.txt 318 run_card = self.cmd.run_card 319 options = {'event' : submit_ps, 320 'maxiter': 1, 321 'miniter': 1, 322 'accuracy': self.cmd.opts['accuracy'], 323 'helicity': run_card['nhel_survey'] if 'nhel_survey' in run_card \ 324 else run_card['nhel'], 325 'gridmode': -2, 326 'channel' : G 327 } 328 329 Gdir = pjoin(Pdir, 'G%s' % G) 330 self.write_parameter_file(pjoin(Gdir, 'input_app.txt'), options) 331 332 # 2. check that ftn25 exists. 333 assert os.path.exists(pjoin(Gdir, "ftn25")) 334 335 336 # 3. Submit the new jobs 337 #call back function 338 packet = cluster.Packet((Pdir, G, step+1), 339 self.combine_iteration, 340 (Pdir, G, step+1)) 341 342 if step ==0: 343 self.lastoffset[(Pdir, G)] = 0 344 345 # resubmit the new jobs 346 for i in xrange(int(nb_job)): 347 name = "G%s_%s" % (G,i+1) 348 self.lastoffset[(Pdir, G)] += 1 349 offset = self.lastoffset[(Pdir, G)] 350 self.cmd.launch_job(pjoin(self.me_dir, 'SubProcesses', 'refine_splitted.sh'), 351 argument=[name, 'G%s'%G, offset], 352 cwd= Pdir, 353 packet_member=packet)
354 355
    def submit_to_cluster_splitted(self, job_list):
        """ submit the version of the survey with splitted grid creation
        """

        #if self.splitted_grid <= 1:
        #    return self.submit_to_cluster_no_splitting(job_list)

        for Pdir, jobs in job_list.items():
            if self.splitted_for_dir(Pdir, jobs[0]) <= 1:
                # NOTE(review): this returns from the whole method, so any
                # remaining Pdir in job_list would not be submitted; in
                # practice the caller seems to pass a single Pdir -- confirm.
                return self.submit_to_cluster_no_splitting({Pdir: jobs})

            self.write_parameter(parralelization=True, Pdirs=[Pdir])
            # launch the job with the appropriate grouping

            for job in jobs:
                # the packet triggers combine_iteration once every sub-grid
                # piece of this channel has finished
                packet = cluster.Packet((Pdir, job, 1), self.combine_iteration, (Pdir, job, 1))
                for i in range(self.splitted_for_dir(Pdir, job)):
                    self.cmd.launch_job(pjoin(self.me_dir, 'SubProcesses', 'survey.sh'),
                                        argument=[i+1, job],
                                        cwd=pjoin(self.me_dir, 'SubProcesses', Pdir),
                                        packet_member=packet)
    def combine_iteration(self, Pdir, G, step):
        """Callback run when all splitted jobs of channel G at iteration
        `step` are done: combine their grids/results, decide whether another
        iteration is needed, and either resubmit or finalize the channel
        (grid + events + results.dat) for the refine stage. Returns 0."""

        grid_calculator, cross, error = self.combine_grid(Pdir, G, step)

        # Compute the number of events used for this run.
        nb_events = grid_calculator.target_evt

        Gdirs = [] #build the the list of directory
        for i in range(self.splitted_for_dir(Pdir, G)):
            path = pjoin(Pdir, "G%s_%s" % (G, i+1))
            Gdirs.append(path)

        # 4. make the submission of the next iteration
        #    Three cases - less than 3 iteration -> continue
        #                - more than 3 and less than 5 -> check error
        #                - more than 5 -> prepare info for refine
        need_submit = False
        if step < self.min_iterations and cross != 0:
            if step == 1:
                need_submit = True
            else:
                # channels contributing less than 1e-6 of the total are
                # not worth iterating further
                across = self.abscross[(Pdir,G)]/(self.sigma[(Pdir,G)]+1e-99)
                tot_across = self.get_current_axsec()
                if across / tot_across < 1e-6:
                    need_submit = False
                elif error < self.cmd.opts['accuracy'] / 100:
                    need_submit = False
                else:
                    need_submit = True

        elif step >= self.cmd.opts['iterations']:
            need_submit = False
        elif self.cmd.opts['accuracy'] < 0:
            #check for luminosity
            raise Exception, "Not Implemented"
        elif self.abscross[(Pdir,G)] == 0:
            need_submit = False
        else:
            across = self.abscross[(Pdir,G)]/(self.sigma[(Pdir,G)]+1e-99)
            tot_across = self.get_current_axsec()
            if across == 0:
                need_submit = False
            elif across / tot_across < 1e-5:
                need_submit = False
            elif error > self.cmd.opts['accuracy']:
                need_submit = True
            else:
                need_submit = False


        if cross:
            # write the combined grid from which the next iteration (or the
            # refine) will restart
            grid_calculator.write_grid_for_submission(Pdir,G,
                         self.splitted_for_dir(Pdir, G),
                         nb_events,mode=self.mode,
                         conservative_factor=5.0)

        # number of significant digits to print, driven by the current error
        xsec_format = '.%ig'%(max(3,int(math.log10(1.0/float(error)))+2)
                              if float(cross)!=0.0 and float(error)!=0.0 else 8)
        if need_submit:
            message = "%%s/G%%s is at %%%s +- %%.3g pb. Now submitting iteration #%s."%(xsec_format, step+1)
            logger.info(message%\
                        (os.path.basename(Pdir), G, float(cross),
                         float(error)*float(cross)))
            self.resubmit_survey(Pdir,G, Gdirs, step)
        elif cross:
            logger.info("Survey finished for %s/G%s at %s"%(
                        os.path.basename(Pdir),G,('%%%s +- %%.3g pb'%xsec_format))%
                        (float(cross), float(error)*float(cross)))
            # prepare information for refine
            newGpath = pjoin(self.me_dir,'SubProcesses' , Pdir, 'G%s' % G)
            if not os.path.exists(newGpath):
                os.mkdir(newGpath)

            # copy the new grid:
            files.cp(pjoin(Gdirs[0], 'ftn25'),
                     pjoin(self.me_dir,'SubProcesses' , Pdir, 'G%s' % G, 'ftn26'))

            # copy the events (concatenation of all the splitted runs)
            fsock = open(pjoin(newGpath, 'events.lhe'), 'w')
            for Gdir in Gdirs:
                fsock.write(open(pjoin(Gdir, 'events.lhe')).read())

            # copy one log
            files.cp(pjoin(Gdirs[0], 'log.txt'),
                     pjoin(self.me_dir,'SubProcesses' , Pdir, 'G%s' % G))


            # create the appropriate results.dat
            self.write_results(grid_calculator, cross, error, Pdir, G, step)
        else:
            # zero cross-section channel: still produce a (zeroed) results.dat
            logger.info("Survey finished for %s/G%s [0 cross]", os.path.basename(Pdir),G)

            Gdir = pjoin(self.me_dir,'SubProcesses' , Pdir, 'G%s' % G)
            if not os.path.exists(Gdir):
                os.mkdir(Gdir)
            # copy one log
            files.cp(pjoin(Gdirs[0], 'log.txt'), Gdir)
            # create the appropriate results.dat
            self.write_results(grid_calculator, cross, error, Pdir, G, step)

        return 0
480 - def combine_grid(self, Pdir, G, step, exclude_sub_jobs=[]):
481 """ exclude_sub_jobs is to remove some of the subjobs if a numerical 482 issue is detected in one of them. Warning is issue when this occurs. 483 """ 484 485 # 1. create an object to combine the grid information and fill it 486 grid_calculator = combine_grid.grid_information(self.run_card['nhel']) 487 488 for i in range(self.splitted_for_dir(Pdir, G)): 489 if i in exclude_sub_jobs: 490 continue 491 path = pjoin(Pdir, "G%s_%s" % (G, i+1)) 492 fsock = misc.mult_try_open(pjoin(path, 'results.dat')) 493 one_result = grid_calculator.add_results_information(fsock) 494 fsock.close() 495 if one_result.axsec == 0: 496 grid_calculator.onefail = True 497 continue # grid_information might not exists 498 fsock = misc.mult_try_open(pjoin(path, 'grid_information')) 499 grid_calculator.add_one_grid_information(fsock) 500 fsock.close() 501 os.remove(pjoin(path, 'results.dat')) 502 #os.remove(pjoin(path, 'grid_information')) 503 504 505 506 #2. combine the information about the total crossection / error 507 # start by keep the interation in memory 508 cross, across, sigma = grid_calculator.get_cross_section() 509 510 #3. Try to avoid one single PS point which ruins the integration 511 # Should be related to loop evaluation instability. 512 maxwgt = grid_calculator.get_max_wgt(0.01) 513 if maxwgt: 514 nunwgt = grid_calculator.get_nunwgt(maxwgt) 515 # Make sure not to apply the security below during the first step of the 516 # survey. Also, disregard channels with a contribution relative to the 517 # total cross-section smaller than 1e-8 since in this case it is unlikely 518 # that this channel will need more than 1 event anyway. 
519 apply_instability_security = False 520 rel_contrib = 0.0 521 if (self.__class__ != gensym or step > 1): 522 Pdir_across = 0.0 523 Gdir_across = 0.0 524 for (mPdir,mG) in self.abscross.keys(): 525 if mPdir == Pdir: 526 Pdir_across += (self.abscross[(mPdir,mG)]/ 527 (self.sigma[(mPdir,mG)]+1e-99)) 528 if mG == G: 529 Gdir_across += (self.abscross[(mPdir,mG)]/ 530 (self.sigma[(mPdir,mG)]+1e-99)) 531 rel_contrib = abs(Gdir_across/(Pdir_across+1e-99)) 532 if rel_contrib > (1.0e-8) and \ 533 nunwgt < 2 and len(grid_calculator.results) > 1: 534 apply_instability_security = True 535 536 if apply_instability_security: 537 # check the ratio between the different submit 538 th_maxwgt = [(r.th_maxwgt,i) for i,r in enumerate(grid_calculator.results)] 539 th_maxwgt.sort() 540 ratio = th_maxwgt[-1][0]/th_maxwgt[-2][0] 541 if ratio > 1e4: 542 logger.warning( 543 """"One Event with large weight have been found (ratio = %.3g) in channel G%s (with rel.contrib=%.3g). 544 This is likely due to numerical instabilities. The associated job is discarded to recover. 545 For offline investigation, the problematic discarded events are stored in: 546 %s"""%(ratio,G,rel_contrib,pjoin(Pdir,'DiscardedUnstableEvents'))) 547 exclude_sub_jobs = list(exclude_sub_jobs) 548 exclude_sub_jobs.append(th_maxwgt[-1][1]) 549 grid_calculator.results.run_statistics['skipped_subchannel'] += 1 550 551 # Add some monitoring of the problematic events 552 gPath = pjoin(Pdir, "G%s_%s" % (G, th_maxwgt[-1][1]+1)) 553 if os.path.isfile(pjoin(gPath,'events.lhe')): 554 lhe_file = lhe_parser.EventFile(pjoin(gPath,'events.lhe')) 555 discardedPath = pjoin(Pdir,'DiscardedUnstableEvents') 556 if not os.path.exists(discardedPath): 557 os.mkdir(discardedPath) 558 if os.path.isdir(discardedPath): 559 # Keep only the event with a maximum weight, as it surely 560 # is the problematic one. 
561 evtRecord = open(pjoin(discardedPath,'discarded_G%s.dat'%G),'a') 562 lhe_file.seek(0) #rewind the file 563 try: 564 evtRecord.write('\n'+str(max(lhe_file,key=lambda evt:abs(evt.wgt)))) 565 except Exception: 566 #something wrong write the full file. 567 lhe_file.close() 568 evtRecord.write(pjoin(gPath,'events.lhe').read()) 569 evtRecord.close() 570 571 return self.combine_grid(Pdir, G, step, exclude_sub_jobs) 572 573 574 if across !=0: 575 if sigma != 0: 576 self.cross[(Pdir,G)] += cross**3/sigma**2 577 self.abscross[(Pdir,G)] += across * cross**2/sigma**2 578 self.sigma[(Pdir,G)] += cross**2/ sigma**2 579 self.chi2[(Pdir,G)] += cross**4/sigma**2 580 # and use those iteration to get the current estimator 581 cross = self.cross[(Pdir,G)]/self.sigma[(Pdir,G)] 582 if step > 1: 583 error = math.sqrt(abs((self.chi2[(Pdir,G)]/cross**2 - \ 584 self.sigma[(Pdir,G)])/(step-1))/self.sigma[(Pdir,G)]) 585 else: 586 error = sigma/cross 587 else: 588 self.cross[(Pdir,G)] = cross 589 self.abscross[(Pdir,G)] = across 590 self.sigma[(Pdir,G)] = 0 591 self.chi2[(Pdir,G)] = 0 592 cross = self.cross[(Pdir,G)] 593 error = 0 594 595 else: 596 error = 0 597 598 grid_calculator.results.compute_values(update_statistics=True) 599 if (str(os.path.basename(Pdir)), G) in self.run_statistics: 600 self.run_statistics[(str(os.path.basename(Pdir)), G)]\ 601 .aggregate_statistics(grid_calculator.results.run_statistics) 602 else: 603 self.run_statistics[(str(os.path.basename(Pdir)), G)] = \ 604 grid_calculator.results.run_statistics 605 606 self.warnings_from_statistics(G, grid_calculator.results.run_statistics) 607 stats_msg = grid_calculator.results.run_statistics.nice_output( 608 '/'.join([os.path.basename(Pdir),'G%s'%G])) 609 610 if stats_msg: 611 logger.log(5, stats_msg) 612 613 # Clean up grid_information to avoid border effects in case of a crash 614 for i in range(self.splitted_for_dir(Pdir, G)): 615 path = pjoin(Pdir, "G%s_%s" % (G, i+1)) 616 try: 617 os.remove(pjoin(path, 
'grid_information')) 618 except OSError, oneerror: 619 if oneerror.errno != 2: 620 raise 621 return grid_calculator, cross, error
622
623 - def warnings_from_statistics(self,G,stats):
624 """Possible warn user for worrying MadLoop stats for this channel.""" 625 626 if stats['n_madloop_calls']==0: 627 return 628 629 EPS_fraction = float(stats['exceptional_points'])/stats['n_madloop_calls'] 630 631 msg = "Channel %s has encountered a fraction of %.3g\n"+ \ 632 "of numerically unstable loop matrix element computations\n"+\ 633 "(which could not be rescued using quadruple precision).\n"+\ 634 "The results might not be trusted." 635 636 if 0.01 > EPS_fraction > 0.001: 637 logger.warning(msg%(G,EPS_fraction)) 638 elif EPS_fraction > 0.01: 639 logger.critical((msg%(G,EPS_fraction)).replace('might', 'can')) 640 raise Exception, (msg%(G,EPS_fraction)).replace('might', 'can')
641
642 - def get_current_axsec(self):
643 644 across = 0 645 for (Pdir,G) in self.abscross: 646 across += self.abscross[(Pdir,G)]/(self.sigma[(Pdir,G)]+1e-99) 647 return across
648
649 - def write_results(self, grid_calculator, cross, error, Pdir, G, step):
650 651 #compute the value 652 if cross == 0: 653 abscross,nw, luminosity = 0, 0, 0 654 wgt, maxit,nunwgt, wgt, nevents = 0,0,0,0,0 655 maxwgt = 0 656 error = 0 657 else: 658 grid_calculator.results.compute_values() 659 abscross = self.abscross[(Pdir,G)]/self.sigma[(Pdir,G)] 660 nw = grid_calculator.results.nw 661 wgt = grid_calculator.results.wgt 662 maxit = step 663 wgt = 0 664 nevents = grid_calculator.results.nevents 665 maxwgt = grid_calculator.get_max_wgt() 666 nunwgt = grid_calculator.get_nunwgt() 667 luminosity = nunwgt/cross 668 669 #format the results.dat 670 def fstr(nb): 671 data = '%E' % nb 672 nb, power = data.split('E') 673 nb = float(nb) /10 674 power = int(power) + 1 675 return '%.5fE%+03i' %(nb,power)
676 line = '%s %s %s %i %i %i %i %s %s %s %s 0.0 0\n' % \ 677 (fstr(cross), fstr(error*cross), fstr(error*cross), 678 nevents, nw, maxit,nunwgt, 679 fstr(luminosity), fstr(wgt), fstr(abscross), fstr(maxwgt)) 680 681 fsock = open(pjoin(self.me_dir,'SubProcesses' , Pdir, 'G%s' % G, 682 'results.dat'),'w') 683 fsock.writelines(line) 684 fsock.close()
685
686 - def resubmit_survey(self, Pdir, G, Gdirs, step):
687 """submit the next iteration of the survey""" 688 689 # 1. write the new input_app.txt to double the number of points 690 run_card = self.cmd.run_card 691 options = {'event' : 2**(step) * self.cmd.opts['points'] / self.splitted_grid, 692 'maxiter': 1, 693 'miniter': 1, 694 'accuracy': self.cmd.opts['accuracy'], 695 'helicity': run_card['nhel_survey'] if 'nhel_survey' in run_card \ 696 else run_card['nhel'], 697 'gridmode': -2, 698 'channel' : '' 699 } 700 701 if int(options['helicity']) == 1: 702 options['event'] = options['event'] * 2**(self.cmd.proc_characteristics['nexternal']//3) 703 704 for Gdir in Gdirs: 705 self.write_parameter_file(pjoin(Gdir, 'input_app.txt'), options) 706 707 708 #2. resubmit the new jobs 709 packet = cluster.Packet((Pdir, G, step+1), self.combine_iteration, \ 710 (Pdir, G, step+1)) 711 nb_step = len(Gdirs) * (step+1) 712 for i,subdir in enumerate(Gdirs): 713 subdir = subdir.rsplit('_',1)[1] 714 subdir = int(subdir) 715 offset = nb_step+i+1 716 offset=str(offset) 717 tag = "%s.%s" % (subdir, offset) 718 719 self.cmd.launch_job(pjoin(self.me_dir, 'SubProcesses', 'survey.sh'), 720 argument=[tag, G], 721 cwd=pjoin(self.me_dir,'SubProcesses' , Pdir), 722 packet_member=packet)
723 724 725 726
    def write_parameter_file(self, path, options):
        """Write a fortran-read input_app.txt at `path` from the `options`
        mapping (keys: event, maxiter, miniter, accuracy, gridmode, helicity,
        channel). NOTE: coerces options['event'] to int *in place*.
        The template layout is read positionally by the fortran code --
        do not restructure it."""

        template =""" %(event)s %(maxiter)s %(miniter)s !Number of events and max and min iterations
 %(accuracy)s !Accuracy
 %(gridmode)s !Grid Adjustment 0=none, 2=adjust
 1 !Suppress Amplitude 1=yes
 %(helicity)s !Helicity Sum/event 0=exact
 %(channel)s """
        options['event'] = int(options['event'])
        open(path, 'w').write(template % options)
741 - def write_parameter(self, parralelization, Pdirs=None):
742 """Write the parameter of the survey run""" 743 744 run_card = self.cmd.run_card 745 746 options = {'event' : self.cmd.opts['points'], 747 'maxiter': self.cmd.opts['iterations'], 748 'miniter': self.min_iterations, 749 'accuracy': self.cmd.opts['accuracy'], 750 'helicity': run_card['nhel_survey'] if 'nhel_survey' in run_card \ 751 else run_card['nhel'], 752 'gridmode': 2, 753 'channel': '' 754 } 755 756 if int(options['helicity'])== 1: 757 options['event'] = options['event'] * 2**(self.cmd.proc_characteristics['nexternal']//3) 758 759 if parralelization: 760 options['gridmode'] = -2 761 options['maxiter'] = 1 #this is automatic in dsample anyway 762 options['miniter'] = 1 #this is automatic in dsample anyway 763 options['event'] /= self.splitted_grid 764 765 if not Pdirs: 766 Pdirs = self.subproc 767 768 for Pdir in Pdirs: 769 path =pjoin(Pdir, 'input_app.txt') 770 self.write_parameter_file(path, options)
771
class gen_ximprove(object):
    """Python replacement of the fortran gen_ximprove: analyse the survey
    results and create the refine jobs. __new__ dispatches to the relevant
    subclass (v4 / share / gridpack)."""

    # some hardcoded value which impact the generation
    gen_events_security = 1.2 # multiply the number of requested event by this number for security
    combining_job = 0 # allow to run multiple channel in sequence
    max_request_event = 1000 # split jobs if a channel if it needs more than that
    max_event_in_iter = 5000
    min_event_in_iter = 1000
    max_splitting = 130 # maximum duplication of a given channel
    min_iter = 3
    max_iter = 9
    keep_grid_for_refine = False # only apply if needed to split the job

    #convenient shortcut for the formatting of variable
    @ staticmethod
    def format_variable(*args):
        # same coercion rules as the run_card entries
        return bannermod.ConfigFile.format_variable(*args)
    def __new__(cls, cmd, opt):
        """Choose in which type of refine we want to be"""

        # NOTE(review): the extra (cmd, opt) arguments forwarded to
        # object.__new__ are tolerated (deprecation) in python2 but would
        # raise a TypeError in python3 -- confirm before porting.
        if cmd.proc_characteristics['loop_induced']:
            return super(gen_ximprove, cls).__new__(gen_ximprove_share, cmd, opt)
        elif gen_ximprove.format_variable(cmd.run_card['gridpack'], bool):
            return super(gen_ximprove, cls).__new__(gen_ximprove_gridpack, cmd, opt)
        elif cmd.run_card["job_strategy"] == 2:
            return super(gen_ximprove, cls).__new__(gen_ximprove_share, cmd, opt)
        else:
            return super(gen_ximprove, cls).__new__(gen_ximprove_v4, cmd, opt)
    def __init__(self, cmd, opt=None):
        """Store the interface/run_card information and the default refine
        parameters; `opt` may be a dict of attribute overrides (see
        configure) or a GridpackCard."""

        # some parent in a cooperative MRO may accept (cmd, opt); object does not
        try:
            super(gen_ximprove, self).__init__(cmd, opt)
        except TypeError:
            pass

        self.run_statistics = {}
        self.cmd = cmd
        self.run_card = cmd.run_card
        run_card = self.run_card
        self.me_dir = cmd.me_dir

        #extract from the run_card the information that we need.
        self.gridpack = run_card['gridpack']
        self.nhel = run_card['nhel']
        if "nhel_refine" in run_card:
            self.nhel = run_card["nhel_refine"]

        if self.run_card['refine_evt_by_job'] != -1:
            self.max_request_event = run_card['refine_evt_by_job']


        # Default option for the run
        self.gen_events = True
        self.min_iter = 3
        self.parralel = False
        # parameter which was input for the normal gen_ximprove run
        self.err_goal = 0.01
        self.max_np = 9
        self.split_channels = False
        # parameter for the gridpack run
        self.nreq = 2000
        self.iseed = 4321

        # placeholder for information
        self.results = 0 #updated in launch/update_html

        if isinstance(opt, dict):
            self.configure(opt)
        elif isinstance(opt, bannermod.GridpackCard):
            self.configure_gridpack(opt)
850 - def __call__(self):
851 return self.launch()
852
853 - def launch(self):
854 """running """ 855 856 #start the run 857 self.handle_seed() 858 self.results = sum_html.collect_result(self.cmd, 859 main_dir=pjoin(self.cmd.me_dir,'SubProcesses')) #main_dir is for gridpack readonly mode 860 if self.gen_events: 861 # We run to provide a given number of events 862 self.get_job_for_event() 863 else: 864 # We run to achieve a given precision 865 self.get_job_for_precision()
866 867
868 - def configure(self, opt):
869 """Defines some parameter of the run""" 870 871 for key, value in opt.items(): 872 if key in self.__dict__: 873 targettype = type(getattr(self, key)) 874 setattr(self, key, self.format_variable(value, targettype, key)) 875 else: 876 raise Exception, '%s not define' % key 877 878 879 # special treatment always do outside the loop to avoid side effect 880 if 'err_goal' in opt: 881 if self.err_goal < 1: 882 logger.info("running for accuracy %s%%" % (self.err_goal*100)) 883 self.gen_events = False 884 elif self.err_goal >= 1: 885 logger.info("Generating %s unweigthed events." % self.err_goal) 886 self.gen_events = True 887 self.err_goal = self.err_goal * self.gen_events_security # security
888
889 - def handle_seed(self):
890 """not needed but for gridpack --which is not handle here for the moment""" 891 return
892 893
894 - def find_job_for_event(self):
895 """return the list of channel that need to be improved""" 896 897 assert self.err_goal >=1 898 self.err_goal = int(self.err_goal) 899 900 goal_lum = self.err_goal/(self.results.axsec+1e-99) #pb^-1 901 logger.info('Effective Luminosity %s pb^-1', goal_lum) 902 903 all_channels = sum([list(P) for P in self.results],[]) 904 all_channels.sort(cmp= lambda x,y: 1 if y.get('luminosity') - \ 905 x.get('luminosity') > 0 else -1) 906 907 to_refine = [] 908 for C in all_channels: 909 if C.get('axsec') == 0: 910 continue 911 if goal_lum/(C.get('luminosity')+1e-99) >= 1 + (self.gen_events_security-1)/2: 912 logger.debug("channel %s is at %s (%s) (%s pb)", C.name, C.get('luminosity'), goal_lum/(C.get('luminosity')+1e-99), C.get('xsec')) 913 to_refine.append(C) 914 elif C.get('xerr') > max(C.get('axsec'), 915 (1/(100*math.sqrt(self.err_goal)))*all_channels[-1].get('axsec')): 916 to_refine.append(C) 917 918 logger.info('need to improve %s channels' % len(to_refine)) 919 return goal_lum, to_refine
920
921 - def update_html(self):
922 """update the html from this object since it contains all the information""" 923 924 925 run = self.cmd.results.current['run_name'] 926 if not os.path.exists(pjoin(self.cmd.me_dir, 'HTML', run)): 927 os.mkdir(pjoin(self.cmd.me_dir, 'HTML', run)) 928 929 unit = self.cmd.results.unit 930 P_text = "" 931 if self.results: 932 Presults = self.results 933 else: 934 self.results = sum_html.collect_result(self.cmd, None) 935 Presults = self.results 936 937 for P_comb in Presults: 938 P_text += P_comb.get_html(run, unit, self.cmd.me_dir) 939 940 Presults.write_results_dat(pjoin(self.cmd.me_dir,'SubProcesses', 'results.dat')) 941 942 fsock = open(pjoin(self.cmd.me_dir, 'HTML', run, 'results.html'),'w') 943 fsock.write(sum_html.results_header) 944 fsock.write('%s <dl>' % Presults.get_html(run, unit, self.cmd.me_dir)) 945 fsock.write('%s </dl></body>' % P_text) 946 947 self.cmd.results.add_detail('cross', Presults.xsec) 948 self.cmd.results.add_detail('error', Presults.xerru) 949 950 return Presults.xsec, Presults.xerru
951
class gen_ximprove_v4(gen_ximprove):
    """Refine strategy following the historical fortran gen_ximprove:
    write one 'ajob' script per channel (possibly splitted in sub-jobs)
    to be submitted to the cluster/multicore layer."""

    # some hardcoded value which impact the generation
    gen_events_security = 1.2  # multiply the number of requested event by this number for security
    combining_job = 0          # allow to run multiple channel in sequence
    max_request_event = 1000   # split jobs if a channel if it needs more than that
    max_event_in_iter = 5000   # upper bound on points thrown per iteration
    min_event_in_iter = 1000   # lower bound on points thrown per iteration
    max_splitting = 130        # maximum duplication of a given channel
    min_iter = 3               # minimal number of iterations per job
    max_iter = 9               # maximal number of iterations per job
    keep_grid_for_refine = False  # only apply if needed to split the job
    def __init__(self, cmd, opt=None):
        """Initialise from the madevent interface *cmd* (dict *opt* forwarded
        to the base class).  If the user requested a better accuracy than the
        survey default, tighten the per-iteration statistics."""

        super(gen_ximprove_v4, self).__init__(cmd, opt)

        if cmd.opts['accuracy'] < cmd._survey_options['accuracy'][1]:
            self.increase_precision()
974
975 - def reset_multijob(self):
976 977 for path in misc.glob(pjoin('*', '*','multijob.dat'), pjoin(self.me_dir, 'SubProcesses')): 978 open(path,'w').write('0\n')
979
980 - def write_multijob(self, Channel, nb_split):
981 """ """ 982 if nb_split <=1: 983 return 984 f = open(pjoin(self.me_dir, 'SubProcesses', Channel.get('name'), 'multijob.dat'), 'w') 985 f.write('%i\n' % nb_split) 986 f.close()
987
    def increase_precision(self):
        """Tighten the generation parameters when the requested accuracy is
        better than the survey default."""

        self.max_event_in_iter = 20000
        # NOTE(review): this sets `min_events`, not `min_event_in_iter` which
        # is the attribute scaled just below -- confirm this is intentional.
        self.min_events = 7500
        if int(self.nhel) == 1:
            # helicity summation: scale the per-iteration statistics with the
            # number of external legs
            self.min_event_in_iter *= 2**(self.cmd.proc_characteristics['nexternal']//3)
            self.max_event_in_iter *= 2**(self.cmd.proc_characteristics['nexternal']//2)

        self.gen_events_security = 1.3

    # letters used to tag the sub-directories of splitted jobs (Ga0, Gb0, ...)
    alphabet = "abcdefghijklmnopqrstuvwxyz"
999 - def get_job_for_event(self):
1000 """generate the script in order to generate a given number of event""" 1001 # correspond to write_gen in the fortran version 1002 1003 1004 goal_lum, to_refine = self.find_job_for_event() 1005 1006 #reset the potential multijob of previous run 1007 self.reset_multijob() 1008 1009 jobs = [] # list of the refine if some job are split is list of 1010 # dict with the parameter of the run. 1011 1012 # try to have a smart load on the cluster (not really important actually) 1013 if self.combining_job >1: 1014 # add a nice ordering for the jobs 1015 new_order = [] 1016 if self.combining_job % 2 == 0: 1017 for i in range(len(to_refine) //2): 1018 new_order.append(to_refine[i]) 1019 new_order.append(to_refine[-i-1]) 1020 if len(to_refine) % 2: 1021 new_order.append(to_refine[i+1]) 1022 else: 1023 for i in range(len(to_refine) //3): 1024 new_order.append(to_refine[i]) 1025 new_order.append(to_refine[-2*i-1]) 1026 new_order.append(to_refine[-2*i-2]) 1027 if len(to_refine) % 3 == 1: 1028 new_order.append(to_refine[i+1]) 1029 elif len(to_refine) % 3 == 2: 1030 new_order.append(to_refine[i+2]) 1031 #ensure that the reordering is done nicely 1032 assert set([id(C) for C in to_refine]) == set([id(C) for C in new_order]) 1033 to_refine = new_order 1034 1035 1036 # loop over the channel to refine 1037 for C in to_refine: 1038 #1. Compute the number of points are needed to reach target 1039 needed_event = goal_lum*C.get('axsec') 1040 nb_split = int(max(1,((needed_event-1)// self.max_request_event) +1)) 1041 if not self.split_channels: 1042 nb_split = 1 1043 if nb_split > self.max_splitting: 1044 nb_split = self.max_splitting 1045 nb_split=max(1, nb_split) 1046 1047 1048 #2. 
estimate how many points we need in each iteration 1049 if C.get('nunwgt') > 0: 1050 nevents = needed_event / nb_split * (C.get('nevents') / C.get('nunwgt')) 1051 #split by iter 1052 nevents = int(nevents / (2**self.min_iter-1)) 1053 else: 1054 nevents = self.max_event_in_iter 1055 1056 if nevents < self.min_event_in_iter: 1057 nb_split = int(nb_split * nevents / self.min_event_in_iter) + 1 1058 nevents = self.min_event_in_iter 1059 # 1060 # forbid too low/too large value 1061 nevents = max(self.min_event_in_iter, min(self.max_event_in_iter, nevents)) 1062 logger.debug("%s : need %s event. Need %s split job of %s points", C.name, needed_event, nb_split, nevents) 1063 1064 1065 # write the multi-job information 1066 self.write_multijob(C, nb_split) 1067 1068 packet = cluster.Packet((C.parent_name, C.name), 1069 combine_runs.CombineRuns, 1070 (pjoin(self.me_dir, 'SubProcesses', C.parent_name)), 1071 {"subproc": C.name, "nb_split":nb_split}) 1072 1073 1074 #create the info dict assume no splitting for the default 1075 info = {'name': self.cmd.results.current['run_name'], 1076 'script_name': 'unknown', 1077 'directory': C.name, # need to be change for splitted job 1078 'P_dir': C.parent_name, 1079 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), 1080 'offset': 1, # need to be change for splitted job 1081 'nevents': nevents, 1082 'maxiter': self.max_iter, 1083 'miniter': self.min_iter, 1084 'precision': -goal_lum/nb_split, 1085 'nhel': self.run_card['nhel'], 1086 'channel': C.name.replace('G',''), 1087 'grid_refinment' : 0, #no refinment of the grid 1088 'base_directory': '', #should be change in splitted job if want to keep the grid 1089 'packet': packet, 1090 } 1091 1092 if nb_split == 1: 1093 jobs.append(info) 1094 else: 1095 for i in range(nb_split): 1096 new_info = dict(info) 1097 new_info['offset'] = i+1 1098 new_info['directory'] += self.alphabet[i % 26] + str((i+1)//26) 1099 if self.keep_grid_for_refine: 1100 new_info['base_directory'] = 
info['directory'] 1101 jobs.append(new_info) 1102 1103 self.create_ajob(pjoin(self.me_dir, 'SubProcesses', 'refine.sh'), jobs)
1104 1105
1106 - def create_ajob(self, template, jobs, write_dir=None):
1107 """create the ajob""" 1108 1109 if not jobs: 1110 return 1111 1112 if not write_dir: 1113 write_dir = pjoin(self.me_dir, 'SubProcesses') 1114 1115 #filter the job according to their SubProcess directory # no mix submition 1116 P2job= collections.defaultdict(list) 1117 for j in jobs: 1118 P2job[j['P_dir']].append(j) 1119 if len(P2job) >1: 1120 for P in P2job.values(): 1121 self.create_ajob(template, P, write_dir) 1122 return 1123 1124 1125 #Here we can assume that all job are for the same directory. 1126 path = pjoin(write_dir, jobs[0]['P_dir']) 1127 1128 template_text = open(template, 'r').read() 1129 # special treatment if needed to combine the script 1130 # computes how many submition miss one job 1131 if self.combining_job > 1: 1132 skip1=0 1133 n_channels = len(jobs) 1134 nb_sub = n_channels // self.combining_job 1135 nb_job_in_last = n_channels % self.combining_job 1136 if nb_sub == 0: 1137 nb_sub = 1 1138 nb_job_in_last =0 1139 if nb_job_in_last: 1140 nb_sub +=1 1141 skip1 = self.combining_job - nb_job_in_last 1142 if skip1 > nb_sub: 1143 self.combining_job -=1 1144 return self.create_ajob(template, jobs, write_dir) 1145 combining_job = self.combining_job 1146 else: 1147 #define the variable for combining jobs even in not combine mode 1148 #such that we can use the same routine 1149 skip1=0 1150 combining_job =1 1151 nb_sub = len(jobs) 1152 1153 1154 nb_use = 0 1155 for i in range(nb_sub): 1156 script_number = i+1 1157 if i < skip1: 1158 nb_job = combining_job -1 1159 else: 1160 nb_job = min(combining_job, len(jobs)) 1161 fsock = open(pjoin(path, 'ajob%i' % script_number), 'w') 1162 for j in range(nb_use, nb_use + nb_job): 1163 if j> len(jobs): 1164 break 1165 info = jobs[j] 1166 info['script_name'] = 'ajob%i' % script_number 1167 info['keeplog'] = 'false' 1168 if "base_directory" not in info: 1169 info["base_directory"] = "./" 1170 fsock.write(template_text % info) 1171 nb_use += nb_job 1172 1173 fsock.close() 1174 return script_number
1175
1176 - def get_job_for_precision(self):
1177 """create the ajob to achieve a give precision on the total cross-section""" 1178 1179 1180 assert self.err_goal <=1 1181 xtot = abs(self.results.xsec) 1182 logger.info("Working on precision: %s %%" %(100*self.err_goal)) 1183 all_channels = sum([list(P) for P in self.results if P.mfactor],[]) 1184 limit = self.err_goal * xtot / len(all_channels) 1185 to_refine = [] 1186 rerr = 0 #error of the job not directly selected 1187 for C in all_channels: 1188 cerr = C.mfactor*(C.xerru + len(all_channels)*C.xerrc) 1189 if cerr > abs(limit): 1190 to_refine.append(C) 1191 else: 1192 rerr += cerr 1193 rerr *=rerr 1194 if not len(to_refine): 1195 return 1196 1197 # change limit since most don't contribute 1198 limit = math.sqrt((self.err_goal * xtot)**2 - rerr/math.sqrt(len(to_refine))) 1199 for C in to_refine[:]: 1200 cerr = C.mfactor*(C.xerru + len(to_refine)*C.xerrc) 1201 if cerr < limit: 1202 to_refine.remove(C) 1203 1204 # all the channel are now selected. create the channel information 1205 logger.info('need to improve %s channels' % len(to_refine)) 1206 1207 1208 jobs = [] # list of the refine if some job are split is list of 1209 # dict with the parameter of the run. 1210 1211 # loop over the channel to refine 1212 for C in to_refine: 1213 1214 #1. 
Determine how many events we need in each iteration 1215 yerr = C.mfactor*(C.xerru+len(to_refine)*C.xerrc) 1216 nevents = 0.2*C.nevents*(yerr/limit)**2 1217 1218 nb_split = int((nevents*(C.nunwgt/C.nevents)/self.max_request_event/ (2**self.min_iter-1))**(2/3)) 1219 nb_split = max(nb_split, 1) 1220 # **(2/3) to slow down the increase in number of jobs 1221 if nb_split > self.max_splitting: 1222 nb_split = self.max_splitting 1223 1224 if nb_split >1: 1225 nevents = nevents / nb_split 1226 self.write_multijob(C, nb_split) 1227 # forbid too low/too large value 1228 nevents = min(self.min_event_in_iter, max(self.max_event_in_iter, nevents)) 1229 1230 1231 #create the info dict assume no splitting for the default 1232 info = {'name': self.cmd.results.current['run_name'], 1233 'script_name': 'unknown', 1234 'directory': C.name, # need to be change for splitted job 1235 'P_dir': C.parent_name, 1236 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), 1237 'offset': 1, # need to be change for splitted job 1238 'nevents': nevents, 1239 'maxiter': self.max_iter, 1240 'miniter': self.min_iter, 1241 'precision': yerr/math.sqrt(nb_split)/(C.get('xsec')+ yerr), 1242 'nhel': self.run_card['nhel'], 1243 'channel': C.name.replace('G',''), 1244 'grid_refinment' : 1 1245 } 1246 1247 if nb_split == 1: 1248 jobs.append(info) 1249 else: 1250 for i in range(nb_split): 1251 new_info = dict(info) 1252 new_info['offset'] = i+1 1253 new_info['directory'] += self.alphabet[i % 26] + str((i+1)//26) 1254 jobs.append(new_info) 1255 self.create_ajob(pjoin(self.me_dir, 'SubProcesses', 'refine.sh'), jobs)
1256
    def update_html(self):
        """update the html from this object since it contains all the information.

        NOTE(review): this is an exact duplicate of gen_ximprove.update_html;
        the override could be removed and inherited instead.

        Returns: (xsec, xerru) of the combined results.
        """

        run = self.cmd.results.current['run_name']
        if not os.path.exists(pjoin(self.cmd.me_dir, 'HTML', run)):
            os.mkdir(pjoin(self.cmd.me_dir, 'HTML', run))

        unit = self.cmd.results.unit
        P_text = ""
        if self.results:
            Presults = self.results
        else:
            # results were not collected yet: do it now
            self.results = sum_html.collect_result(self.cmd, None)
            Presults = self.results

        # one HTML section per SubProcess combination
        for P_comb in Presults:
            P_text += P_comb.get_html(run, unit, self.cmd.me_dir)

        Presults.write_results_dat(pjoin(self.cmd.me_dir, 'SubProcesses', 'results.dat'))

        fsock = open(pjoin(self.cmd.me_dir, 'HTML', run, 'results.html'), 'w')
        fsock.write(sum_html.results_header)
        fsock.write('%s <dl>' % Presults.get_html(run, unit, self.cmd.me_dir))
        fsock.write('%s </dl></body>' % P_text)

        # store the combined numbers in the run database
        self.cmd.results.add_detail('cross', Presults.xsec)
        self.cmd.results.add_detail('error', Presults.xerru)

        return Presults.xsec, Presults.xerru
1287
class gen_ximprove_v4_nogridupdate(gen_ximprove_v4):
    """Variant of gen_ximprove_v4 that keeps the survey grids (no grid
    refinement) and uses many small jobs -- tuned for loop-induced
    processes."""

    # some hardcoded value which impact the generation
    gen_events_security = 1.1  # multiply the number of requested event by this number for security
    combining_job = 0          # allow to run multiple channel in sequence
    max_request_event = 400    # split jobs if a channel if it needs more than that
    max_event_in_iter = 500
    min_event_in_iter = 250
    max_splitting = 260        # maximum duplication of a given channel
    min_iter = 2
    max_iter = 6
    keep_grid_for_refine = True
1305 - def __init__(self, cmd, opt=None):
1306 1307 gen_ximprove.__init__(cmd, opt) 1308 1309 if cmd.proc_characteristics['loopinduced'] and \ 1310 cmd.proc_characteristics['nexternal'] > 2: 1311 self.increase_parralelization(cmd.proc_characteristics['nexternal'])
1312
    def increase_parralelization(self, nexternal):
        """Split the refine into many more (smaller) jobs for expensive
        loop-induced matrix elements with *nexternal* external legs."""

        self.max_splitting = 1000

        if self.run_card['refine_evt_by_job'] != -1:
            # the user fixed the number of events per job: keep it
            pass
        elif nexternal == 3:
            self.max_request_event = 200
        elif nexternal == 4:
            self.max_request_event = 100
        elif nexternal >= 5:
            self.max_request_event = 50
            # NOTE(review): the indentation of the two lines below could not be
            # recovered unambiguously; they are assumed to belong to the >=5
            # branch (smallest jobs for the most expensive case) -- confirm.
            self.min_event_in_iter = 125
            self.max_iter = 5
1327
class gen_ximprove_share(gen_ximprove, gensym):
    """Doing the refine in multicore. Each core handle a couple of PS point."""

    nb_ps_by_job = 2000        # default number of phase-space points per job
    mode = "refine"
    gen_events_security = 1.15
    # Note the real security is lower since we stop the jobs if they are at 96%
    # of this target.
    def __init__(self, *args, **opts):
        """Forward to the base classes and set up the per-channel event
        bookkeeping used by combine_iteration."""

        super(gen_ximprove_share, self).__init__(*args, **opts)
        # (Pdir, G) -> (nb of unweighted events kept, max weight seen)
        self.generated_events = {}
        # number of splitted sub-jobs for a given (Pdir, G)
        # (thin accessor over splitted_Pdir, presumably filled by gensym --
        # TODO confirm)
        self.splitted_for_dir = lambda x, y: self.splitted_Pdir[(x, y)]
1342 1343
1344 - def get_job_for_event(self):
1345 """generate the script in order to generate a given number of event""" 1346 # correspond to write_gen in the fortran version 1347 1348 1349 goal_lum, to_refine = self.find_job_for_event() 1350 self.goal_lum = goal_lum 1351 1352 # loop over the channel to refine to find the number of PS point to launch 1353 total_ps_points = 0 1354 channel_to_ps_point = [] 1355 for C in to_refine: 1356 #0. remove previous events files 1357 try: 1358 os.remove(pjoin(self.me_dir, "SubProcesses",C.parent_name, C.name, "events.lhe")) 1359 except: 1360 pass 1361 1362 #1. Compute the number of points are needed to reach target 1363 needed_event = goal_lum*C.get('axsec') 1364 if needed_event == 0: 1365 continue 1366 #2. estimate how many points we need in each iteration 1367 if C.get('nunwgt') > 0: 1368 nevents = needed_event * (C.get('nevents') / C.get('nunwgt')) 1369 #split by iter 1370 nevents = int(nevents / (2**self.min_iter-1)) 1371 else: 1372 nb_split = int(max(1,((needed_event-1)// self.max_request_event) +1)) 1373 if not self.split_channels: 1374 nb_split = 1 1375 if nb_split > self.max_splitting: 1376 nb_split = self.max_splitting 1377 nevents = self.max_event_in_iter * self.max_splitting 1378 else: 1379 nevents = self.max_event_in_iter * nb_split 1380 1381 if nevents > self.max_splitting*self.max_event_in_iter: 1382 logger.warning("Channel %s/%s has a very low efficiency of unweighting. 
Might not be possible to reach target" % \ 1383 (C.name, C.parent_name)) 1384 nevents = self.max_event_in_iter * self.max_splitting 1385 1386 total_ps_points += nevents 1387 channel_to_ps_point.append((C, nevents)) 1388 1389 if self.cmd.options["run_mode"] == 1: 1390 if self.cmd.options["cluster_size"]: 1391 nb_ps_by_job = total_ps_points /int(self.cmd.options["cluster_size"]) 1392 else: 1393 nb_ps_by_job = self.nb_ps_by_job 1394 elif self.cmd.options["run_mode"] == 2: 1395 remain = total_ps_points % self.cmd.options["nb_core"] 1396 if remain: 1397 nb_ps_by_job = 1 + (total_ps_points - remain) / self.cmd.options["nb_core"] 1398 else: 1399 nb_ps_by_job = total_ps_points / self.cmd.options["nb_core"] 1400 else: 1401 nb_ps_by_job = self.nb_ps_by_job 1402 1403 nb_ps_by_job = int(max(nb_ps_by_job, 500)) 1404 1405 for C, nevents in channel_to_ps_point: 1406 if nevents % nb_ps_by_job: 1407 nb_job = 1 + int(nevents // nb_ps_by_job) 1408 else: 1409 nb_job = int(nevents // nb_ps_by_job) 1410 submit_ps = min(nevents, nb_ps_by_job) 1411 if nb_job == 1: 1412 submit_ps = max(submit_ps, self.min_event_in_iter) 1413 self.create_resubmit_one_iter(C.parent_name, C.name[1:], submit_ps, nb_job, step=0) 1414 needed_event = goal_lum*C.get('xsec') 1415 logger.debug("%s/%s : need %s event. Need %s split job of %s points", C.parent_name, C.name, needed_event, nb_job, submit_ps)
1416 1417
    def combine_iteration(self, Pdir, G, step):
        """Combine the splitted results of channel *G* in *Pdir* after
        iteration *step*, estimate how many unweighted events are banked,
        and either write the final results or resubmit more jobs.

        Always returns 0.
        """

        grid_calculator, cross, error = self.combine_grid(Pdir, G, step)

        # collect all the generated_event
        Gdirs = []  # build the the list of directory
        for i in range(self.splitted_for_dir(Pdir, G)):
            path = pjoin(Pdir, "G%s_%s" % (G, i+1))
            Gdirs.append(path)
        assert len(grid_calculator.results) == len(Gdirs) == self.splitted_for_dir(Pdir, G)

        # Check how many events are going to be kept after un-weighting.
        needed_event = cross * self.goal_lum
        if needed_event == 0:
            return 0
        # check that the number of events requested is not higher than the actual
        # total number of events to generate.
        if self.err_goal >= 1:
            if needed_event > self.gen_events_security * self.err_goal:
                needed_event = int(self.gen_events_security * self.err_goal)

        # events already banked for this channel in previous iterations
        if (Pdir, G) in self.generated_events:
            old_nunwgt, old_maxwgt = self.generated_events[(Pdir, G)]
        else:
            old_nunwgt, old_maxwgt = 0, 0

        if old_nunwgt == 0 and os.path.exists(pjoin(Pdir, "G%s" % G, "events.lhe")):
            # possible for second refine.
            lhe = lhe_parser.EventFile(pjoin(Pdir, "G%s" % G, "events.lhe"))
            old_nunwgt = lhe.unweight(None, trunc_error=0.005, log_level=0)
            old_maxwgt = lhe.max_wgt

        # re-scale the old stock of events to the new (larger) max weight
        maxwgt = max(grid_calculator.get_max_wgt(), old_maxwgt)
        new_evt = grid_calculator.get_nunwgt(maxwgt)
        efficiency = new_evt / sum([R.nevents for R in grid_calculator.results])
        nunwgt = old_nunwgt * old_maxwgt / maxwgt
        nunwgt += new_evt

        # check the number of event for this iteration alone
        one_iter_nb_event = max(grid_calculator.get_nunwgt(), 1)
        drop_previous_iteration = False
        # compare the number of events to generate if we discard the previous iteration
        n_target_one_iter = (needed_event-one_iter_nb_event) / (one_iter_nb_event / sum([R.nevents for R in grid_calculator.results]))
        n_target_combined = (needed_event-nunwgt) / efficiency
        if n_target_one_iter < n_target_combined:
            # the last iteration alone has more event that the combine iteration.
            # it is therefore interesting to drop previous iteration.
            drop_previous_iteration = True
            nunwgt = one_iter_nb_event
            maxwgt = grid_calculator.get_max_wgt()
            new_evt = nunwgt
            efficiency = (one_iter_nb_event / sum([R.nevents for R in grid_calculator.results]))

        # append to the channel event file, or truncate it when the previous
        # iterations are dropped
        try:
            if drop_previous_iteration:
                raise IOError
            output_file = open(pjoin(Pdir, "G%s" % G, "events.lhe"), 'a')
        except IOError:
            output_file = open(pjoin(Pdir, "G%s" % G, "events.lhe"), 'w')

        misc.call(["cat"] + [pjoin(d, "events.lhe") for d in Gdirs],
                  stdout=output_file)
        output_file.close()
        # For large number of iteration. check the number of event by doing the
        # real unweighting.
        if nunwgt < 0.6 * needed_event and step > self.min_iter:
            lhe = lhe_parser.EventFile(output_file.name)
            old_nunwgt = nunwgt
            nunwgt = lhe.unweight(None, trunc_error=0.01, log_level=0)

        self.generated_events[(Pdir, G)] = (nunwgt, maxwgt)

        # misc.sprint("Adding %s event to %s. Currently at %s" % (new_evt, G, nunwgt))
        # check what to do
        if nunwgt >= int(0.96*needed_event)+1:  # 0.96*1.15=1.10 =real security
            # We did it.
            logger.info("found enough event for %s/G%s" % (os.path.basename(Pdir), G))
            self.write_results(grid_calculator, cross, error, Pdir, G, step, efficiency)
            return 0
        elif step >= self.max_iter:
            logger.debug("fail to find enough event")
            self.write_results(grid_calculator, cross, error, Pdir, G, step, efficiency)
            return 0

        # not enough events yet: compute how many more jobs to submit
        nb_split_before = len(grid_calculator.results)
        nevents = grid_calculator.results[0].nevents
        if nevents == 0:  # possible if some integral returns 0
            nevents = max(g.nevents for g in grid_calculator.results)

        need_ps_point = (needed_event - nunwgt)/(efficiency+1e-99)
        need_job = need_ps_point // nevents + 1

        if step < self.min_iter:
            # This is normal but check if we are on the good track
            job_at_first_iter = nb_split_before/2**(step-1)
            expected_total_job = job_at_first_iter * (2**self.min_iter-1)
            done_job = job_at_first_iter * (2**step-1)
            expected_remaining_job = expected_total_job - done_job

            logger.debug("efficiency status (smaller is better): %s", need_job/expected_remaining_job)
            # increase if needed but not too much
            need_job = min(need_job, expected_remaining_job*1.25)

            nb_job = (need_job-0.5)//(2**(self.min_iter-step)-1) + 1
            nb_job = max(1, nb_job)
            grid_calculator.write_grid_for_submission(Pdir, G,
                self.splitted_for_dir(Pdir, G), nb_job*nevents, mode=self.mode,
                conservative_factor=self.max_iter)
            logger.info("%s/G%s is at %i/%i (%.2g%%) event. Resubmit %i job at iteration %i." \
                % (os.path.basename(Pdir), G, int(nunwgt), int(needed_event)+1,
                   (float(nunwgt)/needed_event)*100.0 if needed_event > 0.0 else 0.0,
                   nb_job, step))
            self.create_resubmit_one_iter(Pdir, G, nevents, nb_job, step)
            # self.create_job(Pdir, G, nb_job, nevents, step)

        elif step < self.max_iter:
            if step + 1 == self.max_iter:
                need_job = 1.20 * need_job  # avoid to have just too few event.

            nb_job = int(min(need_job, nb_split_before*1.5))
            grid_calculator.write_grid_for_submission(Pdir, G,
                self.splitted_for_dir(Pdir, G), nb_job*nevents, mode=self.mode,
                conservative_factor=self.max_iter)

            logger.info("%s/G%s is at %i/%i ('%.2g%%') event. Resubmit %i job at iteration %i." \
                % (os.path.basename(Pdir), G, int(nunwgt), int(needed_event)+1,
                   (float(nunwgt)/needed_event)*100.0 if needed_event > 0.0 else 0.0,
                   nb_job, step))
            self.create_resubmit_one_iter(Pdir, G, nevents, nb_job, step)

        return 0
1556 1557
1558 - def write_results(self, grid_calculator, cross, error, Pdir, G, step, efficiency):
1559 1560 #compute the value 1561 if cross == 0: 1562 abscross,nw, luminosity = 0, 0, 0 1563 wgt, maxit,nunwgt, wgt, nevents = 0,0,0,0,0 1564 error = 0 1565 else: 1566 grid_calculator.results.compute_values() 1567 abscross = self.abscross[(Pdir,G)]/self.sigma[(Pdir,G)] 1568 nunwgt, wgt = self.generated_events[(Pdir, G)] 1569 nw = int(nunwgt / efficiency) 1570 nunwgt = int(nunwgt) 1571 maxit = step 1572 nevents = nunwgt 1573 # make the unweighting to compute the number of events: 1574 luminosity = nunwgt/cross 1575 1576 #format the results.dat 1577 def fstr(nb): 1578 data = '%E' % nb 1579 nb, power = data.split('E') 1580 nb = float(nb) /10 1581 power = int(power) + 1 1582 return '%.5fE%+03i' %(nb,power)
1583 line = '%s %s %s %i %i %i %i %s %s %s 0.0 0.0 0\n' % \ 1584 (fstr(cross), fstr(error*cross), fstr(error*cross), 1585 nevents, nw, maxit,nunwgt, 1586 fstr(luminosity), fstr(wgt), fstr(abscross)) 1587 1588 fsock = open(pjoin(self.me_dir,'SubProcesses' , Pdir, 'G%s' % G, 1589 'results.dat'),'w') 1590 fsock.writelines(line) 1591 fsock.close()
1592
class gen_ximprove_gridpack(gen_ximprove_v4):
    """Refine mode used inside a gridpack: jobs are run locally (one core)
    and each channel is asked for an exact number of events (granularity
    self.ngran)."""

    min_iter = 1
    max_iter = 12
    max_request_event = 1e12   # split jobs if a channel if it needs more than that
    max_event_in_iter = 5000
    min_event_in_iter = 1000
    # pack every channel of a SubProcess in a single ajob script
    # NOTE(review): sys.maxint is Python-2 only (consistent with the cmp=
    # sorts used elsewhere in this file); would be sys.maxsize on Python 3.
    combining_job = sys.maxint
1605 - def __init__(self, *args, **opts):
1606 1607 self.ngran = -1 1608 self.gscalefact = {} 1609 self.readonly = False 1610 if 'ngran' in opts: 1611 self.gran = opts['ngran'] 1612 # del opts['ngran'] 1613 if 'readonly' in opts: 1614 self.readonly = opts['readonly'] 1615 super(gen_ximprove_gridpack,self).__init__(*args, **opts) 1616 if self.ngran == -1: 1617 self.ngran = 1
1618
1619 - def find_job_for_event(self):
1620 """return the list of channel that need to be improved""" 1621 import random 1622 1623 assert self.err_goal >=1 1624 self.err_goal = int(self.err_goal) 1625 self.gscalefact = {} 1626 1627 xtot = self.results.axsec 1628 goal_lum = self.err_goal/(xtot+1e-99) #pb^-1 1629 # logger.info('Effective Luminosity %s pb^-1', goal_lum) 1630 1631 all_channels = sum([list(P) for P in self.results],[]) 1632 all_channels.sort(cmp= lambda x,y: 1 if y.get('luminosity') - \ 1633 x.get('luminosity') > 0 else -1) 1634 1635 to_refine = [] 1636 for C in all_channels: 1637 tag = C.get('name') 1638 self.gscalefact[tag] = 0 1639 R = random.random() 1640 if C.get('axsec') == 0: 1641 continue 1642 if (goal_lum * C.get('axsec') < R*self.ngran ): 1643 continue # no event to generate events 1644 self.gscalefact[tag] = max(1, 1/(goal_lum * C.get('axsec')/ self.ngran)) 1645 #need to generate events 1646 logger.debug('request events for ', C.get('name'), 'cross=', 1647 C.get('axsec'), 'needed events = ', goal_lum * C.get('axsec')) 1648 to_refine.append(C) 1649 1650 logger.info('need to improve %s channels' % len(to_refine)) 1651 return goal_lum, to_refine
1652
1653 - def get_job_for_event(self):
1654 """generate the script in order to generate a given number of event""" 1655 # correspond to write_gen in the fortran version 1656 1657 1658 goal_lum, to_refine = self.find_job_for_event() 1659 1660 jobs = [] # list of the refine if some job are split is list of 1661 # dict with the parameter of the run. 1662 1663 # loop over the channel to refine 1664 for C in to_refine: 1665 #1. Compute the number of points are needed to reach target 1666 needed_event = max(goal_lum*C.get('axsec'), self.ngran) 1667 nb_split = 1 1668 1669 #2. estimate how many points we need in each iteration 1670 if C.get('nunwgt') > 0: 1671 nevents = needed_event / nb_split * (C.get('nevents') / C.get('nunwgt')) 1672 #split by iter 1673 nevents = int(nevents / (2**self.min_iter-1)) 1674 else: 1675 nevents = self.max_event_in_iter 1676 1677 if nevents < self.min_event_in_iter: 1678 nevents = self.min_event_in_iter 1679 # 1680 # forbid too low/too large value 1681 nevents = max(self.min_event_in_iter, min(self.max_event_in_iter, nevents)) 1682 logger.debug("%s : need %s event. 
Need %s split job of %s points", C.name, needed_event, nb_split, nevents) 1683 1684 1685 #create the info dict assume no splitting for the default 1686 info = {'name': self.cmd.results.current['run_name'], 1687 'script_name': 'unknown', 1688 'directory': C.name, # need to be change for splitted job 1689 'P_dir': os.path.basename(C.parent_name), 1690 'offset': 1, # need to be change for splitted job 1691 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), 1692 'nevents': nevents, #int(nevents*self.gen_events_security)+1, 1693 'maxiter': self.max_iter, 1694 'miniter': self.min_iter, 1695 'precision': -1*int(needed_event+1)/C.get('axsec'), 1696 'requested_event': needed_event, 1697 'nhel': self.run_card['nhel'], 1698 'channel': C.name.replace('G',''), 1699 'grid_refinment' : 0, #no refinment of the grid 1700 'base_directory': '', #should be change in splitted job if want to keep the grid 1701 'packet': None, 1702 } 1703 1704 1705 jobs.append(info) 1706 1707 1708 write_dir = '.' if self.readonly else None 1709 self.create_ajob(pjoin(self.me_dir, 'SubProcesses', 'refine.sh'), jobs, write_dir) 1710 1711 done = [] 1712 for j in jobs: 1713 if j['P_dir'] in done: 1714 continue 1715 1716 # set the working directory path. 1717 pwd = pjoin(os.getcwd(),j['P_dir']) if self.readonly else pjoin(self.me_dir, 'SubProcesses', j['P_dir']) 1718 exe = pjoin(pwd, 'ajob1') 1719 st = os.stat(exe) 1720 os.chmod(exe, st.st_mode | stat.S_IEXEC) 1721 1722 # run the code 1723 cluster.onecore.launch_and_wait(exe, cwd=pwd, packet_member=j['packet']) 1724 1725 write_dir = '.' if self.readonly else pjoin(self.me_dir, 'SubProcesses') 1726 self.check_events(goal_lum, to_refine, jobs, write_dir)
1727 1728
    def check_events(self, goal_lum, to_refine, jobs, Sdir):
        """check that we get the number of requested events if not resubmit.

        Reads back each channel's results.dat; channels short of their
        'requested_event' target are resubmitted with a reduced request and
        their previous events file kept aside, then the new and old event
        files are concatenated.  Recurses until every channel is satisfied.

        NOTE(review): there is no iteration cap, so a channel that never
        gains events would recurse forever -- confirm an upper bound is
        enforced elsewhere (e.g. by the precision passed to the job).
        """

        new_jobs = []

        for C, job_info in zip(to_refine, jobs):
            P = job_info['P_dir']
            G = job_info['channel']
            axsec = C.get('axsec')
            requested_events = job_info['requested_event']

            new_results = sum_html.OneResult((P, G))
            new_results.read_results(pjoin(Sdir, P, 'G%s' % G, 'results.dat'))

            # need to resubmit?
            if new_results.get('nunwgt') < requested_events:
                pwd = pjoin(os.getcwd(), job_info['P_dir'], 'G%s' % G) if self.readonly else \
                      pjoin(self.me_dir, 'SubProcesses', job_info['P_dir'], 'G%s' % G)
                # only ask for the events still missing
                job_info['requested_event'] -= new_results.get('nunwgt')
                job_info['precision'] -= -1*job_info['requested_event']/axsec
                job_info['offset'] += 1
                new_jobs.append(job_info)
                # keep the events generated so far for later concatenation
                files.mv(pjoin(pwd, 'events.lhe'), pjoin(pwd, 'events.lhe.previous'))

        if new_jobs:
            self.create_ajob(pjoin(self.me_dir, 'SubProcesses', 'refine.sh'), new_jobs, Sdir)

            # NOTE(review): `done` is never appended to, so ajob1 is
            # (re)launched once per resubmitted channel even when several
            # channels share the same P_dir -- confirm this is intentional.
            done = []
            for j in new_jobs:
                if j['P_dir'] in done:
                    continue
                G = j['channel']
                # set the working directory path.
                pwd = pjoin(os.getcwd(), j['P_dir']) if self.readonly \
                    else pjoin(self.me_dir, 'SubProcesses', j['P_dir'])
                exe = pjoin(pwd, 'ajob1')
                st = os.stat(exe)
                os.chmod(exe, st.st_mode | stat.S_IEXEC)

                # run the code
                cluster.onecore.launch_and_wait(exe, cwd=pwd, packet_member=j['packet'])
                pwd = pjoin(pwd, 'G%s' % G)
                # concatanate with old events file
                files.put_at_end(pjoin(pwd, 'events.lhe'), pjoin(pwd, 'events.lhe.previous'))

            return self.check_events(goal_lum, to_refine, new_jobs, Sdir)
1776