Package madgraph :: Package madevent :: Module gen_ximprove
[hide private]
[frames] | [no frames]

Source Code for Module madgraph.madevent.gen_ximprove

   1  ################################################################################ 
   2  # 
   3  # Copyright (c) 2014 The MadGraph5_aMC@NLO Development team and Contributors 
   4  # 
   5  # This file is a part of the MadGraph5_aMC@NLO project, an application which  
   6  # automatically generates Feynman diagrams and matrix elements for arbitrary 
   7  # high-energy processes in the Standard Model and beyond. 
   8  # 
   9  # It is subject to the MadGraph5_aMC@NLO license which should accompany this  
  10  # distribution. 
  11  # 
  12  # For more information, visit madgraph.phys.ucl.ac.be and amcatnlo.web.cern.ch 
  13  # 
  14  ################################################################################ 
  15  """ A python file to replace the fortran script gen_ximprove. 
  16      This script analyses the result of the survey/ previous refine and  
  17      creates the jobs for the following script. 
  18  """ 
  19  from __future__ import division 
  20   
  21  import collections 
  22  import os 
  23  import glob 
  24  import logging 
  25  import math 
  26  import re 
  27  import subprocess 
  28  import shutil 
  29   
  30  try: 
  31      import madgraph 
  32  except ImportError: 
  33      MADEVENT = True 
  34      import internal.sum_html as sum_html 
  35      import internal.banner as bannermod 
  36      import internal.misc as misc 
  37      import internal.files as files 
  38      import internal.cluster as cluster 
  39      import internal.combine_grid as combine_grid 
  40      import internal.combine_runs as combine_runs 
  41      import internal.lhe_parser as lhe_parser 
  42  else: 
  43      MADEVENT= False 
  44      import madgraph.madevent.sum_html as sum_html 
  45      import madgraph.various.banner as bannermod 
  46      import madgraph.various.misc as misc 
  47      import madgraph.iolibs.files as files 
  48      import madgraph.various.cluster as cluster 
  49      import madgraph.madevent.combine_grid as combine_grid 
  50      import madgraph.madevent.combine_runs as combine_runs 
  51      import madgraph.various.lhe_parser as lhe_parser 
  52   
  53  logger = logging.getLogger('madgraph.madevent.gen_ximprove') 
  54  pjoin = os.path.join 
class gensym(object):
    """A class to call the fortran gensym executable and handle its output
    in order to create the various jobs that are needed for the survey."""

    #convenient shortcut for the formatting of variable
    @ staticmethod
    def format_variable(*args):
        """Delegate to the banner module ConfigFile variable formatter."""
        return bannermod.ConfigFile.format_variable(*args)

    combining_job = 2 # number of channel by ajob
    splitted_grid = False   # False, or the number of pieces the grid creation is split in
    min_iterations = 3      # minimal number of survey iterations before stopping checks
    mode= "survey"          # tag forwarded to write_grid_for_submission (see combine_iteration)
    def __init__(self, cmd, opt=None):
        """Bind the survey helper to the madevent interface *cmd*.

        cmd: the madevent command interface (provides run_card, me_dir,
            cluster, proc_characteristics, opts, ...).
        opt: optional argument forwarded to a cooperating base class, if any.
        """

        # cooperative __init__ for possible multiple-inheritance setups;
        # plain object.__init__ rejects the extra arguments -> ignore
        try:
            super(gensym, self).__init__(cmd, opt)
        except TypeError:
            pass

        # Run statistics, a dictionary of RunStatistics(), with
        self.run_statistics = {}

        self.cmd = cmd
        self.run_card = cmd.run_card
        self.me_dir = cmd.me_dir


        # dictionary to keep track of the precision when combining iteration
        # keys are (Pdir, G) tuples; values are the accumulated estimators
        self.cross = collections.defaultdict(int)
        self.abscross = collections.defaultdict(int)
        self.sigma = collections.defaultdict(int)
        self.chi2 = collections.defaultdict(int)

        self.splitted_grid = False
        if self.cmd.proc_characteristics['loop_induced']:
            # loop-induced processes are expensive: split the grid creation
            nexternal = self.cmd.proc_characteristics['nexternal']
            self.splitted_grid = max(2, (nexternal-2)**2)
            # tighten the default accuracy target for loop-induced runs
            if hasattr(self.cmd, "opts") and self.cmd.opts['accuracy'] == 0.1:
                self.cmd.opts['accuracy'] = 0.02

        # on multicore, scale the splitting with the available cores
        if isinstance(cmd.cluster, cluster.MultiCore) and self.splitted_grid > 1:
            self.splitted_grid = int(cmd.cluster.nb_core**0.5)
            if self.splitted_grid == 1 and cmd.cluster.nb_core >1:
                self.splitted_grid = 2

        #if the user defines it in the run_card:
        if self.run_card['survey_splitting'] != -1:
            self.splitted_grid = self.run_card['survey_splitting']

        # per-directory splitting/grouping; the lambdas are default policies
        # that submit_to_cluster may replace with per-Pdir lookups
        self.splitted_Pdir = {}
        self.splitted_for_dir = lambda x,y: self.splitted_grid
        self.combining_job_for_Pdir = lambda x: self.combining_job
        self.lastoffset = {}
113 - def launch(self):
114 """ """ 115 116 self.subproc = [l.strip() for l in open(pjoin(self.me_dir,'SubProcesses', 117 'subproc.mg'))] 118 subproc = self.subproc 119 120 P_zero_result = [] # check the number of times where they are no phase-space 121 122 nb_tot_proc = len(subproc) 123 for nb_proc,subdir in enumerate(subproc): 124 job_list = {} 125 self.cmd.update_status('Compiling for process %s/%s. <br> (previous processes already running)' % \ 126 (nb_proc+1,nb_tot_proc), level=None) 127 128 subdir = subdir.strip() 129 Pdir = pjoin(self.me_dir, 'SubProcesses',subdir) 130 logger.info(' %s ' % subdir) 131 132 # clean previous run 133 for match in glob.glob(pjoin(Pdir, '*ajob*')): 134 if os.path.basename(match)[:4] in ['ajob', 'wait', 'run.', 'done']: 135 os.remove(match) 136 for match in glob.glob(pjoin(Pdir, 'G*')): 137 if os.path.exists(pjoin(match,'results.dat')): 138 os.remove(pjoin(match, 'results.dat')) 139 if os.path.exists(pjoin(match, 'ftn25')): 140 os.remove(pjoin(match, 'ftn25')) 141 142 #compile gensym 143 self.cmd.compile(['gensym'], cwd=Pdir) 144 if not os.path.exists(pjoin(Pdir, 'gensym')): 145 raise Exception, 'Error make gensym not successful' 146 147 # Launch gensym 148 p = misc.Popen(['./gensym'], stdout=subprocess.PIPE, 149 stderr=subprocess.STDOUT, cwd=Pdir) 150 #sym_input = "%(points)d %(iterations)d %(accuracy)f \n" % self.opts 151 (stdout, _) = p.communicate('') 152 153 if os.path.exists(pjoin(self.me_dir,'error')): 154 files.mv(pjoin(self.me_dir,'error'), pjoin(Pdir,'ajob.no_ps.log')) 155 P_zero_result.append(subdir) 156 continue 157 158 job_list[Pdir] = stdout.split() 159 self.cmd.compile(['madevent'], cwd=Pdir) 160 self.submit_to_cluster(job_list) 161 return job_list, P_zero_result
162 163
    def submit_to_cluster(self, job_list):
        """Dispatch the survey jobs, applying the run_card job_strategy.

        job_list: {Pdir_path: [channel, ...]}.  With a job_strategy set,
        each Pdir is handled separately and its splitting/grouping policy
        is derived from the number of external legs of the subprocess.
        """

        if self.run_card['job_strategy'] > 0:
            # handle one P directory at a time (recurse on singletons)
            if len(job_list) >1:
                for path, dirs in job_list.items():
                    self.submit_to_cluster({path:dirs})
                return
            path, value = job_list.items()[0]
            nexternal = self.cmd.proc_characteristics['nexternal']
            # read the actual NEXTERNAL of this subprocess from the fortran include
            current = open(pjoin(path, "nexternal.inc")).read()
            ext = re.search(r"PARAMETER \(NEXTERNAL=(\d+)\)", current).group(1)

            if self.run_card['job_strategy'] == 2:
                # strategy 2: split the grid only for the most complex subprocesses
                self.splitted_grid = 2
                if nexternal == int(ext):
                    to_split = 2
                else:
                    to_split = 0
                if hasattr(self, 'splitted_Pdir'):
                    self.splitted_Pdir[path] = to_split
                else:
                    self.splitted_Pdir = {path: to_split}
                self.splitted_for_dir = lambda x,y : self.splitted_Pdir[x]
            elif self.run_card['job_strategy'] == 1:
                # strategy 1: do not group channels of the most complex subprocesses
                if nexternal == int(ext):
                    combine = 1
                else:
                    combine = self.combining_job
                if hasattr(self, 'splitted_Pdir'):
                    self.splitted_Pdir[path] = combine
                else:
                    self.splitted_Pdir = {path: combine}
                self.combining_job_for_Pdir = lambda x : self.splitted_Pdir[x]

        # fall back to the serial submission whenever splitting is disabled
        # or the run is effectively single core
        if not self.splitted_grid:
            return self.submit_to_cluster_no_splitting(job_list)
        elif self.cmd.cluster_mode == 0:
            return self.submit_to_cluster_no_splitting(job_list)
        elif self.cmd.cluster_mode == 2 and self.cmd.options['nb_core'] == 1:
            return self.submit_to_cluster_no_splitting(job_list)
        else:
            return self.submit_to_cluster_splitted(job_list)
209 - def submit_to_cluster_no_splitting(self, job_list):
210 """submit the survey without the parralelization. 211 This is the old mode which is still usefull in single core""" 212 213 # write the template file for the parameter file 214 self.write_parameter(parralelization=False, Pdirs=job_list.keys()) 215 216 217 # launch the job with the appropriate grouping 218 for Pdir, jobs in job_list.items(): 219 jobs = list(jobs) 220 i=0 221 while jobs: 222 i+=1 223 to_submit = ['0'] # the first entry is actually the offset 224 for _ in range(self.combining_job_for_Pdir(Pdir)): 225 if jobs: 226 to_submit.append(jobs.pop(0)) 227 228 self.cmd.launch_job(pjoin(self.me_dir, 'SubProcesses', 'survey.sh'), 229 argument=to_submit, 230 cwd=pjoin(self.me_dir,'SubProcesses' , Pdir))
231 232
    def create_resubmit_one_iter(self, Pdir, G, submit_ps, nb_job, step=0):
        """prepare the input_file for submitting the channel

        Pdir: subprocess directory (absolute or relative to SubProcesses).
        G: channel identifier.
        submit_ps: number of phase-space points per job.
        nb_job: number of parallel jobs to submit for this channel.
        step: iteration index (0 resets the per-channel seed offset).
        """


        if 'SubProcesses' not in Pdir:
            Pdir = pjoin(self.me_dir, 'SubProcesses', Pdir)

        #keep track of how many job are sended
        self.splitted_Pdir[(Pdir, G)] = int(nb_job)


        # 1. write the new input_app.txt
        run_card = self.cmd.run_card
        options = {'event' : submit_ps,
                   'maxiter': 1,
                   'miniter': 1,
                   'accuracy': self.cmd.opts['accuracy'],
                   'helicity': run_card['nhel_survey'] if 'nhel_survey' in run_card \
                               else run_card['nhel'],
                   'gridmode': -2,
                   'channel' : G
                   }

        Gdir = pjoin(Pdir, 'G%s' % G)
        self.write_parameter_file(pjoin(Gdir, 'input_app.txt'), options)

        # 2. check that ftn25 exists. (the integration grid from the previous pass)
        assert os.path.exists(pjoin(Gdir, "ftn25"))


        # 3. Submit the new jobs
        # callback: combine_iteration is invoked once all packet members finish
        packet = cluster.Packet((Pdir, G, step+1),
                                self.combine_iteration,
                                (Pdir, G, step+1))

        if step ==0:
            self.lastoffset[(Pdir, G)] = 0

        # resubmit the new jobs; the running offset keeps random seeds distinct
        for i in xrange(int(nb_job)):
            name = "G%s_%s" % (G,i+1)
            self.lastoffset[(Pdir, G)] += 1
            offset = self.lastoffset[(Pdir, G)]
            self.cmd.launch_job(pjoin(self.me_dir, 'SubProcesses', 'refine_splitted.sh'),
                                argument=[name, 'G%s'%G, offset],
                                cwd= Pdir,
                                packet_member=packet)
    def submit_to_cluster_splitted(self, job_list):
        """ submit the version of the survey with splitted grid creation
        """

        #if self.splitted_grid <= 1:
        #    return self.submit_to_cluster_no_splitting(job_list)

        for Pdir, jobs in job_list.items():
            # directories without real splitting fall back to the serial mode
            if self.splitted_for_dir(Pdir, jobs[0]) <= 1:
                return self.submit_to_cluster_no_splitting({Pdir:jobs})

            self.write_parameter(parralelization=True, Pdirs=[Pdir])
            # launch the job with the appropriate grouping

            # one Packet per channel: combine_iteration runs when every
            # splitted piece of that channel has finished
            for job in jobs:
                packet = cluster.Packet((Pdir, job, 1), self.combine_iteration, (Pdir, job, 1))
                for i in range(self.splitted_for_dir(Pdir, job)):
                    self.cmd.launch_job(pjoin(self.me_dir, 'SubProcesses', 'survey.sh'),
                                        argument=[i+1, job],
                                        cwd=pjoin(self.me_dir,'SubProcesses' , Pdir),
                                        packet_member=packet)
    def combine_iteration(self, Pdir, G, step):
        """Cluster callback: combine the results of one survey iteration for
        channel G of Pdir and decide whether another iteration is needed.

        Either resubmits the next iteration (resubmit_survey) or finalises the
        channel: write the combined grid, concatenate the events, copy a log
        and create results.dat for the refine step.  Always returns 0.
        """

        grid_calculator, cross, error = self.combine_grid(Pdir, G, step)

        # Compute the number of events used for this run.
        nb_events = grid_calculator.target_evt

        Gdirs = [] #build the the list of directory
        for i in range(self.splitted_for_dir(Pdir, G)):
            path = pjoin(Pdir, "G%s_%s" % (G, i+1))
            Gdirs.append(path)

        # 4. make the submission of the next iteration
        #    Three cases - less than 3 iteration -> continue
        #                - more than 3 and less than 5 -> check error
        #                - more than 5 -> prepare info for refine
        need_submit = False
        if step < self.min_iterations and cross != 0:
            if step == 1:
                need_submit = True
            else:
                # negligible channels or already-converged ones stop early
                across = self.abscross[(Pdir,G)]/(self.sigma[(Pdir,G)]+1e-99)
                tot_across = self.get_current_axsec()
                if across / tot_across < 1e-6:
                    need_submit = False
                elif error < self.cmd.opts['accuracy'] / 100:
                    need_submit = False
                else:
                    need_submit = True

        elif step >= self.cmd.opts['iterations']:
            need_submit = False
        elif self.cmd.opts['accuracy'] < 0:
            #check for luminosity
            raise Exception, "Not Implemented"
        elif self.abscross[(Pdir,G)] == 0:
            need_submit = False
        else:
            across = self.abscross[(Pdir,G)]/(self.sigma[(Pdir,G)]+1e-99)
            tot_across = self.get_current_axsec()
            if across == 0:
                need_submit = False
            elif across / tot_across < 1e-5:
                need_submit = False
            elif error > self.cmd.opts['accuracy']:
                need_submit = True
            else:
                need_submit = False


        if cross:
            # write the combined ftn25 grid used by the next iteration/refine
            grid_calculator.write_grid_for_submission(Pdir,G,
                     self.splitted_for_dir(Pdir, G),
                     nb_events,mode=self.mode,
                     conservative_factor=5.0)

        # number of significant digits matching the current error estimate
        xsec_format = '.%ig'%(max(3,int(math.log10(1.0/float(error)))+2)
                      if float(cross)!=0.0 and float(error)!=0.0 else 8)
        if need_submit:
            message = "%%s/G%%s is at %%%s +- %%.3g pb. Now submitting iteration #%s."%(xsec_format, step+1)
            logger.info(message%\
                        (os.path.basename(Pdir), G, float(cross),
                         float(error)*float(cross)))
            self.resubmit_survey(Pdir,G, Gdirs, step)
        elif cross:
            logger.info("Survey finished for %s/G%s at %s"%(
                  os.path.basename(Pdir),G,('%%%s +- %%.3g pb'%xsec_format))%
                  (float(cross), float(error)*float(cross)))
            # prepare information for refine
            newGpath = pjoin(self.me_dir,'SubProcesses' , Pdir, 'G%s' % G)
            if not os.path.exists(newGpath):
                os.mkdir(newGpath)

            # copy the new grid:
            files.cp(pjoin(Gdirs[0], 'ftn25'),
                     pjoin(self.me_dir,'SubProcesses' , Pdir, 'G%s' % G, 'ftn26'))

            # copy the events (concatenate every splitted directory)
            fsock = open(pjoin(newGpath, 'events.lhe'), 'w')
            for Gdir in Gdirs:
                fsock.write(open(pjoin(Gdir, 'events.lhe')).read())

            # copy one log
            files.cp(pjoin(Gdirs[0], 'log.txt'),
                     pjoin(self.me_dir,'SubProcesses' , Pdir, 'G%s' % G))


            # create the appropriate results.dat
            self.write_results(grid_calculator, cross, error, Pdir, G, step)
        else:
            # zero cross-section: still create the directory/results.dat so
            # the bookkeeping of the refine step stays consistent
            logger.info("Survey finished for %s/G%s [0 cross]", os.path.basename(Pdir),G)

            Gdir = pjoin(self.me_dir,'SubProcesses' , Pdir, 'G%s' % G)
            if not os.path.exists(Gdir):
                os.mkdir(Gdir)
            # copy one log
            files.cp(pjoin(Gdirs[0], 'log.txt'), Gdir)
            # create the appropriate results.dat
            self.write_results(grid_calculator, cross, error, Pdir, G, step)

        return 0
407 - def combine_grid(self, Pdir, G, step, exclude_sub_jobs=[]):
408 """ exclude_sub_jobs is to remove some of the subjobs if a numerical 409 issue is detected in one of them. Warning is issue when this occurs. 410 """ 411 412 # 1. create an object to combine the grid information and fill it 413 grid_calculator = combine_grid.grid_information(self.run_card['nhel']) 414 415 for i in range(self.splitted_for_dir(Pdir, G)): 416 if i in exclude_sub_jobs: 417 continue 418 path = pjoin(Pdir, "G%s_%s" % (G, i+1)) 419 fsock = misc.mult_try_open(pjoin(path, 'results.dat')) 420 one_result = grid_calculator.add_results_information(fsock) 421 fsock.close() 422 if one_result.axsec == 0: 423 grid_calculator.onefail = True 424 continue # grid_information might not exists 425 fsock = misc.mult_try_open(pjoin(path, 'grid_information')) 426 grid_calculator.add_one_grid_information(fsock) 427 fsock.close() 428 os.remove(pjoin(path, 'results.dat')) 429 #os.remove(pjoin(path, 'grid_information')) 430 431 432 433 #2. combine the information about the total crossection / error 434 # start by keep the interation in memory 435 cross, across, sigma = grid_calculator.get_cross_section() 436 437 #3. Try to avoid one single PS point which ruins the integration 438 # Should be related to loop evaluation instability. 439 maxwgt = grid_calculator.get_max_wgt(0.01) 440 if maxwgt: 441 nunwgt = grid_calculator.get_nunwgt(maxwgt) 442 # Make sure not to apply the security below during the first step of the 443 # survey. Also, disregard channels with a contribution relative to the 444 # total cross-section smaller than 1e-8 since in this case it is unlikely 445 # that this channel will need more than 1 event anyway. 
446 apply_instability_security = False 447 rel_contrib = 0.0 448 if (self.__class__ != gensym or step > 1): 449 Pdir_across = 0.0 450 Gdir_across = 0.0 451 for (mPdir,mG) in self.abscross.keys(): 452 if mPdir == Pdir: 453 Pdir_across += (self.abscross[(mPdir,mG)]/ 454 (self.sigma[(mPdir,mG)]+1e-99)) 455 if mG == G: 456 Gdir_across += (self.abscross[(mPdir,mG)]/ 457 (self.sigma[(mPdir,mG)]+1e-99)) 458 rel_contrib = abs(Gdir_across/(Pdir_across+1e-99)) 459 if rel_contrib > (1.0e-8) and \ 460 nunwgt < 2 and len(grid_calculator.results) > 1: 461 apply_instability_security = True 462 463 if apply_instability_security: 464 # check the ratio between the different submit 465 th_maxwgt = [(r.th_maxwgt,i) for i,r in enumerate(grid_calculator.results)] 466 th_maxwgt.sort() 467 ratio = th_maxwgt[-1][0]/th_maxwgt[-2][0] 468 if ratio > 1e4: 469 logger.warning( 470 """"One Event with large weight have been found (ratio = %.3g) in channel G%s (with rel.contrib=%.3g). 471 This is likely due to numerical instabilities. The associated job is discarded to recover. 472 For offline investigation, the problematic discarded events are stored in: 473 %s"""%(ratio,G,rel_contrib,pjoin(Pdir,'DiscardedUnstableEvents'))) 474 exclude_sub_jobs = list(exclude_sub_jobs) 475 exclude_sub_jobs.append(th_maxwgt[-1][1]) 476 grid_calculator.results.run_statistics['skipped_subchannel'] += 1 477 478 # Add some monitoring of the problematic events 479 gPath = pjoin(Pdir, "G%s_%s" % (G, th_maxwgt[-1][1]+1)) 480 if os.path.isfile(pjoin(gPath,'events.lhe')): 481 lhe_file = lhe_parser.EventFile(pjoin(gPath,'events.lhe')) 482 discardedPath = pjoin(Pdir,'DiscardedUnstableEvents') 483 if not os.path.exists(discardedPath): 484 os.mkdir(discardedPath) 485 if os.path.isdir(discardedPath): 486 # Keep only the event with a maximum weight, as it surely 487 # is the problematic one. 
488 evtRecord = open(pjoin(discardedPath,'discarded_G%s.dat'%G),'a') 489 lhe_file.seek(0) #rewind the file 490 try: 491 evtRecord.write('\n'+str(max(lhe_file,key=lambda evt:abs(evt.wgt)))) 492 except Exception: 493 #something wrong write the full file. 494 lhe_file.close() 495 evtRecord.write(pjoin(gPath,'events.lhe').read()) 496 evtRecord.close() 497 498 return self.combine_grid(Pdir, G, step, exclude_sub_jobs) 499 500 501 if across !=0: 502 if sigma != 0: 503 self.cross[(Pdir,G)] += cross**3/sigma**2 504 self.abscross[(Pdir,G)] += across * cross**2/sigma**2 505 self.sigma[(Pdir,G)] += cross**2/ sigma**2 506 self.chi2[(Pdir,G)] += cross**4/sigma**2 507 # and use those iteration to get the current estimator 508 cross = self.cross[(Pdir,G)]/self.sigma[(Pdir,G)] 509 if step > 1: 510 error = math.sqrt(abs((self.chi2[(Pdir,G)]/cross**2 - \ 511 self.sigma[(Pdir,G)])/(step-1))/self.sigma[(Pdir,G)]) 512 else: 513 error = sigma/cross 514 else: 515 self.cross[(Pdir,G)] = cross 516 self.abscross[(Pdir,G)] = across 517 self.sigma[(Pdir,G)] = 0 518 self.chi2[(Pdir,G)] = 0 519 cross = self.cross[(Pdir,G)] 520 error = 0 521 522 else: 523 error = 0 524 525 grid_calculator.results.compute_values(update_statistics=True) 526 if (str(os.path.basename(Pdir)), G) in self.run_statistics: 527 self.run_statistics[(str(os.path.basename(Pdir)), G)]\ 528 .aggregate_statistics(grid_calculator.results.run_statistics) 529 else: 530 self.run_statistics[(str(os.path.basename(Pdir)), G)] = \ 531 grid_calculator.results.run_statistics 532 533 self.warnings_from_statistics(G, grid_calculator.results.run_statistics) 534 stats_msg = grid_calculator.results.run_statistics.nice_output( 535 '/'.join([os.path.basename(Pdir),'G%s'%G])) 536 537 if stats_msg: 538 logger.log(5, stats_msg) 539 540 # Clean up grid_information to avoid border effects in case of a crash 541 for i in range(self.splitted_for_dir(Pdir, G)): 542 path = pjoin(Pdir, "G%s_%s" % (G, i+1)) 543 try: 544 os.remove(pjoin(path, 
'grid_information')) 545 except OSError, oneerror: 546 if oneerror.errno != 2: 547 raise 548 return grid_calculator, cross, error
549
550 - def warnings_from_statistics(self,G,stats):
551 """Possible warn user for worrying MadLoop stats for this channel.""" 552 553 if stats['n_madloop_calls']==0: 554 return 555 556 EPS_fraction = float(stats['exceptional_points'])/stats['n_madloop_calls'] 557 558 msg = "Channel %s has encountered a fraction of %.3g\n"+ \ 559 "of numerically unstable loop matrix element computations\n"+\ 560 "(which could not be rescued using quadruple precision).\n"+\ 561 "The results might not be trusted." 562 563 if 0.01 > EPS_fraction > 0.001: 564 logger.warning(msg%(G,EPS_fraction)) 565 elif EPS_fraction > 0.01: 566 logger.critical((msg%(G,EPS_fraction)).replace('might', 'can')) 567 raise Exception, (msg%(G,EPS_fraction)).replace('might', 'can')
568
569 - def get_current_axsec(self):
570 571 across = 0 572 for (Pdir,G) in self.abscross: 573 across += self.abscross[(Pdir,G)]/(self.sigma[(Pdir,G)]+1e-99) 574 return across
575
    def write_results(self, grid_calculator, cross, error, Pdir, G, step):
        """Create the results.dat of channel G in the fortran-expected
        fixed format, from the combined estimators of this survey."""

        #compute the value
        if cross == 0:
            # channel with no phase-space/zero cross-section: all-zero record
            abscross,nw, luminosity = 0, 0, 0
            # NOTE(review): 'wgt' appears twice in this unpacking (harmless,
            # the second assignment wins)
            wgt, maxit,nunwgt, wgt, nevents = 0,0,0,0,0
            maxwgt = 0
            error = 0
        else:
            grid_calculator.results.compute_values()
            abscross = self.abscross[(Pdir,G)]/self.sigma[(Pdir,G)]
            nw = grid_calculator.results.nw
            wgt = grid_calculator.results.wgt
            maxit = step
            # NOTE(review): wgt is immediately overwritten to 0 here, so the
            # value read from grid_calculator above is discarded — confirm
            # whether this is intentional
            wgt = 0
            nevents = grid_calculator.results.nevents
            maxwgt = grid_calculator.get_max_wgt()
            nunwgt = grid_calculator.get_nunwgt()
            luminosity = nunwgt/cross

        #format the results.dat
        def fstr(nb):
            # render a float in the fortran-like exponent convention
            # (mantissa < 1, exponent shifted by one): 0.12345E+03
            data = '%E' % nb
            nb, power = data.split('E')
            nb = float(nb) /10
            power = int(power) + 1
            return '%.5fE%+03i' %(nb,power)
        line = '%s %s %s %i %i %i %i %s %s %s %s 0.0 0\n' % \
                (fstr(cross), fstr(error*cross), fstr(error*cross),
                 nevents, nw, maxit,nunwgt,
                 fstr(luminosity), fstr(wgt), fstr(abscross), fstr(maxwgt))

        fsock = open(pjoin(self.me_dir,'SubProcesses' , Pdir, 'G%s' % G,
                           'results.dat'),'w')
        fsock.writelines(line)
        fsock.close()
    def resubmit_survey(self, Pdir, G, Gdirs, step):
        """submit the next iteration of the survey

        Pdir/G: subprocess directory and channel.
        Gdirs: the splitted G*_i directories of this channel.
        step: index of the iteration that just finished.
        """

        # 1. write the new input_app.txt to double the number of points
        run_card = self.cmd.run_card
        options = {'event' : 2**(step) * self.cmd.opts['points'] / self.splitted_grid,
                   'maxiter': 1,
                   'miniter': 1,
                   'accuracy': self.cmd.opts['accuracy'],
                   'helicity': run_card['nhel_survey'] if 'nhel_survey' in run_card \
                               else run_card['nhel'],
                   'gridmode': -2,
                   'channel' : ''
                   }

        # helicity sum per event: compensate with more points
        if int(options['helicity']) == 1:
            options['event'] = options['event'] * 2**(self.cmd.proc_characteristics['nexternal']//3)

        for Gdir in Gdirs:
            self.write_parameter_file(pjoin(Gdir, 'input_app.txt'), options)


        #2. resubmit the new jobs
        # callback once every splitted piece of the next iteration is done
        packet = cluster.Packet((Pdir, G, step+1), self.combine_iteration, \
                                (Pdir, G, step+1))
        nb_step = len(Gdirs) * (step+1)
        for i,subdir in enumerate(Gdirs):
            subdir = subdir.rsplit('_',1)[1]
            subdir = int(subdir)
            # unique offset -> distinct random seed for each resubmission
            offset = nb_step+i+1
            offset=str(offset)
            tag = "%s.%s" % (subdir, offset)

            self.cmd.launch_job(pjoin(self.me_dir, 'SubProcesses', 'survey.sh'),
                                argument=[tag, G],
                                cwd=pjoin(self.me_dir,'SubProcesses' , Pdir),
                                packet_member=packet)
654 - def write_parameter_file(self, path, options):
655 """ """ 656 657 template =""" %(event)s %(maxiter)s %(miniter)s !Number of events and max and min iterations 658 %(accuracy)s !Accuracy 659 %(gridmode)s !Grid Adjustment 0=none, 2=adjust 660 1 !Suppress Amplitude 1=yes 661 %(helicity)s !Helicity Sum/event 0=exact 662 %(channel)s """ 663 options['event'] = int(options['event']) 664 open(path, 'w').write(template % options)
665 666 667
    def write_parameter(self, parralelization, Pdirs=None):
        """Write the parameter of the survey run

        parralelization: when True, configure single-iteration jobs with the
            event count divided among the splitted grids.
        Pdirs: directories to write input_app.txt in (defaults to all
            subprocess directories found by launch()).
        """

        run_card = self.cmd.run_card

        options = {'event' : self.cmd.opts['points'],
                   'maxiter': self.cmd.opts['iterations'],
                   'miniter': self.min_iterations,
                   'accuracy': self.cmd.opts['accuracy'],
                   'helicity': run_card['nhel_survey'] if 'nhel_survey' in run_card \
                               else run_card['nhel'],
                   'gridmode': 2,
                   'channel': ''
                   }

        # helicity sum per event: compensate with more points
        if int(options['helicity'])== 1:
            options['event'] = options['event'] * 2**(self.cmd.proc_characteristics['nexternal']//3)

        if parralelization:
            options['gridmode'] = -2
            options['maxiter'] = 1 #this is automatic in dsample anyway
            options['miniter'] = 1 #this is automatic in dsample anyway
            # true division (module imports division from __future__);
            # write_parameter_file truncates to int
            options['event'] /= self.splitted_grid

        if not Pdirs:
            Pdirs = self.subproc

        for Pdir in Pdirs:
            path =pjoin(Pdir, 'input_app.txt')
            self.write_parameter_file(path, options)
class gen_ximprove(object):
    """Dispatcher/base class replacing the fortran gen_ximprove: analyse the
    survey results and create the refine jobs (see __new__ for the concrete
    subclass selection)."""


    # some hardcoded value which impact the generation
    gen_events_security = 1.2 # multiply the number of requested event by this number for security
    combining_job = 0         # allow to run multiple channel in sequence
    max_request_event = 1000  # split jobs if a channel if it needs more than that
    max_event_in_iter = 5000  # upper bound on points per iteration
    min_event_in_iter = 1000  # lower bound on points per iteration
    max_splitting = 130       # maximum duplication of a given channel
    min_iter = 3              # minimum number of refine iterations
    max_iter = 9              # maximum number of refine iterations
    keep_grid_for_refine = False # only apply if needed to split the job

    #convenient shortcut for the formatting of variable
    @ staticmethod
    def format_variable(*args):
        """Delegate to the banner module ConfigFile variable formatter."""
        return bannermod.ConfigFile.format_variable(*args)
721 - def __new__(cls, cmd, opt):
722 """Choose in which type of refine we want to be""" 723 724 if cmd.proc_characteristics['loop_induced']: 725 return super(gen_ximprove, cls).__new__(gen_ximprove_share, cmd, opt) 726 elif gen_ximprove.format_variable(cmd.run_card['gridpack'], bool): 727 raise Exception, "Not implemented" 728 elif cmd.run_card["job_strategy"] == 2: 729 return super(gen_ximprove, cls).__new__(gen_ximprove_share, cmd, opt) 730 else: 731 return super(gen_ximprove, cls).__new__(gen_ximprove_v4, cmd, opt)
732 733
    def __init__(self, cmd, opt=None):
        """Bind the refine helper to the madevent interface *cmd* and set the
        run defaults; *opt* may be a dict (configure) or a GridpackCard
        (configure_gridpack)."""

        # cooperative __init__ for possible multiple-inheritance setups;
        # plain object.__init__ rejects the extra arguments -> ignore
        try:
            super(gen_ximprove, self).__init__(cmd, opt)
        except TypeError:
            pass

        self.run_statistics = {}
        self.cmd = cmd
        self.run_card = cmd.run_card
        run_card = self.run_card
        self.me_dir = cmd.me_dir

        #extract from the run_card the information that we need.
        self.gridpack = run_card['gridpack']
        self.nhel = run_card['nhel']
        if "nhel_refine" in run_card:
            self.nhel = run_card["nhel_refine"]

        if self.run_card['refine_evt_by_job'] != -1:
            self.max_request_event = run_card['refine_evt_by_job']


        # Default option for the run
        self.gen_events = True
        self.min_iter = 3
        self.parralel = False
        # parameter which was input for the normal gen_ximprove run
        self.err_goal = 0.01
        self.max_np = 9
        self.split_channels = False
        # parameter for the gridpack run
        self.nreq = 2000
        self.iseed = 4321
        self.ngran = 1

        # placeholder for information
        self.results = 0 #updated in launch/update_html

        if isinstance(opt, dict):
            self.configure(opt)
        elif isinstance(opt, bannermod.GridpackCard):
            self.configure_gridpack(opt)
    def __call__(self):
        """Make the instance callable: calling it simply runs launch()."""
        return self.launch()
781 - def launch(self):
782 """running """ 783 784 #start the run 785 self.handle_seed() 786 787 self.results = sum_html.collect_result(self.cmd, None) 788 789 if self.gen_events: 790 # We run to provide a given number of events 791 self.get_job_for_event() 792 else: 793 # We run to achieve a given precision 794 self.get_job_for_precision()
795 796
797 - def configure(self, opt):
798 """Defines some parameter of the run""" 799 800 for key, value in opt.items(): 801 if key in self.__dict__: 802 targettype = type(getattr(self, key)) 803 setattr(self, key, self.format_variable(value, targettype, key)) 804 else: 805 raise Exception, '%s not define' % key 806 807 808 # special treatment always do outside the loop to avoid side effect 809 if 'err_goal' in opt: 810 if self.err_goal < 1: 811 logger.info("running for accuracy %s%%" % (self.err_goal*100)) 812 self.gen_events = False 813 elif self.err_goal >= 1: 814 logger.info("Generating %s unweigthed events." % self.err_goal) 815 self.gen_events = True 816 self.err_goal = self.err_goal * self.gen_events_security # security
817
    def handle_seed(self):
        """not needed but for gridpack --which is not handle here for the moment"""
        # intentional no-op: subclasses handling gridpack mode may override
        return
    def find_job_for_event(self):
        """return the list of channel that need to be improved

        Returns (goal_lum, to_refine): the target luminosity in pb^-1 and
        the channels whose accumulated luminosity (or error) is not good
        enough for the requested number of events.
        """

        assert self.err_goal >=1
        self.err_goal = int(self.err_goal)

        goal_lum = self.err_goal/(self.results.axsec+1e-99)    #pb^-1
        logger.info('Effective Luminosity %s pb^-1', goal_lum)

        # flatten the per-Pdir channel lists and sort by decreasing luminosity
        # (python2-only cmp= sort)
        all_channels = sum([list(P) for P in self.results],[])
        all_channels.sort(cmp= lambda x,y: 1 if y.get('luminosity') - \
                                                x.get('luminosity') > 0 else -1)

        to_refine = []
        for C in all_channels:
            if C.get('axsec') == 0:
                continue
            # not enough luminosity accumulated for the target (with half of
            # the security margin)
            if goal_lum/(C.get('luminosity')+1e-99) >= 1 + (self.gen_events_security-1)/2:
                logger.debug("channel %s is at %s (%s) (%s pb)", C.name, C.get('luminosity'), goal_lum/(C.get('luminosity')+1e-99), C.get('xsec'))
                to_refine.append(C)
            # or the channel error is large compared to its own |xsec| or to
            # the smallest channel of the list
            elif C.get('xerr') > max(C.get('axsec'),
                                     (1/(100*math.sqrt(self.err_goal)))*all_channels[-1].get('axsec')):
                to_refine.append(C)

        logger.info('need to improve %s channels' % len(to_refine))
        return goal_lum, to_refine
    def update_html(self):
        """update the html from this object since it contains all the information

        Writes HTML/<run>/results.html and SubProcesses/results.dat, records
        the cross-section in the run database and returns (xsec, xerru).
        """


        run = self.cmd.results.current['run_name']
        if not os.path.exists(pjoin(self.cmd.me_dir, 'HTML', run)):
            os.mkdir(pjoin(self.cmd.me_dir, 'HTML', run))

        unit = self.cmd.results.unit
        P_text = ""
        # reuse the collected results if launch() already filled them
        if self.results:
            Presults = self.results
        else:
            self.results = sum_html.collect_result(self.cmd, None)
            Presults = self.results

        for P_comb in Presults:
            P_text += P_comb.get_html(run, unit, self.cmd.me_dir)

        Presults.write_results_dat(pjoin(self.cmd.me_dir,'SubProcesses', 'results.dat'))

        fsock = open(pjoin(self.cmd.me_dir, 'HTML', run, 'results.html'),'w')
        fsock.write(sum_html.results_header)
        fsock.write('%s <dl>' % Presults.get_html(run, unit, self.cmd.me_dir))
        fsock.write('%s </dl></body>' % P_text)

        # propagate the combined numbers to the run database
        self.cmd.results.add_detail('cross', Presults.xsec)
        self.cmd.results.add_detail('error', Presults.xerru)

        return Presults.xsec, Presults.xerru
class gen_ximprove_v4(gen_ximprove):
    """Default refine implementation (fortran gen_ximprove v4 behaviour)."""

    # some hardcoded value which impact the generation
    gen_events_security = 1.2 # multiply the number of requested event by this number for security
    combining_job = 0         # allow to run multiple channel in sequence
    max_request_event = 1000  # split jobs if a channel if it needs more than that
    max_event_in_iter = 5000  # upper bound on points per iteration
    min_event_in_iter = 1000  # lower bound on points per iteration
    max_splitting = 130       # maximum duplication of a given channel
    min_iter = 3              # minimum number of refine iterations
    max_iter = 9              # maximum number of refine iterations
    keep_grid_for_refine = False # only apply if needed to split the job
    def __init__(self, cmd, opt=None):
        """Initialise via the base class and tighten the iteration settings
        when the user asked for a better accuracy than the survey default."""

        super(gen_ximprove_v4, self).__init__(cmd, opt)

        # cmd._survey_options['accuracy'][1] is the default survey accuracy
        if cmd.opts['accuracy'] < cmd._survey_options['accuracy'][1]:
            self.increase_precision()
904 - def reset_multijob(self):
905 906 for path in glob.glob(pjoin(self.me_dir, 'SubProcesses', '*', 907 '*','multijob.dat')): 908 open(path,'w').write('0\n')
909
910 - def write_multijob(self, Channel, nb_split):
911 """ """ 912 if nb_split <=1: 913 return 914 f = open(pjoin(self.me_dir, 'SubProcesses', Channel.get('name'), 'multijob.dat'), 'w') 915 f.write('%i\n' % nb_split) 916 f.close()
917
    def increase_precision(self):
        """Raise the per-iteration statistics and the event security margin
        for runs requesting a better-than-default accuracy."""

        self.max_event_in_iter = 20000
        # NOTE(review): this sets min_events, while the class attributes and
        # the scaling below use min_event_in_iter — confirm which attribute
        # downstream consumers actually read
        self.min_events = 7500
        # helicity sum per event: compensate with more points
        if int(self.nhel) == 1:
            self.min_event_in_iter *= 2**(self.cmd.proc_characteristics['nexternal']//3)
            self.max_event_in_iter *= 2**(self.cmd.proc_characteristics['nexternal']//2)

        self.gen_events_security = 1.3

    # suffixes used to name the splitted sub-directories of a channel
    alphabet = "abcdefghijklmnopqrstuvwxyz"
def get_job_for_event(self):
    """Generate the submission scripts needed to produce a given number of events.

    Corresponds to ``write_gen`` in the Fortran version.  Computes, for each
    channel returned by :meth:`find_job_for_event`, how many jobs/points are
    required, writes the multijob bookkeeping, and finally creates the ajob
    scripts from the ``refine.sh`` template.
    """
    # goal_lum: target luminosity; to_refine: channels still short of events
    goal_lum, to_refine = self.find_job_for_event()

    # reset the potential multijob of previous run
    self.reset_multijob()

    jobs = []  # list of dict with the parameters of each run (one entry per
               # submitted job; split channels contribute several entries)

    # try to have a smart load on the cluster (not really important actually):
    # interleave cheap and expensive channels so combined submissions are balanced
    if self.combining_job > 1:
        # add a nice ordering for the jobs
        new_order = []
        if self.combining_job % 2 == 0:
            # pair first with last, second with second-to-last, ...
            for i in range(len(to_refine) // 2):
                new_order.append(to_refine[i])
                new_order.append(to_refine[-i - 1])
            if len(to_refine) % 2:
                # odd count: the middle element is left over
                new_order.append(to_refine[i + 1])
        else:
            # group one cheap channel with two expensive ones
            for i in range(len(to_refine) // 3):
                new_order.append(to_refine[i])
                new_order.append(to_refine[-2 * i - 1])
                new_order.append(to_refine[-2 * i - 2])
            if len(to_refine) % 3 == 1:
                new_order.append(to_refine[i + 1])
            elif len(to_refine) % 3 == 2:
                new_order.append(to_refine[i + 2])
        # ensure that the reordering is a pure permutation (no loss/duplication)
        assert set([id(C) for C in to_refine]) == set([id(C) for C in new_order])
        to_refine = new_order

    # loop over the channel to refine
    for C in to_refine:
        # 1. Compute the number of events needed to reach the target luminosity
        needed_event = goal_lum * C.get('axsec')
        # ceil-divide by max_request_event to get the number of sub-jobs
        nb_split = int(max(1, ((needed_event - 1) // self.max_request_event) + 1))
        if not self.split_channels:
            nb_split = 1
        if nb_split > self.max_splitting:
            nb_split = self.max_splitting
        nb_split = max(1, nb_split)

        # 2. estimate how many points we need in each iteration, scaling the
        # request by the observed unweighting efficiency (nevents/nunwgt)
        if C.get('nunwgt') > 0:
            nevents = needed_event / nb_split * (C.get('nevents') / C.get('nunwgt'))
            # split by iter: iterations double the sample, so the first one
            # only needs 1/(2**min_iter - 1) of the total
            nevents = int(nevents / (2**self.min_iter - 1))
        else:
            # no unweighting information yet: fall back on the hard maximum
            nevents = self.max_event_in_iter

        if nevents < self.min_event_in_iter:
            # too few points per job: reduce the splitting instead
            nb_split = int(nb_split * nevents / self.min_event_in_iter) + 1
            nevents = self.min_event_in_iter
        #
        # forbid too low/too large value
        nevents = max(self.min_event_in_iter, min(self.max_event_in_iter, nevents))
        logger.debug("%s : need %s event. Need %s split job of %s points", C.name, needed_event, nb_split, nevents)

        # write the multi-job information
        self.write_multijob(C, nb_split)

        # packet run after all sub-jobs finish to combine their results
        packet = cluster.Packet((C.parent_name, C.name),
                                combine_runs.CombineRuns,
                                (pjoin(self.me_dir, 'SubProcesses', C.parent_name)),
                                {"subproc": C.name, "nb_split": nb_split})

        # create the info dict; assume no splitting for the default
        info = {'name': self.cmd.results.current['run_name'],
                'script_name': 'unknown',
                'directory': C.name,      # need to be change for splitted job
                'P_dir': C.parent_name,
                'offset': 1,              # need to be change for splitted job
                'nevents': nevents,
                'maxiter': self.max_iter,
                'miniter': self.min_iter,
                'precision': -goal_lum / nb_split,  # negative => event-number mode
                'nhel': self.run_card['nhel'],
                'channel': C.name.replace('G', ''),
                'grid_refinment': 0,      # no refinment of the grid
                'base_directory': '',     # should be change in splitted job if want to keep the grid
                'packet': packet,
                }

        if nb_split == 1:
            jobs.append(info)
        else:
            for i in range(nb_split):
                new_info = dict(info)
                new_info['offset'] = i + 1
                # sub-directory suffix: a0, b0, ... z0, a1, ... (26 letters + counter)
                new_info['directory'] += self.alphabet[i % 26] + str((i + 1) // 26)
                if self.keep_grid_for_refine:
                    new_info['base_directory'] = info['directory']
                jobs.append(new_info)

    self.create_ajob(pjoin(self.me_dir, 'SubProcesses', 'refine.sh'), jobs)
1033 1034
def create_ajob(self, template, jobs):
    """Write the ``ajobN`` submission scripts for *jobs* from *template*.

    Jobs are first grouped by SubProcess directory (no mixed submission);
    if ``self.combining_job`` is larger than 1, several channels are packed
    into a single script, distributing the remainder over the first scripts.

    :param template: path to the script template (formatted with each job dict)
    :param jobs: list of job-parameter dicts (as built by get_job_for_event)
    """
    if not jobs:
        return

    # filter the job according to their SubProcess directory  # no mix submission
    P2job = collections.defaultdict(list)
    for j in jobs:
        P2job[j['P_dir']].append(j)
    if len(P2job) > 1:
        for P in P2job.values():
            self.create_ajob(template, P)
        return

    # Here we can assume that all jobs are for the same directory.
    path = pjoin(self.me_dir, 'SubProcesses', jobs[0]['P_dir'])

    # use a context manager so the template handle is always released
    with open(template, 'r') as tsock:
        template_text = tsock.read()

    # special treatment if needed to combine the script
    # computes how many submissions miss one job
    if self.combining_job > 1:
        skip1 = 0
        n_channels = len(jobs)
        nb_sub = n_channels // self.combining_job
        nb_job_in_last = n_channels % self.combining_job
        if nb_job_in_last:
            nb_sub += 1
            skip1 = self.combining_job - nb_job_in_last
            if skip1 > nb_sub:
                # cannot spread the remainder: retry with fewer jobs per script
                self.combining_job -= 1
                return self.create_ajob(template, jobs)
        combining_job = self.combining_job
    else:
        # define the variables for combining jobs even in non-combine mode
        # such that we can use the same routine
        skip1 = 0
        combining_job = 1
        nb_sub = len(jobs)

    nb_use = 0
    for i in range(nb_sub):
        script_number = i + 1
        # the first `skip1` scripts carry one job less to absorb the remainder
        if i < skip1:
            nb_job = combining_job - 1
        else:
            nb_job = combining_job
        with open(pjoin(path, 'ajob%i' % script_number), 'w') as fsock:
            for j in range(nb_use, nb_use + nb_job):
                # BUG FIX: was `j > len(jobs)`, which let j == len(jobs)
                # through and raised IndexError on jobs[j]
                if j >= len(jobs):
                    break
                info = jobs[j]
                info['script_name'] = 'ajob%i' % script_number
                if "base_directory" not in info:
                    info["base_directory"] = "./"
                fsock.write(template_text % info)
        nb_use += nb_job
1093
def get_job_for_precision(self):
    """Create the ajob scripts needed to reach a given relative precision
    on the total cross-section (``self.err_goal`` <= 1).

    Channels whose error contribution exceeds the per-channel limit are
    selected for refinement; the others only contribute a residual error
    budget used to relax the limit.
    """
    assert self.err_goal <= 1
    xtot = abs(self.results.xsec)
    logger.info("Working on precision: %s %%" % (100 * self.err_goal))
    # flatten all channels of the subprocesses that contribute (mfactor != 0)
    all_channels = sum([list(P) for P in self.results if P.mfactor], [])
    limit = self.err_goal * xtot / len(all_channels)

    to_refine = []
    rerr = 0  # error of the jobs not directly selected
    for C in all_channels:
        cerr = C.mfactor * (C.xerru + len(all_channels) * C.xerrc)
        if cerr > abs(limit):
            to_refine.append(C)
        else:
            rerr += cerr
    # square the residual error once the accumulation is complete
    # (placement reconstructed from mangled source — TODO confirm vs upstream)
    rerr *= rerr

    if not len(to_refine):
        return

    # change limit since most channels don't contribute
    limit = math.sqrt((self.err_goal * xtot)**2 - rerr / math.sqrt(len(to_refine)))
    for C in to_refine[:]:
        cerr = C.mfactor * (C.xerru + len(to_refine) * C.xerrc)
        if cerr < limit:
            to_refine.remove(C)

    # all the channels are now selected. create the channel information
    logger.info('need to improve %s channels' % len(to_refine))

    jobs = []  # list of dict with the parameters of each run

    # loop over the channels to refine
    for C in to_refine:
        # 1. Determine how many events we need in each iteration
        yerr = C.mfactor * (C.xerru + len(to_refine) * C.xerrc)
        nevents = 0.2 * C.nevents * (yerr / limit)**2

        # **(2/3) to slow down the increase in number of jobs
        nb_split = int((nevents * (C.nunwgt / C.nevents) / self.max_request_event / (2**self.min_iter - 1))**(2 / 3))
        nb_split = max(nb_split, 1)
        if nb_split > self.max_splitting:
            nb_split = self.max_splitting

        if nb_split > 1:
            nevents = nevents / nb_split
            self.write_multijob(C, nb_split)
        # forbid too low/too large value.
        # BUG FIX: was min(min_event_in_iter, max(max_event_in_iter, nevents)),
        # which always evaluates to min_event_in_iter instead of clamping;
        # sibling get_job_for_event uses the correct max/min order.
        nevents = max(self.min_event_in_iter, min(self.max_event_in_iter, nevents))

        # create the info dict; assume no splitting for the default
        info = {'name': self.cmd.results.current['run_name'],
                'script_name': 'unknown',
                'directory': C.name,      # need to be change for splitted job
                'P_dir': C.parent_name,
                'offset': 1,              # need to be change for splitted job
                'nevents': nevents,
                'maxiter': self.max_iter,
                'miniter': self.min_iter,
                'precision': yerr / math.sqrt(nb_split) / (C.get('xsec') + yerr),
                'nhel': self.run_card['nhel'],
                'channel': C.name.replace('G', ''),
                'grid_refinment': 1
                }

        if nb_split == 1:
            jobs.append(info)
        else:
            for i in range(nb_split):
                new_info = dict(info)
                new_info['offset'] = i + 1
                new_info['directory'] += self.alphabet[i % 26] + str((i + 1) // 26)
                jobs.append(new_info)

    self.create_ajob(pjoin(self.me_dir, 'SubProcesses', 'refine.sh'), jobs)
1173
def update_html(self):
    """Update the HTML report from this object since it contains all the
    information; also rewrites ``SubProcesses/results.dat``.

    :return: tuple ``(cross-section, uncorrelated error)``
    """
    run = self.cmd.results.current['run_name']
    if not os.path.exists(pjoin(self.cmd.me_dir, 'HTML', run)):
        os.mkdir(pjoin(self.cmd.me_dir, 'HTML', run))

    unit = self.cmd.results.unit
    P_text = ""
    # reuse collected results when available, otherwise collect them now
    if self.results:
        Presults = self.results
    else:
        self.results = sum_html.collect_result(self.cmd, None)
        Presults = self.results

    for P_comb in Presults:
        P_text += P_comb.get_html(run, unit, self.cmd.me_dir)

    Presults.write_results_dat(pjoin(self.cmd.me_dir, 'SubProcesses', 'results.dat'))

    # BUG FIX: the file handle was never closed; use a context manager so the
    # HTML output is flushed and released even if a write fails.
    with open(pjoin(self.cmd.me_dir, 'HTML', run, 'results.html'), 'w') as fsock:
        fsock.write(sum_html.results_header)
        fsock.write('%s <dl>' % Presults.get_html(run, unit, self.cmd.me_dir))
        fsock.write('%s </dl></body>' % P_text)

    self.cmd.results.add_detail('cross', Presults.xsec)
    self.cmd.results.add_detail('error', Presults.xerru)

    return Presults.xsec, Presults.xerru
1204
class gen_ximprove_v4_nogridupdate(gen_ximprove_v4):
    """Variant of gen_ximprove_v4 that keeps the integration grid fixed
    during refine (keep_grid_for_refine) and uses smaller, more numerous
    jobs; splitting is increased further for loop-induced processes."""

    # some hardcoded values which impact the generation
    gen_events_security = 1.1  # multiply the number of requested event by this number for security
    combining_job = 0          # allow to run multiple channel in sequence
    max_request_event = 400    # split jobs if a channel needs more than that
    max_event_in_iter = 500
    min_event_in_iter = 250
    max_splitting = 260        # maximum duplication of a given channel
    min_iter = 2
    max_iter = 6
    keep_grid_for_refine = True

    def __init__(self, cmd, opt=None):
        """Initialise from the command interface *cmd*, then tighten the
        splitting parameters for loop-induced multi-particle processes."""
        # BUG FIX: `self` was missing from the explicit unbound call
        # (`gen_ximprove.__init__(cmd, opt)`), which would have bound `cmd`
        # as self and raised at runtime.
        # NOTE(review): this deliberately(?) calls gen_ximprove.__init__
        # rather than gen_ximprove_v4.__init__ — confirm that skipping the
        # intermediate initialiser is intended.
        gen_ximprove.__init__(self, cmd, opt)

        if cmd.proc_characteristics['loopinduced'] and \
           cmd.proc_characteristics['nexternal'] > 2:
            self.increase_parralelization(cmd.proc_characteristics['nexternal'])

    def increase_parralelization(self, nexternal):
        """Raise the allowed channel splitting (and shrink per-job requests)
        with the number of external legs *nexternal*."""
        self.max_splitting = 1000

        # a user-set refine_evt_by_job overrides the per-multiplicity defaults
        if self.run_card['refine_evt_by_job'] != -1:
            pass
        elif nexternal == 3:
            self.max_request_event = 200
        elif nexternal == 4:
            self.max_request_event = 100
        elif nexternal >= 5:
            self.max_request_event = 50
        # indentation reconstructed from mangled source: these two appear to
        # apply unconditionally — TODO confirm against upstream
        self.min_event_in_iter = 125
        self.max_iter = 5
1244
class gen_ximprove_share(gen_ximprove, gensym):
    """Doing the refine in multicore. Each core handles a couple of PS points."""

    nb_ps_by_job = 2000          # default number of phase-space points per job
    mode = "refine"              # submission mode passed to the grid writer
    gen_events_security = 1.15   # over-generation factor on the event target
    # Note the real security is lower since we stop the jobs if they are at 96%
    # of this target.
def __init__(self, *args, **opts):
    """Initialise the shared-refine state on top of the parent classes."""
    super(gen_ximprove_share, self).__init__(*args, **opts)
    # accessor: number of splitted jobs registered for a (Pdir, G) pair
    self.splitted_for_dir = lambda pdir, g: self.splitted_Pdir[(pdir, g)]
    # (Pdir, G) -> (nunwgt, maxwgt) accumulated over the iterations
    self.generated_events = {}
1259 1260
def get_job_for_event(self):
    """Generate the jobs needed to produce a given number of events
    (multicore/shared variant).  Corresponds to ``write_gen`` in the
    Fortran version.  Estimates the total number of phase-space points
    per channel, then shares them out in packets of ``nb_ps_by_job``.
    """
    goal_lum, to_refine = self.find_job_for_event()
    self.goal_lum = goal_lum

    # loop over the channels to refine to find the number of PS points to launch
    total_ps_points = 0
    channel_to_ps_point = []
    for C in to_refine:
        # 0. remove previous events files (best effort — missing file is fine)
        try:
            os.remove(pjoin(self.me_dir, "SubProcesses", C.parent_name, C.name, "events.lhe"))
        except:
            # NOTE(review): bare except also hides permission errors — consider
            # narrowing to OSError
            pass

        # 1. Compute the number of events needed to reach the target
        needed_event = goal_lum * C.get('axsec')
        if needed_event == 0:
            continue
        # 2. estimate how many points we need in each iteration
        if C.get('nunwgt') > 0:
            # scale by the observed unweighting efficiency
            nevents = needed_event * (C.get('nevents') / C.get('nunwgt'))
            # split by iter: later iterations double the sample size
            nevents = int(nevents / (2**self.min_iter - 1))
        else:
            # no efficiency information: size from the splitting heuristics
            nb_split = int(max(1, ((needed_event - 1) // self.max_request_event) + 1))
            if not self.split_channels:
                nb_split = 1
            if nb_split > self.max_splitting:
                nb_split = self.max_splitting
                nevents = self.max_event_in_iter * self.max_splitting
            else:
                nevents = self.max_event_in_iter * nb_split

        if nevents > self.max_splitting * self.max_event_in_iter:
            logger.warning("Channel %s/%s has a very low efficiency of unweighting. Might not be possible to reach target" % \
                           (C.name, C.parent_name))
            nevents = self.max_event_in_iter * self.max_splitting

        total_ps_points += nevents
        channel_to_ps_point.append((C, nevents))

    # choose the packet size according to the run mode
    if self.cmd.options["run_mode"] == 1:
        # cluster mode: spread the points over the cluster size when known
        if self.cmd.options["cluster_size"]:
            nb_ps_by_job = total_ps_points / int(self.cmd.options["cluster_size"])
        else:
            nb_ps_by_job = self.nb_ps_by_job
    elif self.cmd.options["run_mode"] == 2:
        # multicore mode: one packet per core (rounded up)
        remain = total_ps_points % self.cmd.options["nb_core"]
        if remain:
            nb_ps_by_job = 1 + (total_ps_points - remain) / self.cmd.options["nb_core"]
        else:
            nb_ps_by_job = total_ps_points / self.cmd.options["nb_core"]
    else:
        nb_ps_by_job = self.nb_ps_by_job

    # never submit packets of fewer than 500 points
    nb_ps_by_job = int(max(nb_ps_by_job, 500))

    for C, nevents in channel_to_ps_point:
        # ceil-divide the channel's points into packets
        if nevents % nb_ps_by_job:
            nb_job = 1 + int(nevents // nb_ps_by_job)
        else:
            nb_job = int(nevents // nb_ps_by_job)
        submit_ps = min(nevents, nb_ps_by_job)
        if nb_job == 1:
            submit_ps = max(submit_ps, self.min_event_in_iter)
        # C.name[1:] strips the leading 'G' of the channel directory name
        self.create_resubmit_one_iter(C.parent_name, C.name[1:], submit_ps, nb_job, step=0)
        needed_event = goal_lum * C.get('xsec')
        logger.debug("%s/%s : need %s event. Need %s split job of %s points", C.parent_name, C.name, needed_event, nb_job, submit_ps)
1333 1334
def combine_iteration(self, Pdir, G, step):
    """Combine the results of iteration *step* for channel *G* of *Pdir*,
    decide whether enough unweighted events have been produced, and either
    write the final results or resubmit jobs for the next iteration.

    :param Pdir: SubProcess directory path
    :param G: channel identifier (without the leading 'G')
    :param step: current iteration number
    :return: always 0
    """
    grid_calculator, cross, error = self.combine_grid(Pdir, G, step)

    # collect all the generated events
    Gdirs = []  # build the list of directories G<G>_1, G<G>_2, ...
    for i in range(self.splitted_for_dir(Pdir, G)):
        path = pjoin(Pdir, "G%s_%s" % (G, i + 1))
        Gdirs.append(path)
    assert len(grid_calculator.results) == len(Gdirs) == self.splitted_for_dir(Pdir, G)

    # Check how many events are going to be kept after un-weighting.
    needed_event = cross * self.goal_lum
    if needed_event == 0:
        return 0
    # check that the number of events requested is not higher than the actual
    # total number of events to generate (err_goal >= 1 means event-number mode).
    if self.err_goal >= 1:
        if needed_event > self.gen_events_security * self.err_goal:
            needed_event = int(self.gen_events_security * self.err_goal)

    # events already accumulated from previous iterations
    if (Pdir, G) in self.generated_events:
        old_nunwgt, old_maxwgt = self.generated_events[(Pdir, G)]
    else:
        old_nunwgt, old_maxwgt = 0, 0

    if old_nunwgt == 0 and os.path.exists(pjoin(Pdir, "G%s" % G, "events.lhe")):
        # possible for second refine: recover the count from the file on disk
        lhe = lhe_parser.EventFile(pjoin(Pdir, "G%s" % G, "events.lhe"))
        old_nunwgt = lhe.unweight(None, trunc_error=0.005, log_level=0)
        old_maxwgt = lhe.max_wgt

    # merge old and new samples under the common (larger) maximum weight;
    # the old unweighted count is rescaled by old_maxwgt/maxwgt
    maxwgt = max(grid_calculator.get_max_wgt(), old_maxwgt)
    new_evt = grid_calculator.get_nunwgt(maxwgt)
    efficiency = new_evt / sum([R.nevents for R in grid_calculator.results])
    nunwgt = old_nunwgt * old_maxwgt / maxwgt
    nunwgt += new_evt

    # check the number of events for this iteration alone
    one_iter_nb_event = grid_calculator.get_nunwgt()
    drop_previous_iteration = False
    # compare the number of events left to generate if we discard the previous iterations
    n_target_one_iter = (needed_event - one_iter_nb_event) / (one_iter_nb_event / sum([R.nevents for R in grid_calculator.results]))
    n_target_combined = (needed_event - nunwgt) / efficiency
    if n_target_one_iter < n_target_combined:
        # the last iteration alone has more events than the combined iterations.
        # it is therefore interesting to drop the previous iterations.
        drop_previous_iteration = True
        nunwgt = one_iter_nb_event
        maxwgt = grid_calculator.get_max_wgt()
        new_evt = nunwgt
        efficiency = (one_iter_nb_event / sum([R.nevents for R in grid_calculator.results]))

    # append to the existing events.lhe, or truncate it when dropping history
    try:
        if drop_previous_iteration:
            raise IOError
        output_file = open(pjoin(Pdir, "G%s" % G, "events.lhe"), 'a')
    except IOError:
        output_file = open(pjoin(Pdir, "G%s" % G, "events.lhe"), 'w')

    misc.call(["cat"] + [pjoin(d, "events.lhe") for d in Gdirs],
              stdout=output_file)
    output_file.close()
    # For large numbers of iterations, check the number of events by doing the
    # real unweighting (the analytic estimate degrades over many merges).
    if nunwgt < 0.6 * needed_event and step > self.min_iter:
        lhe = lhe_parser.EventFile(output_file.name)
        old_nunwgt = nunwgt
        nunwgt = lhe.unweight(None, trunc_error=0.01, log_level=0)

    self.generated_events[(Pdir, G)] = (nunwgt, maxwgt)

    # check what to do
    if nunwgt >= int(0.96 * needed_event) + 1:  # 0.96*1.15=1.10 =real security
        # We did it.
        logger.info("found enough event for %s/G%s" % (os.path.basename(Pdir), G))
        self.write_results(grid_calculator, cross, error, Pdir, G, step, efficiency)
        return 0
    elif step >= self.max_iter:
        logger.debug("fail to find enough event")
        self.write_results(grid_calculator, cross, error, Pdir, G, step, efficiency)
        return 0

    nb_split_before = len(grid_calculator.results)
    nevents = grid_calculator.results[0].nevents
    if nevents == 0:  # possible if some integral returns 0
        nevents = max(g.nevents for g in grid_calculator.results)

    # number of additional PS points (and jobs) to reach the target;
    # 1e-99 guards against a zero efficiency
    need_ps_point = (needed_event - nunwgt) / (efficiency + 1e-99)
    need_job = need_ps_point // nevents + 1

    if step < self.min_iter:
        # This is normal but check if we are on a good track
        job_at_first_iter = nb_split_before / 2**(step - 1)
        expected_total_job = job_at_first_iter * (2**self.min_iter - 1)
        done_job = job_at_first_iter * (2**step - 1)
        expected_remaining_job = expected_total_job - done_job

        logger.debug("efficiency status (smaller is better): %s", need_job / expected_remaining_job)
        # increase if needed but not too much
        need_job = min(need_job, expected_remaining_job * 1.25)

        # spread the remaining jobs over the remaining doubling iterations
        nb_job = (need_job - 0.5) // (2**(self.min_iter - step) - 1) + 1
        nb_job = max(1, nb_job)
        grid_calculator.write_grid_for_submission(Pdir, G,
            self.splitted_for_dir(Pdir, G), nb_job * nevents, mode=self.mode,
            conservative_factor=self.max_iter)
        logger.info("%s/G%s is at %i/%i (%.2g%%) event. Resubmit %i job at iteration %i." \
                    % (os.path.basename(Pdir), G, int(nunwgt), int(needed_event) + 1,
                       (float(nunwgt) / needed_event) * 100.0 if needed_event > 0.0 else 0.0,
                       nb_job, step))
        self.create_resubmit_one_iter(Pdir, G, nevents, nb_job, step)
        #self.create_job(Pdir, G, nb_job, nevents, step)

    elif step < self.max_iter:
        if step + 1 == self.max_iter:
            need_job = 1.20 * need_job  # avoid to have just too few events.

        # cap growth at 1.5x the previous splitting
        nb_job = int(min(need_job, nb_split_before * 1.5))
        grid_calculator.write_grid_for_submission(Pdir, G,
            self.splitted_for_dir(Pdir, G), nb_job * nevents, mode=self.mode,
            conservative_factor=self.max_iter)

        logger.info("%s/G%s is at %i/%i ('%.2g%%') event. Resubmit %i job at iteration %i." \
                    % (os.path.basename(Pdir), G, int(nunwgt), int(needed_event) + 1,
                       (float(nunwgt) / needed_event) * 100.0 if needed_event > 0.0 else 0.0,
                       nb_job, step))
        self.create_resubmit_one_iter(Pdir, G, nevents, nb_job, step)

    return 0
1473 1474
def write_results(self, grid_calculator, cross, error, Pdir, G, step, efficiency):
    """Write the final ``results.dat`` for channel *G* of *Pdir*.

    :param grid_calculator: combined grid information for the channel
    :param cross: cross-section estimate (zero means an empty channel)
    :param error: relative error on the cross-section
    :param step: last iteration number (stored as maxit)
    :param efficiency: unweighting efficiency of the last combination
    """
    # compute the values
    if cross == 0:
        abscross, nw, luminosity = 0, 0, 0
        # NOTE(review): `wgt` appears twice in this unpacking — presumably one
        # occurrence was meant to be another variable; harmless as written
        wgt, maxit, nunwgt, wgt, nevents = 0, 0, 0, 0, 0
        error = 0
    else:
        grid_calculator.results.compute_values()
        abscross = self.abscross[(Pdir, G)] / self.sigma[(Pdir, G)]
        nunwgt, wgt = self.generated_events[(Pdir, G)]
        nw = int(nunwgt / efficiency)
        nunwgt = int(nunwgt)
        maxit = step
        nevents = nunwgt
        # make the unweighting to compute the number of events:
        luminosity = nunwgt / cross

    # format the results.dat
    def fstr(nb):
        # Fortran-like scientific notation with a leading 0.x mantissa,
        # e.g. 1.5 -> '0.15000E+01'
        data = '%E' % nb
        nb, power = data.split('E')
        nb = float(nb) / 10
        power = int(power) + 1
        return '%.5fE%+03i' % (nb, power)
    line = '%s %s %s %i %i %i %i %s %s %s 0.0 0.0 0\n' % \
           (fstr(cross), fstr(error * cross), fstr(error * cross),
            nevents, nw, maxit, nunwgt,
            fstr(luminosity), fstr(wgt), fstr(abscross))

    fsock = open(pjoin(self.me_dir, 'SubProcesses', Pdir, 'G%s' % G,
                       'results.dat'), 'w')
    fsock.writelines(line)
    fsock.close()
1509