# The Models@Home cluster interface cluster.py
# Version 2012-01-06
# License: Public Domain

import os,string,time,sys

# ======================================================================
#                           J O B   C L A S S
# ======================================================================

class job_list:
  """
  This class is used to add jobs to the MODELS@HOME cluster job queue.
  Create a class instance passing the filename of the job list and optionally
  an error-handling function (called with one argument, the error description).
  - Instance.error contains an error description if something went wrong
  - Instance.lock() locks the job list (done automatically when jobs are submitted)
  - Instance.add() queues a job locally, it is not written to the list yet
  - Instance.submit() writes the queued jobs to the job list and removes the lock
  - Instance.submitchunks() does the same, but limits the number of jobs that
    are in the queue at the same time
  - Instance.waiting() returns the number of currently waiting jobs
  - Instance.convtext() converts a text file to the job list format
  - Instance.waitresult() waits until the given list of result files have
    returned from the cluster

  Sample job list:

  JOB______S_P_CS_CLIENT IP___________CLIENT NAME______TIME OF JOB SUBMISSION___CHECK____PROGRAM__OPTIONS|DIR|COPY|TEXT|RETURN|DONEFLAG
  00000000 W 0                                                                           WHATIF   |/mnt/home7/cc/elmar/client1|
  00000001 A 5 02 123.234.456.789:001 CMBIPC1          Tue May 21 13:46:22 1991 13fed734 YASARA   -nss -ncd -con -mcr|STARTUP.FIL=
  00000002 W 9                                                                           WHATMB   -step2 align.txt|

  Example:

  job=job_list(os.path.join(wmb_conf["CLUSTER_DIR"],wmb_conf["CLUSTER_JOB"]),error)
  whatifscript=job.convtext("STARTUP.FIL","build\n inibld\n ala\n %makmol\n\n ala.pdb\n all 0\n ONE ALANINE\n\nfullst y\n")
  for i in range(10):
    # ADD JOB
    job.add(8,"WHATIF",'|'+os.getcwd()+"||"+whatifscript+"|ala.pdb|")
  # SUBMIT THEM ALL
  job.submit()

  """

  # A JOB LIST FILENAME MUST ALWAYS END WITH .job
  #   IT IS RENAMED TO *.jupy (job update Python) BEFORE BEING ACCESSED (SO THAT NO OTHER
  #   PROGRAM CAN MODIFY THE FILE)

  # INITIALIZE
  # ==========
  def __init__(self,listname,errorfunc=None):
    """
    - listname IS THE PATH AND NAME OF cluster.job (MUST END WITH .job)
    - errorfunc IS AN ERROR HANDLING FUNCTION TAKING THE ERROR MESSAGE AS ITS
      ONLY ARGUMENT. WITHOUT IT, ERRORS ARE PRINTED AND SystemExit IS RAISED.
    """
    self.locked=0
    self.error=None
    self.errorfunc=errorfunc
    self.listname=listname
    # THE LAST FOUR CHARACTERS (".job") ARE REPLACED WITH ".jupy"
    self.lockname=listname[:-4]+".jupy"
    # DEFAULT PRIORITY, USED IF add() IS CALLED WITH priority None OR <=0
    self.priority=5
    # LINES OF THE JOB LIST AS READ BY lock()
    self.list=[]
    # JOBS QUEUED BY add() BUT NOT YET SUBMITTED
    self.waitinglist=[]

  # LOCK THE JOB LIST
  # =================
  def lock(self):
    """
    THIS FUNCTION RETURNS AS SOON AS THE JOB LIST HAS BEEN LOCKED (RENAMED TO
    THE LOCK NAME) AND ITS LINES HAVE BEEN LOADED INTO Instance.list.
    DOES NOTHING IF THE LIST IS ALREADY LOCKED BY THIS INSTANCE.
    """
    if (self.locked): return
    # REMOVE THE LOCKED LIST, MIGHT BE A LEFTOVER
    # NOTE(review): THIS ALSO DELETES A LOCK CURRENTLY HELD BY ANOTHER
    # INSTANCE USING THE SAME LOCK NAME - CONFIRM THAT THIS CANNOT HAPPEN.
    if (os.path.exists(self.lockname)): os.remove(self.lockname)
    while (1):
      # WAIT UNTIL LIST FILE IS ACCESSIBLE, THEN RENAME IT TO LOCK
      while (1):
        try: os.rename(self.listname,self.lockname)
        except OSError:
          # SOMEONE ELSE IS USING THE FILE. ONLY OSError IS RETRIED, SO THAT
          # PROGRAMMING ERRORS DO NOT END UP IN AN ENDLESS LOOP.
          time.sleep(1)
          continue
        break
      if (os.path.exists(self.lockname)): break
      print("Job list was renamed to %s but cannot be found now. Trying again." % self.lockname)
    # LOAD LIST, MAKING SURE THE FILE IS CLOSED AGAIN BEFORE IT IS REWRITTEN
    with open(self.lockname,"r") as listfile:
      self.list=listfile.readlines()
    self.locked=1

  # RAISE AN ERROR
  # ==============
  def raiseerror(self,errormsg):
    """
    STORES THE MESSAGE (PREFIXED WITH THE CLASS NAME) IN Instance.error AND
    CALLS THE ERROR FUNCTION PROVIDED BY THE USER WITH IT. WITHOUT AN ERROR
    FUNCTION THE MESSAGE IS PRINTED AND SystemExit IS RAISED.
    """
    errormsg=self.__class__.__name__+'.'+errormsg
    self.error=errormsg
    if (self.errorfunc is not None):
      # DIRECT CALL, THE apply() BUILTIN DOES NOT EXIST IN PYTHON 3
      self.errorfunc(errormsg)
    else:
      print(errormsg)
      raise SystemExit

  # ADD A JOB TO THE LIST
  # =====================
  def add(self,priority,program,options,cpus=1):
    """
    QUEUES A JOB LOCALLY, IT IS WRITTEN TO THE JOB LIST BY submit().
    - priority RANGES FROM 1 TO 9. None SELECTS THE DEFAULT PRIORITY, VALUES
      <=0 ARE ADDED TO THE DEFAULT PRIORITY WHEN THE JOB IS SUBMITTED.
    - program IS THE 6 LETTER PROGRAM ID
    - options ARE PROGRAM OPTIONS, DATA FILES, RESULT FILES AND A DONE FILE.
    - cpus IS THE NUMBER OF CPUs THE JOB NEEDS
    """
    if (priority is None): priority=self.priority
    self.waitinglist.append([priority,program,options,cpus])

  # SUBMIT THE JOBS IN CHUNKS
  # =========================
  def submitchunks(self,jobs,waitflag=1):
    """
    THE JOBS ARE SUBMITTED IN CHUNKS, SO THAT NEVER MORE THAN jobs ARE IN THE
    QUEUE TOGETHER. THIS IS HELPFUL IF DIFFERENT USERS WITH EQUAL PRIVILEGES
    (AND THUS PRIORITY) SHARE THE CLUSTER RESOURCES.
    - jobs IS THE CHUNK SIZE, -1 MEANS NO LIMIT
    - IF waitflag IS SET, A DONE FILE IS ADDED TO EVERY JOB THAT HAS NONE, AND
      THE FUNCTION ONLY RETURNS AFTER ALL DONE FILES HAVE ARRIVED (THEY ARE
      DELETED AGAIN).
    NOTE: FINISHED JOBS ARE DETECTED BY THEIR DONE FILES. IF JOBS WITHOUT DONE
    FILE ARE SUBMITTED WITH waitflag 0 AND A CHUNK SIZE SMALLER THAN THE NUMBER
    OF JOBS, THE REMAINING CHUNKS ARE NEVER SUBMITTED.
    """
    if (jobs==-1): jobs=999999
    total=len(self.waitinglist)
    # PREPARE THE JOBS
    joblist=[]
    donefilelist=[]
    for i,(priority,program,options,cpus) in enumerate(self.waitinglist):
      options=options.rstrip()
      # THE WORKING DIRECTORY IS THE FIELD BETWEEN THE FIRST TWO '|'
      start=options.find('|')+1
      end=options.find('|',start)
      workdir=options[start:end]
      # THE DONE FILE IS WHATEVER FOLLOWS THE LAST '|'
      donefilename=options[options.rfind('|')+1:]
      if (donefilename=="" and waitflag):
        # ADD DONE FILE IF WE ARE EXPECTED TO WAIT
        donefilename=dsc_tmpfilename("donefile"+str(i))
        options+=donefilename
      if (donefilename!=""):
        donefilename=os.path.join(workdir,donefilename)
        donefilelist.append(donefilename)
        # A LEFTOVER DONE FILE WOULD BE MISTAKEN FOR A FINISHED JOB
        dsc_remove(donefilename)
      if (priority<=0):
        # CHOOSE DEFAULT PRIORITY
        priority=self.priority+priority
      priority=min(9,max(0,priority))
      # THE 72 COLUMNS FOR CLIENT IP, CLIENT NAME, TIME AND CHECK ARE LEFT
      # BLANK FOR A WAITING JOB, SO THAT THE PROGRAM STARTS IN COLUMN 87
      # (SEE THE SAMPLE JOB LIST IN THE CLASS DOCUMENTATION)
      joblist.append(" W %1d %02d%s%s   %s\n" % (priority,cpus,' '*72,program,options))
    # SUBMIT THEM IN CHUNKS
    i=0
    while (i<total):
      # COUNT DONE FILES
      donefiles=sum(1 for donefilename in donefilelist if os.path.exists(donefilename))
      if (i<donefiles+jobs):
        # SUBMIT JOBS
        self.lock()
        try:
          # NUMBER OF JOB = NUMBER OF LAST JOB IN LIST+1
          # (A LIST WITH ONLY THE HEADER LINE, OR AN EMPTY ONE, STARTS AT 0)
          if (len(self.list)<=1): jobnum=0
          else: jobnum=int(self.list[-1][0:8])+1
          # MAKE SURE THE FIRST NEW JOB DOES NOT END UP ON THE LAST OLD LINE
          if (self.list and not self.list[-1].endswith("\n")): self.list[-1]+="\n"
          while (i<donefiles+jobs and i<total):
            if (jobnum>99999999): jobnum=0
            self.list.append("%08d" % jobnum +joblist[i])
            jobnum=jobnum+1
            i=i+1
          # SAVE LIST
          with open(self.lockname,"w") as listfile:
            listfile.writelines(self.list)
        finally:
          # ALWAYS GIVE THE JOB LIST BACK, OTHERWISE NOBODY ELSE CAN ACCESS IT
          self.unlock()
        if (jobs>=999999): sys.stdout.write("\r%d of %d jobs submitted." % (i,total))
        else: sys.stdout.write("\r%d of %d jobs submitted in chunks of %d." % (i,total,jobs))
        sys.stdout.flush()
      # GIVE THE CLUSTER SOME TIME BEFORE THE NEXT CHUNK, NO NEED TO WAIT
      # IF EVERYTHING HAS BEEN SUBMITTED ALREADY
      if (i<total): time.sleep(5)
    self.waitinglist=[]
    if (waitflag):
      # WAIT FOR ALL JOBS
      for i,donefilename in enumerate(donefilelist):
        sys.stdout.write("\r%d of %d results arrived.                     " % (i,len(donefilelist)))
        sys.stdout.flush()
        while (not os.path.exists(donefilename)): time.sleep(1)
        dsc_remove(donefilename)
      print("\rAll %d results arrived.                   " % len(donefilelist))

  # SUBMIT THE JOBS
  # ===============
  def submit(self,waitflag=1):
    """
    THE JOBS ARE SUBMITTED TO THE CLUSTER ALL AT ONCE, cluster.job IS UNLOCKED.
    IF waitflag IS SET, THIS FUNCTION WILL ONLY RETURN AFTER ALL RESULTS HAVE
    BEEN RECEIVED.
    """
    self.submitchunks(999999,waitflag)

  # UNLOCK THE JOBLIST
  # ==================
  def unlock(self):
    """
    THE JOBLIST IS RENAMED BACK TO cluster.job AND BECOMES
    VISIBLE FOR OTHER PROGRAMS.
    """
    os.rename(self.lockname,self.listname)
    self.locked=0

  # COUNT NUMBER OF WAITING JOBS IN LIST
  # ====================================
  def waiting(self):
    """
    RETURNS THE NUMBER OF LINES IN THE LOADED JOB LIST WHOSE STATUS COLUMN
    (COLUMN 9) IS 'W'. THE JOB LIST MUST BE LOCKED, OTHERWISE AN ERROR IS
    RAISED AND None IS RETURNED.
    """
    if (not self.locked):
      self.raiseerror("waiting: Waiting method can only be called after job list has been locked")
      return(None)
    # THE SLICE DOES NOT FAIL ON LINES THAT ARE TOO SHORT (E.G. EMPTY ONES)
    return(sum(1 for job in self.list if job[9:10]=='W'))

  # CONVERT NORMAL TEXT TO JOB LIST FORMAT (REPLACE \n WITH #)
  # ==========================================================
  def convtext(self,filename,text):
    """
    RETURNS filename=QtextQ, WHERE LINE FEEDS IN text ARE REPLACED WITH '#'
    AND Q IS A QUOTE CHARACTER THAT DOES NOT OCCUR IN THE TEXT.
    - filename IS THE NAME OF THE FILE THE TEXT BELONGS TO
    - text IS A STRING OR A LIST/TUPLE OF LINES
    RETURNS None IF THE TEXT CONTAINS '|' (AFTER RAISING AN ERROR) OR IF NO
    UNUSED QUOTE CHARACTER COULD BE FOUND.
    """
    # GET RID OF LINE FEEDS
    if (isinstance(text,(list,tuple))): text="\n".join(text)
    if (filename=="STARTUP.FIL"):
      # HACK FOR WHAT IF: DISABLE DEBUGGING CRASHES ON THE CLUSTER, WHERE IT HANGS THE NODES
      text="setwif 1012 0\n"+text
    text=text.replace('\n','#')
    if ('|' in text):
      self.raiseerror("convtext: Text to convert contains '|' - this character is reserved for use by Models@Home")
      return(None)
    # SEARCH FOR UNIQUE CHARACTERS TO QUOTE TEXT IN JOB LIST
    # ('|' SEPARATES THE FIELDS OF THE JOB LIST AND MUST NOT BE USED AS QUOTE)
    for code in range(34,126):
      quote=chr(code)
      if (quote!='|' and quote not in text): break
    else: return(None)
    return(filename+'='+quote+text+quote)

  # WAIT FOR RESULTS
  # ================
  def waitresult(self,resultfilelist):
    """
    resultfilelist IS A LIST OF RESULTFILES THAT MUST BE RETURNED BY THE
    CLUSTER. THE FUNCTION CHECKS ONCE PER SECOND AND RETURNS WHEN ALL OF
    THEM EXIST. RETURNS IMMEDIATELY IF THE LIST IS EMPTY.
    """
    results=len(resultfilelist)
    if (not results): return
    for i,resultfile in enumerate(resultfilelist):
      sys.stdout.write("\r%d of %d results arrived." % (i,results))
      sys.stdout.flush()
      while (not os.path.exists(resultfile)): time.sleep(1)
    print("\rAll %d results arrived.                   " % results)

# ---------------------------------------------------------------------------

# BUILD TEMPORARY FILE NAME
# =========================
def dsc_tmpfilename(filename):
  """
  RETURNS A TEMPORARY FILE NAME DERIVED FROM filename: EVERYTHING FROM THE
  LAST DOT ON IS REPLACED WITH ".tmp" (OR "_tmp" IS APPENDED IF THERE IS NO
  DOT), FOLLOWED BY THE ID OF THE CURRENT PROCESS.
  """
  # STRING METHOD INSTEAD OF string.rfind(), WHICH DOES NOT EXIST IN PYTHON 3
  dotpos=filename.rfind(".")
  if (dotpos==-1): filename=filename+"_tmp"
  else: filename=filename[:dotpos]+".tmp"
  return(filename+str(os.getpid()))

# DELETE A FILE
# =============
def dsc_remove(filename):
  """
  DELETES filename IF IT EXISTS. DOES NOTHING IF filename IS None OR IF THE
  FILE IS NOT PRESENT.
  """
  if (filename is None or not os.path.exists(filename)): return
  try: os.remove(filename)
  except OSError:
    # IF THE FILE VANISHED BETWEEN THE CHECK AND THE REMOVAL THERE IS
    # NOTHING LEFT TO DO, ANY OTHER FAILURE IS REPORTED TO THE CALLER
    if (os.path.exists(filename)): raise
