# YASARA MACRO # TOPIC: 5. Structure prediction # TITLE: Docking many ligands to a receptor to perform virtual screening # REQUIRES: Structure # AUTHOR: Elmar Krieger # LICENSE: GPL # DESCRIPTION: This macro runs VINA or AutoDock to dock multiple ligands against a receptor and saves a sorted table of their binding energies, as well as the corresponding complexes # Parameter section - adjust as needed, but NOTE that some changes only take effect # if you start an entirely new docking job, not if you continue an existing one. # ================================================================================= # You can either set the target structure by clicking on Options > Macro > Set target, # by providing it as command line argument (see docs at Essentials > The command line), # or by uncommenting the line below and specifying it directly. #MacroTarget='/home/myname/projects/docking/1sdf' # Docking method, either AutoDockLGA or VINA method='VINA' # Number of docking runs per ligand (maximally 999, 0 lets YASARA choose based on the number of CPUs) runs=0 # Number of best poses per ligand saved and reported bestposes=1 # Set to 1 to keep the ligand completely rigid (alternatively you can provide # the ligand as a *.yob file and fix certain dihedral angles only). rigid=0 # A selection of receptor residues whose side-chains should be kept flexible, e.g. # flexres = 'Lys 91 Leu 100', or (if the receptor is not a monomer) # flexres = 'Res Lys 91 Mol A or Res Leu 100 Mol B'. # Alternatively you can provide the receptor as a *.sce or *.yob file with fixed # atoms, which gives better control (e.g. you can keep only part of a side-chain # or even a terminal backbone flexible). flexres='' # Sort results either by ligand number ('ligandnum'), binding energy ('bindnrg') or by # efficiency ('effi'), i.e. binding energy per heavy atom, which addresses the bias towards # larger ligands in screening. sortby='bindnrg' # Force field used for charge assignment (AutoDock only, not VINA) and to determine which ligand bonds can rotate. # Do not change to an in vacuo force field (NOVA), all other force fields should work too (not tested). ForceField AMBER03 # Uncomment below to set a certain random seed (a YASARA command, no '=') # RandomSeed 1000 # Normally no change required below this point # ============================================ if !runs # For an optimum speed/accuracy tradeoff, we select at least 8 runs, but more if CPUs are available cpus = Processors runs=cpus while runs<8 runs=runs+cpus # Sanity checks if MacroTarget=='' RaiseError "This macro requires a target. Either edit the macro file or click Options > Macro > Set target to choose a target structure" if runs>999 RaiseError 'Too many docking runs selected, (runs) would take forever' if runs2 ShowMessage 'Your scene contains (Objects) objects, while only the receptor and the docking cell are expected.' Wait ContinueButton HideMessage # Load the ligands and determine the maximum radius for type in 'yob','pdb','sdf' filename='(MacroTarget)_ligands.(type)' exists = FileSize (filename) if exists ligandlist() = Load(type) (filename) break ligands=count ligandlist if not ligands RaiseError 'No ligands were found at (filename)' if ligands==1 # Maybe ligands are separate molecules, not objects ligandlist() = SplitObj (ligandlist1) ligands=count ligandlist if ligands==1 # If there is really just one ligand, raise an error (otherwise dock_play won't work) RaiseError 'Only a single ligand was found at (filename), you need at least two for screening' # Determine maximum ligand radius radiuslist() = RadiusObj (join ligandlist) radiusmax=max radiuslist # Rename objects to make sure they can be identified later, ligands are 'Waiting' to be docked. NameObj (receptor),Receptor NameObj (join ligandlist),Waiting RemoveObj (join ligandlist) # Keep selected side-chains flexible if flexres!='' FixObj Receptor FreeAtom Res (flexres) Sidechain and Obj Receptor FixRes Cys Atom SG with bond to Atom SG or Ala Pro and Obj Receptor # Add a cell if not already present cellfound = CountObj SimCell if !cellfound if scene RaiseError 'The receptor scene (scefilename) does not contain a cell' Cell Auto,Extension=(radiusmax*2) # Warn if the cell is too large x,y,z = Cell if method!='VINA' and (x>384 or y>384 or z>384) # AutoDock now uses dynamic memory allocation for grids up to 1024x1024x1024 (int MAX_GRID_PTS=1025 in autocomm.h) # Given the standard grid spacing of 0.375 A, these are 384 A. VINA doesn't have a compile-time limit ShowMessage "A cell axis is longer than 384 A, which may reduce docking accuracy. Consider focusing on the active site, or remove unusually long ligands from the library" Wait ContinueButton HideMessage # The segment name C*** is used to tag ligand Conformations, and cannot be used for the receptor hit = ListAtom Segment C??? Obj Receptor if hit MarkAtom (hit) segname = SegAtom (hit) RaiseError 'Segment names starting with "C" are unfortunately reserved to identify ligand conformations, please click "Edit > Name > Segment" to rename the receptor segment "(segname)"' # The molecule name 'l' is reserved for the ligand hit = ListAtom Mol l if hit RaiseError 'The receptor must not contain a molecule named "l", please click Edit > Name > Molecule and try again' # Docking is done without periodic boundaries Boundary Wall Longrange None # Do we have a checkpoint file from a previous run? checkpointfilename='(MacroTarget)_checkpoint.sce' restarted = FileSize (checkpointfilename) if restarted LoadSce (checkpointfilename) # Loop over the ligands for i=1 to ligands # Has this ligand been docked in a previous run? ligand=ligandlist(i) ligname = NameObj (ligand) if ligname=='Done' or ligname=='Broken' continue # Place only the ligand in the soup AddObj (ligand) RemoveObj (receptor) # Check that ligand is not internally disconnected conatoms = CountAtom Obj (ligand) with connection to 1 if conatoms!=Atoms # Ligand has bonds missing, cannot be docked failure=1 else # See if the ligand can be simulated or is broken. Try three times. # This must be done without receptor, which may not have been cleaned yet. OnError Continue for j=1 to 3 # Initialize force field parameters, but do not stop if it fails failure = Sim Init if !failure # All OK break if failure==434 # User pressed escape OnError Stop RaiseError "Screening was interrupted. All progress has been saved, so you can simply run this macro again to continue" if j==1 # Try to retype the bonds TypeBond all,all if j==2 # Try to re-add the hydrogens DelHydObj (ligand) CleanObj (ligand) OnError Stop AddObj (receptor) if failure # This compound is hopeless, skip it NameObj (ligand),Broken BFactorObj (ligand),-9999 SegObj (ligand),C001 else # This compound is OK NameObj (ligand),Docking # Get the ligand compound name and display it at the top ligandname = CompoundMol Obj (ligand) LabelAll 'Screening ligand (i)/(ligands), (ligandname)',Height=0.5,Color=Yellow,Y=4,Z=2 # Move the ligand out of the cell, so that it does not block the view cellpos() = PosObj SimCell PosObj (ligand),(cellpos1),(cellpos2+y*0.5+radiusmax),(cellpos3) # Perform rigid docking if requested if rigid FixObj (ligand) # Align ligand with the cell (to end up with the same local atom coordinates independent # of cell orientation, which are used to calculate the checksum for the *.adr file) TransferObj (ligand),SimCell,Local=Keep # Perform the docking Experiment Docking Method (method) ReceptorObj (receptor) LigandObj (ligand) Runs (runs) # We only want a single result, the best complex ClusterRMSD 1000 # In case of docking failures (e.g. cell too small) don't stop, but assign this binding energy to the ligand [kcal/mol] FailureEnergy -9998 # Result file number must be separated with '_', in case MacroTarget also ends with number # If we exceed 999, an additional digit will be inserted. You can change '.yob' to '.pdb'. ResultFile (MacroTarget)_(000+i).yob # Uncomment below to set the number of energy evaluations (AutoDock ga_num_evals): # DockPar ga_num_evals 25000000 # Uncomment the two lines below to provide your own atom parameters (VdW radii etc.) # GridPar parameter_file /Path/To/Custom/AD4_parameters.dat # DockPar parameter_file /Path/To/Custom/AD4_parameters.dat Experiment On Wait ExpEnd UnlabelAll NameObj (ligand),Done RemoveObj (ligand) RecepFlex NameObj RecepFlex,RecFlex(ligand) # Save checkpoint SaveSce (checkpointfilename) # Save a log file with an analysis Console Off RecordLog (MacroTarget) AddObj all print 'Ligand screening result' print '=======================' print print '(ligands) different ligands were docked with (runs) runs each to the receptor object (receptor) yielding the following results,' print 'sorted by binding energy [more positive energies indicate stronger binding, and negative energies mean no binding].' print 'The best (bestposes) poses of each ligand are reported.' print print "Lig.|Effi[kcal/(mol*Atom)]|Bind.energy[kcal/mol]|Dissoc. constant [pM]| Name | Contacting receptor residues" print '----+---------------------+---------------------+---------------------+------+-----------------------------' idx=1 for i=1 to ligands for j=001 to bestposes resultlist(idx),bindnrglist(idx),effilist(idx) = DockingResult i,'Obj (receptor)','Segment C(j) Obj (ligandlist(i))' idx=idx+1 if !(i%5) ShowMessage 'Analyzing docking results, (100*i/ligands)% completed...' Wait 1 if sortby!='ligandnum' # Sort results by binding energy or binding efficiency SortResults 'resultlist','(sortby)list' for i=1 to ligands NameObj (ligandlist(i)),Ligand(i) for i=1 to count resultlist print (resultlist(i)) print rndseed = RandomSeed print 'The random seed used during docking was (rndseed).' fof = ForceField print 'Point charges and dihedral barriers were obtained from the (fof) force field.' StopLog HideMessage Console On SaveSce (MacroTarget) # Temporarily delete all except the best bestposes selection='Segment C??? and not Segment' for i=001 to bestposes selection=selection+' C(i)' DelRes (selection) DelObj SimCell RenumberObj all # Make each ligand a separate object, keep atom numbers and shift objects ahead SplitObj Ligand??????,Center=No,Keep=AtomNum # Save SDF file with the best ligand poses, coordinate system matches receptor if both are loaded without centering SaveSDF Ligand??????,(MacroTarget)_bestposes,Transform=No # Join all objects with atoms to the receptor JoinObj atom all and not Receptor,Receptor,Center=No # Save PDB file with the receptor and best ligand poses SavePDB Receptor,(MacroTarget)_bestposes,Transform=No # Bring back the complete scene LoadSce (MacroTarget) # Exit YASARA if this macro was provided as command line argument in console mode and not included from another macro if runWithMacro and ConsoleMode and !IndentationLevel Exit # EXTRACT DOCKING RESULT # ====================== # Returns a result string and binding energy, extracted from atomic BFactor and Property. # 'num' is a sequential number, 'recsel' selects the receptor, 'ligsel' the ligand. def DockingResult num,recsel,ligsel # Get the binding energy from the B-factor... bindnrg = BFactorAtom (ligsel) # ...and the dissociation constant from the atomic property. dissconst = PropAtom (ligsel) if bindnrg<=0 dissconst=' None' else dissconst= 00000000000000.0000+dissconst # Get compound name compound = CompoundMol (ligsel) if compound=='' compound = NameRes (ligsel) compound=compound+' ' # Get receptor residues that contact the ligand reslist() = ListRes (recsel) with distance<4.0 from (ligsel),Format='MOLNAME RESNAME RESNUM' if bindnrg<=-9999 reslist='Broken molecule, no docking possible' elif bindnrg<=-9998 reslist='Docking failed. Check if cell is too small for ligand' elif !count reslist reslist='No residues in contact' else reslist=join reslist # Calculate ligand efficiency, i.e. binding energy divided by heavy atoms heavyatoms = CountAtom Element !H (ligsel) effi = bindnrg/heavyatoms result='(0000+num)| (000000.0000+effi) | (000000.0000+bindnrg) | (dissconst) | (compound) | (reslist)' return result,bindnrg,effi # SORT KEYLIST BY VALUES, HIGHEST FIRST # ===================================== # Lists are passed by reference, see Yanaconda doc section 'Calls to user-defined functions..' def SortResults keylist,vallist caller (keylist),(vallist) elements=count (keylist) do sorted=1 for i=1 to elements-1 if (vallist)(i)<(vallist)(i+1) swap=(vallist)(i) (vallist)(i)=(vallist)(i+1) (vallist)(i+1)=swap swap=(keylist)(i) (keylist)(i)=(keylist)(i+1) (keylist)(i+1)=swap sorted=0 while not sorted