"""
A module for (non-TS) species conformer generation
Note:
variables that contain atom indices such as torsions and tops are 1-indexed,
while atoms in Molecules are 0-indexed.
Todo:
* Consider boat-chair conformers (https://en.wikipedia.org/wiki/Cyclohexane_conformation)
* finally, consider h-bonds
* Does it take the scan energy into account when generating combinations??
* The secretary problem - incorporate for stochastic searching
* What's the confirmed bottleneck?
conformers is a list of dictionaries, each with the following keys::
{'xyz': <dict>,
'index': <int>,
'FF energy': <float>,
'source': <str>,
'torsion_dihedrals': {<torsion tuple 0>: angle 0,
<torsion tuple 1>: angle 1,
}
Module workflow::
generate_conformers
generate_force_field_conformers
get_force_field_energies, rdkit_force_field or openbabel_force_field_on_rdkit_conformers,
determine_dihedrals
deduce_new_conformers
get_torsion_angles, determine_torsion_symmetry, determine_torsion_sampling_points,
change_dihedrals_and_force_field_it
get_lowest_confs
"""
import copy
import logging
import sys
import time
from itertools import product
from typing import Dict, List, Optional, Tuple, Union
from openbabel import openbabel as ob
from openbabel import pybel as pyb
from rdkit import Chem
from rdkit.Chem.rdchem import EditableMol as RDMol
import rmgpy.molecule.group as gr
from rmgpy.molecule.converter import to_ob_mol
from rmgpy.molecule.molecule import Atom, Bond, Molecule
from rmgpy.molecule.element import C as C_ELEMENT, H as H_ELEMENT, F as F_ELEMENT, Cl as Cl_ELEMENT, I as I_ELEMENT
from arc.common import (convert_list_index_0_to_1,
determine_top_group_indices,
get_single_bond_length,
generate_resonance_structures,
logger,
)
from arc.exceptions import ConformerError, InputError
import arc.plotter
from arc.species import converter, vectors
# The number of conformers to generate per range of heavy atoms in the molecule
# (will be increased if there are chiral centers)
CONFS_VS_HEAVY_ATOMS = {(0, 3): 75,
(4, 9): 500,
(10, 29): 1000,
(30, 59): 2500,
(60, 99): 5000,
(100, 'inf'): 7500,
}
# The number of conformers to generate per range of potential torsions in the molecule
# (will be increased if there are chiral centers)
CONFS_VS_TORSIONS = {(0, 1): 75,
(2, 5): 500,
(5, 19): 1000,
(20, 34): 2500,
(35, 49): 5000,
(50, 'inf'): 7500,
}
# The resolution (in degrees) for scanning smeared wells
SMEARED_SCAN_RESOLUTIONS = 30.0
# An energy threshold (in kJ/mol) above which wells in a torsion will not be considered (rel. to the most stable well)
DE_THRESHOLD = 5.
# The gap (in degrees) that defines different wells
WELL_GAP = 20
# The maximum number of times to iteratively search for the lowest conformer
MAX_COMBINATION_ITERATIONS = 25
# A threshold below which all combinations will be generated. Above it just samples of the entire search space.
COMBINATION_THRESHOLD = 1000
# Consolidation tolerances for Z matrices
CONSOLIDATION_TOLS = {'R': 1e-2, 'A': 1e-2, 'D': 1e-2}
CHEAT_SHEET = {'[H][H]': {'xyz': converter.str_to_xyz("""H 0.0 0.0 0.3715170
H 0.0 0.0 -0.3715170"""),
'index': 0,
'FF energy': 0,
'source': 'CHEAT_SHEET',
'torsion_dihedrals': {}},
'[CH2]': {'xyz': converter.str_to_xyz("""C 0.0 0.000000 0.1051320
H 0.0 0.988263 -0.3153960
H 0.0 -0.988263 -0.3153960"""),
'index': 0, # Singlet CH2 forms the same H-C-H angle as triplet CH2
'FF energy': 0,
'source': 'CHEAT_SHEET',
'torsion_dihedrals': {}},
'[CH]=C': {'xyz': converter.str_to_xyz("""C 0.0513140 0.7351300 0.0000000
C 0.0513140 -0.5980320 0.0000000
H -0.7050560 1.5248940 0.0000000
H -0.8925320 -1.1684180 0.0000000
H 0.9818240 -1.1790600 0.0000000"""),
'index': 0,
'FF energy': 0,
'source': 'CHEAT_SHEET',
'torsion_dihedrals': {}}
}
[docs]def cheat_sheet(mol_list: Union[List[Molecule], Molecule]) -> Optional[List[Dict]]:
"""
Check if the species is in the cheat sheet, and return its correct xyz if it is.
Args:
mol_list (Union[List[Molecule], Molecule]): Molecule objects to consider (or Molecule, resonance structures will be generated).
Returns: Optional[lis[Dict]]
"""
mol_list = [mol_list] if not isinstance(mol_list, list) else mol_list
for smiles in CHEAT_SHEET.keys():
cheat_mol = Molecule(smiles=smiles)
for mol in mol_list:
if cheat_mol.is_isomorphic(mol.copy(deep=True)):
return [CHEAT_SHEET[smiles]]
return None
[docs]def generate_all_combinations(label, mol, base_xyz, multiple_tors, multiple_sampling_points, len_conformers=-1,
torsions=None, force_field='MMFF94s'):
"""
Generate all combinations of torsion wells from a base conformer.
Args:
label (str): The species' label.
mol (Molecule): The RMG molecule with the connectivity information.
base_xyz (dict): The base 3D geometry to be changed.
multiple_tors (list): Entries are torsion tuples of non-symmetric torsions.
multiple_sampling_points (list): Entries are lists of dihedral angles (sampling points), respectively correspond
to torsions in multiple_tors.
len_conformers (int, optional): The length of the existing conformers list (for consecutive numbering).
force_field (str, optional): The type of force field to use.
torsions (list, optional): A list of all possible torsions in the molecule. Will be determined if not given.
Returns:
list: New conformer combinations, entries are conformer dictionaries.
"""
# Generate sampling points combinations.
product_combinations = list(product(*multiple_sampling_points))
new_conformers = list()
if multiple_tors:
xyzs, energies = change_dihedrals_and_force_field_it(label, mol, xyz=base_xyz, torsions=multiple_tors,
new_dihedrals=product_combinations, optimize=True,
force_field=force_field)
for xyz, energy in zip(xyzs, energies):
if xyz is not None:
new_conformers.append({'index': len_conformers + len(new_conformers),
'xyz': xyz,
'FF energy': energy,
'source': 'Generated all combinations from scan map'})
else:
# No multiple torsions (all torsions are symmetric or no torsions in the molecule), this is a trivial case.
energy = get_force_field_energies(label, mol, num_confs=None, xyz=base_xyz, force_field=force_field,
optimize=True, suppress_warning=True)[1]
if energy is not None and len(energy):
energy = energy[0]
else:
return list()
new_conformers.append({'index': len_conformers + len(new_conformers),
'xyz': base_xyz,
'FF energy': energy,
'source': 'Generated all combinations from scan map (trivial case)'})
if torsions is None:
torsions = determine_rotors([mol])
new_conformers = determine_dihedrals(new_conformers, torsions)
return new_conformers
[docs]def change_dihedrals_and_force_field_it(label, mol, xyz, torsions, new_dihedrals, optimize=True, force_field='MMFF94s'):
"""
Change dihedrals of specified torsions according to the new dihedrals specified, and get FF energies.
Example::
torsions = [(1, 2, 3, 4), (9, 4, 7, 1)]
new_dihedrals = [[90, 120], [90, 300], [180, 270], [30, 270]]
This will calculate the energy of the original conformer (defined using `xyz`).
We iterate through new_dihedrals. The torsions are set accordingly and the energy and xyz of the newly
generated conformer are kept.
We assume that each list entry in new_dihedrals is of the length of the torsions list (2 in the example).
Args:
label (str): The species' label.
mol (Molecule): The RMG molecule with the connectivity information.
xyz (dict): The base 3D geometry to be changed.
torsions (list): Entries are torsion tuples for which the dihedral will be changed relative to xyz.
new_dihedrals (list): Entries are same size lists of dihedral angles (floats) corresponding to the torsions.
optimize (bool, optional): Whether to optimize the coordinates using FF. True to optimize.
force_field (str, optional): The type of force field to use.
Returns:
Tuple[list, list]:
- The conformer FF energies corresponding to the list of dihedrals.
- The conformer xyz geometries corresponding to the list of dihedrals.
"""
if isinstance(xyz, str):
xyz = converter.str_to_xyz(xyz)
if torsions is None or new_dihedrals is None:
xyz, energy = get_force_field_energies(label, mol=mol, xyz=xyz, optimize=True,
force_field=force_field, suppress_warning=True)
return xyz, energy
xyzs, energies = list(), list()
# make sure new_dihedrals is a list of lists (or tuples):
if isinstance(new_dihedrals, (int, float)):
new_dihedrals = [[new_dihedrals]]
if isinstance(new_dihedrals, list) and not isinstance(new_dihedrals[0], (list, tuple)):
new_dihedrals = [new_dihedrals]
for dihedrals in new_dihedrals:
xyz_dihedrals = xyz
for torsion, dihedral in zip(torsions, dihedrals):
conf, rd_mol = converter.rdkit_conf_from_mol(mol, xyz_dihedrals)
if conf is not None:
torsion_0_indexed = [tor - 1 for tor in torsion]
xyz_dihedrals = converter.set_rdkit_dihedrals(conf, rd_mol, torsion_0_indexed, deg_abs=dihedral)
xyz_, energy = get_force_field_energies(label, mol=mol, xyz=xyz_dihedrals, optimize=True,
force_field=force_field, suppress_warning=True)
if energy and xyz_:
energies.append(energy[0])
if optimize:
xyzs.append(xyz_[0])
else:
xyzs.append(xyz_dihedrals)
else:
energies.append(None)
xyzs.append(xyz_dihedrals)
return xyzs, energies
[docs]def determine_rotors(mol_list):
"""
Determine possible unique rotors in the species to be treated as hindered rotors.
Args:
mol_list (list): Localized structures (Molecule objects) by which all rotors will be determined.
Returns:
Tuple[list, list]:
- A list of indices of scan pivots.
- A list of indices of top atoms (including one of the pivotal atoms) corresponding to the torsions.
"""
torsions, tops = list(), list()
for mol in mol_list:
rotors = find_internal_rotors(mol)
for new_rotor in rotors:
for existing_torsion in torsions:
if existing_torsion == new_rotor['scan']:
break
else:
torsions.append(new_rotor['scan'])
tops.append(new_rotor['top'])
return torsions, tops
[docs]def determine_dihedrals(conformers, torsions):
"""
For each conformer in `conformers` determine the respective dihedrals.
Args:
conformers (list): Entries are conformer dictionaries.
torsions (list): All possible torsions in the molecule.
Returns:
list: Entries are conformer dictionaries.
"""
for conformer in conformers:
if isinstance(conformer['xyz'], str):
xyz = converter.str_to_xyz(conformer['xyz'])
else:
xyz = conformer['xyz']
if 'torsion_dihedrals' not in conformer or not conformer['torsion_dihedrals']:
conformer['torsion_dihedrals'] = dict()
for torsion in torsions:
dihedral = vectors.calculate_dihedral_angle(coords=xyz['coords'], torsion=torsion, index=1)
conformer['torsion_dihedrals'][tuple(torsion)] = dihedral
return conformers
[docs]def determine_torsion_sampling_points(label, torsion_angles, smeared_scan_res=None, symmetry=1):
"""
Determine how many points to consider in each well of a torsion for conformer combinations.
Args:
label (str): The species' label.
torsion_angles (list): Well angles in the torsion.
smeared_scan_res (float, optional): The resolution (in degrees) for scanning smeared wells.
symmetry (int, optional): The torsion symmetry number.
Returns:
Tuple[list, list]:
- Sampling points for the torsion.
- Each entry is a well dictionary with the keys
``start_idx``, ``end_idx``, ``start_angle``, ``end_angle``, ``angles``.
"""
smeared_scan_res = smeared_scan_res or SMEARED_SCAN_RESOLUTIONS
sampling_points = list()
wells = get_wells(label, torsion_angles, blank=WELL_GAP)
if symmetry % len(wells) == 0:
symmetry = len(wells)
for i, well in enumerate(wells):
width = abs(well['end_angle'] - well['start_angle'])
mean = sum(well['angles']) / len(well['angles'])
if width <= 2 * smeared_scan_res:
sampling_points.append(mean)
else:
num = int(width / smeared_scan_res)
padding = abs(mean - well['start_angle'] - ((num - 1) * smeared_scan_res) / 2)
sampling_points.extend([padding + well['angles'][0] + smeared_scan_res * j for j in range(int(num))])
if symmetry > 1 and i == len(wells) / symmetry - 1:
break
return sampling_points, wells
[docs]def determine_torsion_symmetry(label, top1, mol_list, torsion_scan):
"""
Check whether a torsion is symmetric.
If a torsion well is "well defined" and not smeared, it could be symmetric.
Check the groups attached to the rotor pivots to determine whether it is indeed symmetric
We don't care about the actual rotor symmetry number here, since we plan to just use the first well
(they're all the same).
Args:
label (str): The species' label.
top1 (list): A list of atom indices on one side of the torsion, including the pivotal atom.
mol_list (list): A list of molecules.
torsion_scan (list): The angles corresponding to this torsion from all conformers.
Returns:
int: The rotor symmetry number.
"""
symmetry = 1
check_tops = [1, 1] # flags for checking top1 and top2
mol = mol_list[0]
top2 = [i + 1 for i in range(len(mol.atoms)) if i + 1 not in top1]
for j, top in enumerate([top1, top2]):
# A quick bypass for carbon rotors that have three equal elements with only one bond that is connected to the carbon atom
if len(top) == 4 and mol.atoms[top[0] - 1].is_carbon() \
and all(mol.atoms[top[1] - 1].symbol == mol.atoms[top[i] - 1].symbol for i in range(2, 4)) \
and all(len(mol.atoms[top[i] - 1].bonds) == 1 for i in range(1, 4)):
symmetry *= 3
check_tops[j] = 0
# A quick bypass for methylene radicals:
if len(top) == 3 and mol.atoms[top[0] - 1].is_carbon() and mol.atoms[top[0] - 1].radical_electrons == 1 \
and all([mol.atoms[top[i] - 1].is_hydrogen() for i in range(1, 3)]):
symmetry *= 2
check_tops[j] = 0
# A quick bypass for benzene rings:
elif len(top) == 11 and sum([mol.atoms[top[i] - 1].is_carbon() for i in range(11)]) == 6 \
and sum([mol.atoms[top[i] - 1].is_hydrogen() for i in range(11)]) == 5:
symmetry *= 2
check_tops[j] = 0
# treat the torsion list as cyclic, search for at least two blank parts of at least 60 degrees each
# if the means of all data parts of the scan are uniformly scattered, the torsion might be symmetric
wells = get_wells(label=label, angles=torsion_scan, blank=60)
distances, well_widths = list(), list()
for i in range(len(wells)):
well_widths.append(abs(wells[i]['end_angle'] - wells[i]['start_angle']))
if i > 0:
distances.append(int(round(abs(wells[i]['start_angle'] - wells[i - 1]['end_angle'])) / 10) * 10)
mean_well_width = sum(well_widths) / len(well_widths)
if len(wells) in [1, 2, 3, 4, 6, 9] and all([distance == distances[0] for distance in distances]) \
and all([abs(width - mean_well_width) / mean_well_width < determine_well_width_tolerance(mean_well_width)
for width in well_widths]):
# All well distances and widths are equal. The torsion scan might be symmetric, check the groups
for j, top in enumerate([top1, top2]):
if check_tops[j]:
groups, grp_idx, groups_indices = list(), list(), list()
sssr_list = mol.get_deterministic_sssr()
sssr_index_list = [[mol.atoms.index(atom) for atom in ring] for ring in sssr_list]
atom_in_ring = dict()
for atom in mol.atoms[top[0] - 1].edges.keys():
atom_index = mol.atoms.index(atom)
for i, ring in enumerate(sssr_index_list):
if atom_index in ring:
if atom_index in atom_in_ring:
atom_in_ring[atom_index].append(i)
else:
atom_in_ring[atom_index] = [i]
if mol.vertices.index(atom) + 1 in top:
atom_indices = determine_top_group_indices(
mol=mol, atom1=mol.atoms[top[0] - 1], atom2=atom, index=0)[0]
groups.append(to_group(mol, atom_indices))
grp_idx.append(atom_indices)
groups_indices.append([g + 1 for g in atom_indices])
flat_list = [num for sublist in atom_in_ring.values() for num in sublist]
if len(flat_list) != len(set(flat_list)):
break
# hard-coding for NO2/NS2 groups, since the two O or S atoms have different atom types in each localized
# structure, hence are not isomorphic
if len(top) == 3 and mol.atoms[top[0] - 1].atomtype.label == 'N5dc' \
and (all([mol.atoms[top[k] - 1].atomtype.label in ['O2d', 'O0sc'] for k in [1, 2]])
or all([mol.atoms[top[k] - 1].atomtype.label in ['S2d', 'S0sc'] for k in [1, 2]])):
symmetry *= 2
# all other groups:
elif not mol.atoms[top[0] - 1].lone_pairs > 0 and not mol.atoms[top[0] - 1].radical_electrons > 0 \
and all([groups[0].is_isomorphic(group, save_order=True) for group in groups[1:]]):
symmetry *= len(groups)
return symmetry
[docs]def add_missing_symmetric_torsion_values(top1, mol_list, torsion_scan):
"""
Add symmetry to a torsion scan in the rotor for efficient conformer generation.
Args:
top1 (list): A list of atom indices on one side of the torsion, including the pivotal atom.
mol_list (list): A list of molecules.
torsion_scan (list): The angles corresponding to this torsion from all conformers.
Returns:
torsion_scan (list): The modified torsion_scan with added symmetric angles.
"""
mol = mol_list[0]
top2 = [i + 1 for i in range(len(mol.atoms)) if i + 1 not in top1]
for j, top in enumerate([top1, top2]):
# A quick bypass for carbon rotors that have three equal elements (e.g., CH3 or CF3) with only one bond that is connected to the carbon atom
# For such rotors, if a torsion angle difference of 60 degrees is found during the sampling, we would like to manually add this gap for the existing symmetries
if len(top) == 4 and mol.atoms[top[0] - 1].is_carbon() \
and all(mol.atoms[top[1] - 1].symbol == mol.atoms[top[i] - 1].symbol for i in range(2, 4)) \
and all(len(mol.atoms[top[i] - 1].bonds) == 1 for i in range(1, 4)):
new_angles = []
for angle in torsion_scan:
test_angle = (angle + 60) % 360
if any((test_angle - 5) % 360 <= existing_angle <= (test_angle + 5) % 360 for existing_angle in torsion_scan):
for angle_tba in torsion_scan:
new_angle = (angle_tba + 60) % 360 # Calculate new angle and ensure it wraps around at 360
# Check if the new angle, adjusted by ±30 degrees, overlaps with any existing angles
if not any((new_angle - 30) % 360 <= existing_angle <= (new_angle + 30) % 360 for existing_angle in torsion_scan + new_angles):
new_angles.append(new_angle)
break
# Extend the original list with non-overlapping new angles
torsion_scan.extend(new_angles)
torsion_scan.sort() # Sort the list to maintain order
return torsion_scan
[docs]def determine_well_width_tolerance(mean_width):
"""
Determine the tolerance by which well widths are determined to be nearly equal.
Fitted to a polynomial trend line for the following data of (mean, tolerance) pairs::
(100, 0.11), (60, 0.13), (50, 0.15), (25, 0.25), (5, 0.50), (1, 0.59)
Args:
mean_width (float): The mean well width in degrees.
Returns:
float: The tolerance.
"""
if mean_width > 100:
return 0.1
tol = -1.695e-10 * mean_width ** 5 + 6.209e-8 * mean_width ** 4 - 8.855e-6 * mean_width ** 3 \
+ 6.446e-4 * mean_width ** 2 - 2.610e-2 * mean_width + 0.6155
return tol
[docs]def get_lowest_confs(label: str,
confs: Union[dict, list],
n: Optional[int] = 10,
e: Optional[float] = 5.0,
energy: str = 'FF energy',
) -> list:
"""
Get the most stable conformer.
Args:
label (str): The species' label.
confs (dict, list): Entries are either conformer dictionaries or a length two list of xyz coordinates and energy
n (int, optional): Number of lowest conformers to return.
e (float, optional): The energy threshold above the lowest energy conformer in kJ/mol
below which all conformers will be returned.
energy (str, optional): The energy attribute to search by. Currently only 'FF energy' is supported.
Raises:
ConformerError: If n < 1, e < 0, both n and e are ``None``, or if no conformers are given.
Returns:
list: Conformer dictionaries.
"""
if e is not None:
if e < 0:
raise ConformerError(f'e cannot be negative, got: {e}')
elif n is not None:
if n < 1:
raise ConformerError(f'n cannot be lower than 1, got: {n}')
else:
raise ConformerError('Either `n` or `e` must be specified')
if not confs or confs is None:
raise ConformerError(f'get_lowest_confs() got no conformers for {label}')
if isinstance(confs[0], list):
conformer_list = list()
for entry in confs:
if entry[1] is not None:
conformer_list.append({'xyz': entry[0], energy: entry[1]})
elif isinstance(confs[0], dict):
conformer_list = [conformer for conformer in confs if energy in conformer and conformer[energy] is not None]
else:
raise ConformerError(f'confs could either be a list of dictionaries or a list of lists. '
f'Got a list of {type(confs[0])}s for {label}')
conformer_list.sort(key=lambda conformer: conformer[energy], reverse=False)
if e is not None:
min_e = min([conf[energy] for conf in conformer_list])
lowest_confs = [conformer_list[0]]
for index in range(len(conformer_list)):
if (e is not None and conformer_list[index][energy] > min_e + e) or (n is not None and len(lowest_confs) >= n):
break
if index > 0:
for lowest_conf in lowest_confs:
_, dmat1, lowest_conf, similar = converter.compare_confs_fl(conformer_list[index]['xyz'],lowest_conf)
if not similar or not converter.compare_confs(conformer_list[index]['xyz'], lowest_conf['xyz'],
skip_conversion=True,
dmat1=dmat1,dmat2=lowest_conf['dmat']):
lowest_confs.append(conformer_list[index]) if lowest_conf==lowest_confs[-1] else None
else:
break
return lowest_confs
[docs]def get_torsion_angles(label, conformers, torsions, mol_list=None, tops=None):
"""
Populate each torsion pivots with all available angles from the generated conformers.
Args:
label (str): The species' label.
conformers (list): The conformers from which to extract the angles.
torsions (list): The torsions to consider.
mol_list (list, optional): A list of Molecule objects.
tops (list, optional): A list of tops corresponding to torsions.
Returns:
dict: The torsion angles. Keys are torsion tuples, values are lists of all corresponding angles from conformers.
"""
torsion_angles = dict()
if len(conformers) and not any(['torsion_dihedrals' in conformer for conformer in conformers]):
raise ConformerError(f'Could not determine dihedral torsion angles for {label}. '
f'Consider calling `determine_dihedrals()` first.')
for conformer in conformers:
if 'torsion_dihedrals' in conformer and conformer['torsion_dihedrals']:
for torsion in torsions:
if tuple(torsion) not in torsion_angles:
torsion_angles[tuple(torsion)] = list()
torsion_angles[tuple(torsion)].append(conformer['torsion_dihedrals'][tuple(torsion)])
for tor in torsion_angles.keys():
torsion_angles[tor].sort()
if mol_list is not None and tops is not None:
for torsion, top in zip(torsions, tops):
torsion_angles[tuple(torsion)] = add_missing_symmetric_torsion_values(top, mol_list, torsion_angles[tuple(torsion)])
return torsion_angles
[docs]def get_force_field_energies(label: str,
mol: Molecule,
num_confs: int = None,
xyz: dict = None,
force_field: str = 'MMFF94s',
try_uff: bool = True,
optimize: bool = True,
try_ob: bool = False,
suppress_warning: bool = False,
) -> Tuple[list, list]:
"""
Determine force field energies using RDKit.
If ``num_confs`` is given, random 3D geometries will be generated. If xyz is given, it will be directly used instead.
The coordinates are returned in the order of atoms in mol.
Args:
label (str): The species' label.
mol (Molecule): The RMG molecule object with connectivity and bond order information.
num_confs (int, optional): The number of random 3D conformations to generate.
xyz (dict, optional): The 3D coordinates guess.
force_field (str, optional): The type of force field to use.
try_uff (bool, optional): Whether to try UFF if MMFF94(s) fails. ``True`` by default.
optimize (bool, optional): Whether to first optimize the conformer using FF. True to optimize.
try_ob (bool, optional): Whether to try OpenBabel if RDKit fails. ``True`` to try, ``True`` by default.
suppress_warning (bool, optional): Whether to suppress OpenBabel warnings. ``False`` by default.
Raises:
ConformerError: If conformers could not be generated.
Returns:
Tuple[list, list]:
- Entries are xyz coordinates, each in a dict format.
- Entries are the FF energies (in kJ/mol).
"""
xyzs, energies = list(), list()
if force_field.lower() in ['mmff94', 'mmff94s', 'uff']:
rd_mol = embed_rdkit(label, mol, num_confs=num_confs, xyz=xyz)
if rd_mol is not None:
xyzs, energies = rdkit_force_field(label,
rd_mol,
mol,
force_field=force_field,
try_uff=try_uff,
optimize=optimize,
)
if not len(xyzs) and force_field.lower() in ['gaff', 'mmff94', 'mmff94s', 'uff', 'ghemical'] and try_ob:
if not suppress_warning:
logger.warning(f'Using OpenBabel (instead of RDKit) as a fall back method to generate conformers for {label}. '
f'This is often slower.')
xyzs, energies = openbabel_force_field(label,
mol,
num_confs,
xyz=xyz,
force_field=force_field,
)
if not len(xyzs):
if force_field.lower() not in ['mmff94', 'mmff94s', 'uff', 'gaff', 'ghemical']:
raise ConformerError(f'Unrecognized force field for {label}. Should be either MMFF94, MMFF94s, UFF, '
f'Ghemical, or GAFF. Got: {force_field}.')
return xyzs, energies
[docs]def mix_rdkit_and_openbabel_force_field(label,
mol,
num_confs=None,
xyz=None,
force_field='GAFF',
try_ob=False):
"""
Optimize conformers using a force field (GAFF, MMFF94s, MMFF94, UFF, Ghemical)
Use RDKit to generate the random conformers (OpenBabel isn't good enough),
but use OpenBabel to optimize them (RDKit doesn't have GAFF).
Args:
label (str): The species' label.
mol (Molecule, optional): The RMG molecule object with connectivity and bond order information.
num_confs (int, optional): The number of random 3D conformations to generate.
xyz (string or list, optional): The 3D coordinates in either a string or an array format.
force_field (str, optional): The type of force field to use.
try_ob (bool, optional): Whether to try OpenBabel if RDKit fails. ``True`` to try, ``False`` by default.
Returns:
Tuple[list, list]:
- Entries are optimized xyz's in a list format.
- Entries are float numbers representing the energies in kJ/mol.
"""
xyzs, energies = list(), list()
rd_mol = embed_rdkit(label, mol, num_confs=num_confs, xyz=xyz)
if rd_mol is None:
return xyzs, energies
unoptimized_xyzs = list()
for i in range(rd_mol.GetNumConformers()):
conf, xyz = rd_mol.GetConformer(i), list()
for j in range(conf.GetNumAtoms()):
pt = conf.GetAtomPosition(j)
xyz.append([pt.x, pt.y, pt.z])
xyz = [xyz[j] for j, _ in enumerate(xyz)] # reorder
unoptimized_xyzs.append(xyz)
if try_ob:
if not len(unoptimized_xyzs):
# use OB as the fallback method
logger.warning(f'Using OpenBabel (instead of RDKit) as a fall back method to generate conformers for {label}. '
f'This is often slower, and prohibits ARC from using all features of the conformers module.')
xyzs, energies = openbabel_force_field(label, mol, num_confs, force_field=force_field)
else:
for xyz in unoptimized_xyzs:
xyzs_, energies_ = openbabel_force_field(label, mol, num_confs, xyz=xyz, force_field=force_field)
xyzs.extend(xyzs_)
energies.extend(energies_)
return xyzs, energies
[docs]def openbabel_force_field(label, mol, num_confs=None, xyz=None, force_field='GAFF', method='diverse'):
"""
Optimize conformers using a force field (GAFF, MMFF94s, MMFF94, UFF, Ghemical).
Args:
label (str): The species' label.
mol (Molecule, optional): The RMG molecule object with connectivity and bond order information.
num_confs (int, optional): The number of random 3D conformations to generate.
xyz (dict, optional): The 3D coordinates.
force_field (str, optional): The type of force field to use.
method (str, optional): The conformer searching method to use in OpenBabel.
For method description, see https://openbabel.org/dev-api/group__conformer.shtml
Returns:
Tuple[list, list]:
- Entries are optimized xyz's in a list format.
- Entries are float numbers representing the energies in kJ/mol.
"""
xyzs, energies = list(), list()
ff = ob.OBForceField.FindForceField(force_field)
if xyz is not None:
obmol = ob.OBMol()
atoms = mol.vertices
ob_atom_ids = dict()
for i, atom in enumerate(atoms):
a = obmol.NewAtom()
a.SetAtomicNum(atom.number)
a.SetVector(xyz['coords'][i][0], xyz['coords'][i][1], xyz['coords'][i][2])
if atom.element.isotope != -1:
a.SetIsotope(atom.element.isotope)
a.SetFormalCharge(atom.charge)
ob_atom_ids[atom] = a.GetId()
orders = {1: 1, 2: 2, 3: 3, 4: 4, 1.5: 5}
for atom1 in mol.vertices:
for atom2, bond in atom1.edges.items():
if bond.is_hydrogen_bond():
continue
index1 = atoms.index(atom1)
index2 = atoms.index(atom2)
if index1 < index2:
obmol.AddBond(index1 + 1, index2 + 1, orders[bond.order])
# optimize
ff.Setup(obmol)
ff.SetLogLevel(0)
ff.SetVDWCutOff(6.0) # The VDW cut-off distance (default=6.0)
ff.SetElectrostaticCutOff(10.0) # The Electrostatic cut-off distance (default=10.0)
ff.SetUpdateFrequency(10) # The frequency to update the non-bonded pairs (default=10)
ff.EnableCutOff(False) # Use cut-off (default=don't use cut-off)
ff.SteepestDescentInitialize() # ConjugateGradientsInitialize
v = 1
while v:
v = ff.SteepestDescentTakeNSteps(1) # ConjugateGradientsTakeNSteps
if ff.DetectExplosion():
raise ConformerError(f'Force field {force_field} exploded with method SteepestDescent for {label}')
ff.GetCoordinates(obmol)
elif num_confs is not None:
obmol, ob_atom_ids = to_ob_mol(mol, return_mapping=True)
pybmol = pyb.Molecule(obmol)
pybmol.make3D()
obmol = pybmol.OBMol
ff.Setup(obmol)
if method.lower() == 'weighted':
ff.WeightedRotorSearch(num_confs, 2000)
elif method.lower() == 'random':
ff.RandomRotorSearch(num_confs, 2000)
elif method.lower() == 'diverse':
rmsd_cutoff = 0.5
energy_cutoff = 50.
confab_verbose = False
ff.DiverseConfGen(rmsd_cutoff, num_confs, energy_cutoff, confab_verbose)
elif method.lower() == 'systematic':
ff.SystematicRotorSearch(num_confs)
else:
raise ConformerError(f'Could not identify method {method} for {label}')
else:
raise ConformerError(f'Either num_confs or xyz should be given for {label}')
ff.GetConformers(obmol)
obconversion = ob.OBConversion()
obconversion.SetOutFormat('xyz')
for i in range(obmol.NumConformers()):
obmol.SetConformer(i)
ff.Setup(obmol)
xyz_str = '\n'.join(obconversion.WriteString(obmol).splitlines()[2:])
xyz_dict = converter.str_to_xyz(xyz_str)
xyz_dict['coords'] = tuple(xyz_dict['coords'][ob_atom_ids[mol.atoms[j]]]
for j in range(len(xyz_dict['coords'])))
xyzs.append(xyz_dict)
energies.append(ff.Energy())
return xyzs, energies
[docs]def embed_rdkit(label, mol, num_confs=None, xyz=None):
"""
Generate unoptimized conformers in RDKit. If ``xyz`` is not given, random conformers will be generated.
Args:
label (str): The species' label.
mol (RMG Molecule or RDKit RDMol): The molecule object with connectivity and bond order information.
num_confs (int, optional): The number of random 3D conformations to generate.
xyz (dict, optional): The 3D coordinates.
Returns:
Optional[RDMol]: An RDKIt molecule with embedded conformers.
"""
if num_confs is None and xyz is None:
raise ConformerError(f'Either num_confs or xyz must be set when calling embed_rdkit() for {label}')
if isinstance(mol, RDMol):
rd_mol = mol
elif isinstance(mol, Molecule):
rd_mol = converter.to_rdkit_mol(mol=mol, remove_h=False)
else:
raise ConformerError(f'Argument mol can be either an RMG Molecule or an RDKit RDMol object. '
f'Got {type(mol)} for {label}')
if num_confs is not None:
try:
Chem.AllChem.EmbedMultipleConfs(rd_mol, numConfs=num_confs, randomSeed=1, enforceChirality=True)
except:
logger.warning(f'Could not embed conformers using RDKit for {label}')
return None
elif xyz is not None:
rd_conf = Chem.Conformer(rd_mol.GetNumAtoms())
for i in range(rd_mol.GetNumAtoms()):
rd_conf.SetAtomPosition(i, xyz['coords'][i])
rd_mol.AddConformer(rd_conf)
return rd_mol
[docs]def rdkit_force_field(label: str,
rd_mol: Optional[RDMol],
mol: Optional[Molecule] = None,
num_confs: Optional[int] = None,
force_field: str = 'MMFF94s',
try_uff: bool = True,
optimize: bool = True,
try_ob: bool = False,
) -> Tuple[list, list]:
"""
Optimize RDKit conformers using a force field (MMFF94 or MMFF94s are recommended).
For UFF see: https://www.rdkit.org/docs/source/rdkit.Chem.rdForceFieldHelpers.html#rdkit.Chem.rdForceFieldHelpers.UFFOptimizeMoleculeConfs
Args:
label (str): The species' label.
rd_mol (RDKit RDMol): The RDKit molecule with embedded conformers to optimize.
mol (Molecule, optional): The RMG molecule object with connectivity and bond order information.
num_confs (int, optional): The number of random 3D conformations to generate.
force_field (str, optional): The type of force field to use.
try_uff (bool, optional): Whether to try UFF if MMFF94(s) fails. ``True`` by default.
optimize (bool, optional): Whether to first optimize the conformer using FF. True to optimize.
try_ob (bool, optional): Whether to try OpenBabel if RDKit fails. ``True`` to try, ``False`` by default.
Returns:
Tuple[list, list]:
- Entries are optimized xyz's in a dictionary format.
- Entries are float numbers representing the energies.
"""
xyzs, energies = list(), list()
if rd_mol is None:
return xyzs, energies
for i in range(rd_mol.GetNumConformers()):
if optimize:
v, j = 1, 0
while v == 1 and j < 200: # v == 1: continue, v == 0: enough steps, v == -1: unable to set up
try:
v = Chem.AllChem.MMFFOptimizeMolecule(rd_mol,
mmffVariant=force_field,
confId=i,
maxIters=500,
ignoreInterfragInteractions=False,
)
except:
pass
else:
j += 1
mol_properties = Chem.AllChem.MMFFGetMoleculeProperties(rd_mol, mmffVariant=force_field)
if mol_properties is not None:
ff = Chem.AllChem.MMFFGetMoleculeForceField(rd_mol, mol_properties, confId=i)
if optimize:
energies.append(ff.CalcEnergy())
xyzs.append(read_rdkit_embedded_conformer_i(rd_mol, i))
if not len(xyzs) and 'MMFF' in force_field and try_uff:
output = None
if optimize:
try:
output = Chem.AllChem.UFFOptimizeMoleculeConfs(rd_mol,
maxIters=200,
ignoreInterfragInteractions=True,
)
except (RuntimeError, ValueError):
if try_ob:
logger.warning(f'Using OpenBabel (instead of RDKit) as a fall back method to generate conformers '
f'for {label}. This is often slower.')
xyzs, energies = openbabel_force_field_on_rdkit_conformers(label,
rd_mol,
force_field=force_field,
optimize=optimize,
)
return xyzs, energies
for i in range(rd_mol.GetNumConformers()):
if output is not None and output[i][0] == 0: # The optimization converged.
energies.append(output[i][1])
xyzs.append(read_rdkit_embedded_conformer_i(rd_mol, i))
return xyzs, energies
[docs]def get_wells(label, angles, blank=WELL_GAP):
"""
Determine the distinct wells from a list of angles.
Args:
label (str): The species' label.
angles (list): The angles in the torsion.
blank (int, optional): The blank space between wells.
Returns:
list: Entry are well dicts with keys: ``start_idx``, ``end_idx``, ``start_angle``, ``end_angle``, ``angles``.
"""
if not angles:
raise ConformerError(f'Cannot determine wells without angles for {label}')
new_angles = angles
if angles[0] < 0 + blank and angles[-1] > 360 - blank:
# relocate the first chunk of data at the end, the well seems to include the +180/-180 degrees point
for i, angle in enumerate(angles):
if i > 0 and abs(angle - angles[i - 1]) > blank:
part2 = angles[:i]
for j, _ in enumerate(part2):
part2[j] += 360
new_angles = angles[i:] + part2
break
wells = list()
new_well = True
for i in range(len(new_angles) - 1):
if new_well:
wells.append({'start_idx': i,
'end_idx': None,
'start_angle': new_angles[i],
'end_angle': None,
'angles': list()})
new_well = False
wells[-1]['angles'].append(new_angles[i])
if abs(new_angles[i + 1] - new_angles[i]) > blank:
# This is the last point in this well
wells[-1]['end_idx'] = i
wells[-1]['end_angle'] = new_angles[i]
new_well = True
if len(wells):
wells[-1]['end_idx'] = len(new_angles) - 1
wells[-1]['end_angle'] = new_angles[-1]
wells[-1]['angles'].append(new_angles[-1])
return wells
[docs]def check_special_non_rotor_cases(mol, top1, top2):
"""
Check whether one of the tops correspond to a special case which does not have a torsional mode.
Checking for ``R-[C,N]#[N,[CH],[C]]`` groups, such as: in cyano groups (`R-C#N``),
C#C groups (``R-C#CH`` or ``R-C#[C]``), and azide groups: (``R-N#N``).
Args:
mol (Molecule): The RMG molecule.
top1 (list): Entries are atom indices (1-indexed) on one side of the torsion, inc. one of the pivotal atoms.
top2 (list): Entries are atom indices (1-indexed) on the other side of the torsion, inc. the other pivotal atom.
Returns:
bool: ``True`` if this is indeed a special case which should **not** be treated as a torsional mode.
"""
for top in [top1, top2]:
if mol.atoms[top[0] - 1].atomtype.label in ['Ct', 'N3t', 'N5tc'] \
and mol.atoms[top[1] - 1].atomtype.label in ['Ct', 'N3t'] and \
(len(top) == 2 or (len(top) == 3 and mol.atoms[top[2] - 1].is_hydrogen())):
return True
return False
[docs]def find_internal_rotors(mol):
"""
Locates the sets of indices corresponding to every internal rotor (1-indexed).
Args:
mol (Molecule): The molecule for which rotors will be determined.
Returns:
list: Entries are rotor dictionaries with the four-atom scan coordinates, the pivots, and the smallest top.
"""
rotors = list()
for atom1 in mol.vertices:
if atom1.is_non_hydrogen():
for atom2, bond in atom1.edges.items():
if atom2.is_non_hydrogen() and mol.vertices.index(atom1) < mol.vertices.index(atom2) \
and (bond.is_single() or bond.is_hydrogen_bond()) and not mol.is_bond_in_cycle(bond):
if len(atom1.edges) > 1 and len(atom2.edges) > 1: # None of the pivotal atoms are terminal.
rotor = dict()
# pivots:
rotor['pivots'] = [mol.vertices.index(atom1) + 1, mol.vertices.index(atom2) + 1]
# top:
top1, top1_has_heavy_atoms = determine_top_group_indices(mol, atom2, atom1, index=1)
top2, top2_has_heavy_atoms = determine_top_group_indices(mol, atom1, atom2, index=1)
non_rotor = check_special_non_rotor_cases(mol, top1, top2)
if non_rotor:
continue
if top1_has_heavy_atoms and not top2_has_heavy_atoms:
rotor['top'] = top2
elif top2_has_heavy_atoms and not top1_has_heavy_atoms:
rotor['top'] = top1
else:
rotor['top'] = top1 if len(top1) <= len(top2) else top2
# scan:
rotor['scan'] = [determine_smallest_atom_index_in_scan(atom1=atom1, atom2=atom2, mol=mol)]
rotor['scan'].extend([mol.vertices.index(atom1) + 1, mol.vertices.index(atom2) + 1])
rotor['scan'].append(determine_smallest_atom_index_in_scan(atom1=atom2, atom2=atom1, mol=mol))
rotor['torsion'] = convert_list_index_0_to_1(rotor['scan'], direction=-1)
# other keys:
rotor['number_of_running_jobs'] = 0
rotor['success'] = None
rotor['invalidation_reason'] = ''
rotor['times_dihedral_set'] = 0
rotor['trsh_counter'] = 0
rotor['trsh_methods'] = list()
rotor['scan_path'] = ''
rotor['directed_scan_type'] = 'ess' # default to 'ess', changed in initialize_directed_rotors()
rotor['directed_scan'] = dict()
rotor['dimensions'] = 1
rotor['original_dihedrals'] = list()
rotor['cont_indices'] = list()
rotor['symmetry'] = None
rotor['max_e'] = None
rotors.append(rotor)
return rotors
[docs]def determine_smallest_atom_index_in_scan(atom1: Atom,
atom2: Atom,
mol: Molecule,
) -> int:
"""
Determine the smallest atom index in ``mol`` connected to ``atom1`` which is not ``atom2``.
Returns a heavy atom if available, otherwise a hydrogen atom.
Useful for deterministically determining the indices of four atom in a scan.
This function assumes there ARE additional atoms connected to ``atom1``, and that ``atom2`` is not a hydrogen atom.
Args:
atom1 (Atom): The atom whose neighbors will be searched.
atom2 (Atom): An atom connected to ``atom1`` to exclude (a pivotal atom).
mol (Molecule): The molecule to process.
Returns:
int: The smallest atom index (1-indexed) connected to ``atom1`` which is not ``atom2``.
"""
heavy_atoms, hydrogens = list(), list()
for atom3 in atom1.edges.keys():
if atom3.is_hydrogen():
hydrogens.append(mol.vertices.index(atom3))
elif atom3 is not atom2:
heavy_atoms.append(mol.vertices.index(atom3))
smallest_index = len(mol.vertices)
if len(heavy_atoms):
for atom_index in heavy_atoms:
if atom_index < smallest_index:
smallest_index = atom_index
else:
for atom_index in hydrogens:
if atom_index < smallest_index:
smallest_index = atom_index
return smallest_index + 1
[docs]def to_group(mol, atom_indices):
"""
This method converts a defined part of a Molecule into a Group.
Args:
mol (Molecule): The base molecule.
atom_indices (list): 0-indexed atom indices corresponding to atoms in mol to be included in the group.
Returns:
Group: A group consisting of the desired atoms in mol.
"""
# Create GroupAtom object for each atom in the molecule
group_atoms = list()
index_map = dict() # keys are Molecule atom indices, values are Group atom indices
for i, atom_index in enumerate(atom_indices):
atom = mol.atoms[atom_index]
group_atoms.append(gr.GroupAtom(atomtype=[atom.atomtype], radical_electrons=[atom.radical_electrons],
charge=[atom.charge], lone_pairs=[atom.lone_pairs]))
index_map[atom_index] = i
group = gr.Group(atoms=group_atoms, multiplicity=[mol.multiplicity])
for atom in mol.atoms:
# Create a GroupBond for each bond between desired atoms in the molecule
if mol.atoms.index(atom) in atom_indices:
for bonded_atom, bond in atom.edges.items():
if mol.atoms.index(bonded_atom) in atom_indices:
group.add_bond(gr.GroupBond(atom1=group_atoms[index_map[mol.atoms.index(atom)]],
atom2=group_atoms[index_map[mol.atoms.index(bonded_atom)]],
order=[bond.order]))
group.update()
return group
[docs]def update_mol(mol):
"""
Update atom types, multiplicity, and atom charges in the molecule.
Args:
mol (Molecule): The molecule to update.
Returns:
Molecule: the updated molecule.
"""
for atom in mol.atoms:
atom.update_charge()
mol.update_atomtypes(log_species=False, raise_exception=False)
mol.update_multiplicity()
mol.identify_ring_membership()
return mol
[docs]def translate_groups(label, mol, xyz, pivot):
"""
Exchange between two groups in a molecule. The groups cannot share a ring with the pivotal atom.
The function does not change the atom order, just the coordinates of atoms.
If the pivotal atom has exactly one lone pair, consider it as well as a dummy atom in translations.
Args:
label (str): The species' label.
mol (Molecule): The 2D graph representation of the molecule.
xyz (dict): A string-format 3d coordinates of the molecule with the same atom order as in mol.
pivot (int): The 0-index of the pivotal atom around which groups are to be translated.
Returns:
dict: The translated coordinates.
"""
mol.identify_ring_membership() # populates the Atom.props['inRing'] attribute
atom1 = mol.atoms[pivot]
lp = atom1.lone_pairs
if lp > 1:
logger.warning(f'Cannot translate groups for {label} if the pivotal atom has more than one '
f'lone electron pair')
return xyz
groups, translate, dont_translate = list(), list(), list()
for atom2 in mol.atoms[pivot].edges.keys():
top = determine_top_group_indices(mol, atom1, atom2, index=0)[0]
groups.append({'atom': atom2, 'protons': sum([mol.atoms[i].number for i in top])}) # a dict per top
if 'inRing' in atom1.props and atom1.props['inRing'] and 'inRing' in atom2.props and atom2.props['inRing']:
# check whether atom1 and atom2 belong to the same ring
sssr = mol.get_deterministic_sssr()
for ring in sssr:
if atom1 in ring and atom2 in ring:
dont_translate.append(atom2)
break
groups.sort(key=lambda x: x['protons'], reverse=False) # sort by the size (sum of atomic numbers)
i = 0
while len(translate) < 2 - lp and i < len(groups):
if groups[i]['atom'] not in dont_translate:
translate.append(groups[i])
i += 1
if len(translate) == 1 and lp:
vector = vectors.get_lp_vector(label, mol=mol, xyz=xyz, pivot=pivot)
new_xyz = translate_group(mol=mol, xyz=xyz, pivot=pivot,
anchor=mol.atoms.index(translate[0]['atom']), vector=vector)
elif len(translate) == 2 and not lp:
vector = vectors.get_vector(pivot=pivot, anchor=mol.atoms.index(translate[1]['atom']), xyz=xyz)
new_xyz = translate_group(mol=mol, xyz=xyz, pivot=pivot,
anchor=mol.atoms.index(translate[0]['atom']), vector=vector)
# keep original xyz:
vector = vectors.get_vector(pivot=pivot, anchor=mol.atoms.index(translate[0]['atom']), xyz=xyz)
new_xyz = translate_group(mol=mol, xyz=new_xyz, pivot=pivot,
anchor=mol.atoms.index(translate[1]['atom']), vector=vector)
else:
if lp:
raise ConformerError(f'The number of groups to translate is {len(translate)}, expected 1 '
f'(with a lone pair) for {label}.')
else:
raise ConformerError(f'The number of groups to translate is {len(translate)}, expected 2 for {label}.')
return new_xyz
[docs]def translate_group(mol, xyz, pivot, anchor, vector):
"""
Translate a group (a set of atoms from the pivot towards the anchor and onwards) by changing its
pivot -> anchor vector to the desired new vector. Keep the relative distances between the group's atoms constant,
as well as the distance between the anchor and the vector atoms.
Args:
mol (Molecule): The 2D graph representation of the molecule.
xyz (dict): The 3D coordinates of the molecule with the same atom order as in mol.
pivot (int): The 0-index of the pivotal atom around which groups are to be translated.
anchor (int): The 0-index of an anchor atom. The group is defined from the pivot atom to the anchor atom,
including all other atoms in the molecule connected to the anchor. The pivot and anchor
atoms should not have another path connecting them such as a ring.
vector (list): The new vector by which the group will be translated.
Returns:
dict: The translated coordinates.
"""
# v1 = unit_vector([-vector[0], -vector[1], -vector[2]]) # reverse the direction to get the correct angle
v1 = vectors.unit_vector(vector)
v2 = vectors.unit_vector(vectors.get_vector(pivot=pivot, anchor=anchor, xyz=xyz))
normal = vectors.get_normal(v2, v1)
theta = vectors.get_angle(v1, v2)
# print(theta * 180 / math.pi) # print theta in degrees when troubleshooting
# All atoms within the group will be rotated around the same normal vector by theta:
group = determine_top_group_indices(mol=mol, atom1=mol.atoms[pivot], atom2=mol.atoms[anchor], index=0)[0]
coords = converter.xyz_to_coords_list(xyz)
for i in group:
coords[i] = vectors.rotate_vector(point_a=coords[pivot], point_b=coords[i], normal=normal, theta=theta)
new_xyz = converter.xyz_from_data(coords=coords, symbols=xyz['symbols'], isotopes=xyz['isotopes'])
return new_xyz
[docs]def get_number_of_chiral_centers(label, mol, conformer=None, xyz=None, just_get_the_number=True):
"""
Determine the number of chiral centers by type. Either ``conformer`` or ``xyz`` must be given.
Args:
label (str): The species label.
mol (Molecule): The RMG Molecule object.
conformer (dict, optional): A conformer dictionary.
xyz (dict, optional): The xyz coordinates.
just_get_the_number (bool, optional): Return the number of chiral centers regardless of their type.
Raises:
InputError: If neither ``conformer`` nor ``xyz`` were given.
Returns:
Optional[dict, int]:
Keys are types of chiral sites ('C' for carbon, 'N' for nitrogen, 'D' for double bond),
values are the number of chiral centers of each type. If ``just_get_the_number`` is ``True``,
just returns the number of chiral centers (integer).
"""
if conformer is None and xyz is None:
raise InputError('Must get either conformer or xyz.')
if conformer is None:
conformer = {'xyz': xyz}
conformer = determine_chirality(conformers=[conformer], label=label, mol=mol)[0]
result = {'C': 0, 'N': 0, 'D': 0}
for symbol in conformer['chirality'].values():
if symbol in ['R', 'S']:
result['C'] += 1
elif symbol in ['NR', 'NS']:
result['N'] += 1
elif symbol in ['E', 'Z']:
result['D'] += 1
else:
raise ConformerError(f"Chiral symbols must be either `R`, `S`, `NR`, `NS`, `E`, `Z`, got: {symbol}.")
if just_get_the_number:
return sum([val for val in result.values()])
return result
[docs]def get_lowest_diastereomers(label, mol, conformers, diastereomers=None):
"""
Get the 2^(n-1) diastereomers with the lowest energy (where n is the number of chiral centers in the molecule).
We exclude enantiomers (mirror images where ALL chiral centers simultaneously invert).
If a specific diastereomer is given (in an xyz dict form), then only the lowest conformer with the same chirality
will be returned.
Args:
label (str): The species' label.
mol (Molecule): The 2D graph representation of the molecule.
conformers (list): Entries are conformer dictionaries.
diastereomers (list, optional): Entries are xyz's in a dictionary format or conformer structures
representing specific diastereomers to keep.
Raises:
ConformerError: If diastereomers is not None and is of wrong type,
or if conformers with the requested chirality combination could not be generated.
Returns:
list: Entries are lowest energy diastereomeric conformer dictionaries to consider.
"""
# assign chirality properties to all conformers
conformers = determine_chirality(conformers, label, mol)
# initialize the enantiomeric dictionary (includes enantiomers and diastereomers)
# keys are chiral combinations, values are lowest conformers
enantiomers_dict = dict()
for conformer in conformers:
if conformer['FF energy'] is not None:
chirality_tuple = chirality_dict_to_tuple(conformer['chirality'])
if chirality_tuple not in list(enantiomers_dict.keys()):
# this is a new enantiomer, consider it
enantiomers_dict[chirality_tuple] = conformer
elif conformer['FF energy'] < enantiomers_dict[chirality_tuple]['FF energy']:
# found a lower energy conformer with the same chirality, replace
enantiomers_dict[chirality_tuple] = conformer
if diastereomers is None:
# no specific diastereomers were requested
pruned_enantiomers_dict = prune_enantiomers_dict(label, enantiomers_dict)
else:
if isinstance(diastereomers, list):
# make sure entries are conformers, convert if needed
modified_diastereomers = list()
for diastereomer in diastereomers:
if isinstance(diastereomer, str) or isinstance(diastereomer, dict) and 'coords' in diastereomer:
# we'll also accept string format xyz
modified_diastereomers.append({'xyz': converter.check_xyz_dict(diastereomer)})
elif isinstance(diastereomer, dict) and 'xyz' in diastereomer:
modified_diastereomers.append(diastereomer)
else:
raise ConformerError(f'diastereomers entries must be either xyz or conformer dictionaries, '
f'got {type(diastereomer)} for {label}')
diastereomer_confs = [{'xyz': converter.check_xyz_dict(diastereomer)} for diastereomer in diastereomers]
diastereomer_confs = determine_chirality(diastereomer_confs, label, mol)
else:
raise ConformerError(f'diastereomers must be a list of xyz coordinates, got: {type(diastereomers)}')
chirality_tuples = [chirality_dict_to_tuple(conformer['chirality']) for conformer in diastereomer_confs]
new_enantiomers_dict = dict()
for chirality_tuple, conformer in enantiomers_dict.items():
if chirality_tuple in chirality_tuples:
new_enantiomers_dict[chirality_tuple] = conformer
if not new_enantiomers_dict:
raise ConformerError(f'Could not generate conformers with chirality combination:\n{chirality_tuples}')
pruned_enantiomers_dict = prune_enantiomers_dict(label, new_enantiomers_dict)
if len(list(pruned_enantiomers_dict.keys())) and list(pruned_enantiomers_dict.keys())[0] != tuple():
logger.info(f'Considering the following enantiomeric combinations for {label}:\n'
f'{list(pruned_enantiomers_dict.keys())}')
return list(pruned_enantiomers_dict.values())
[docs]def prune_enantiomers_dict(label, enantiomers_dict):
"""
A helper function for screening out enantiomers from the enantiomers_dict, leaving only diastereomers
(so removing all exact mirror images). Note that double bond chiralities 'E' and 'Z' are not mirror images of each
other, and are not pruned out.
Args:
label (str): The species' label.
enantiomers_dict (dict): Keys are chirality tuples, values are conformer structures.
Returns:
dict: The pruned enantiomers_dict.
"""
pruned_enantiomers_dict = dict()
for chirality_tuples, conformer in enantiomers_dict.items():
inversed_chirality_tuples = tuple([(chirality_tuple[0], inverse_chirality_symbol(chirality_tuple[1]))
for chirality_tuple in chirality_tuples])
if chirality_tuples not in pruned_enantiomers_dict and inversed_chirality_tuples not in pruned_enantiomers_dict:
# this combination (or its exact mirror image) was not considered yet
if inversed_chirality_tuples in list(enantiomers_dict.keys()):
# the mirror image exists, check which has a lower energy
inversed_conformer = enantiomers_dict[inversed_chirality_tuples]
if inversed_conformer['FF energy'] is None and conformer['FF energy'] is None:
logger.warning(f'Could not get energies of enantiomers {chirality_tuples} '
f'nor its mirror image {inversed_chirality_tuples} for species {label}')
continue
elif inversed_conformer['FF energy'] is None:
pruned_enantiomers_dict[chirality_tuples] = conformer
elif conformer['FF energy'] is None:
pruned_enantiomers_dict[inversed_chirality_tuples] = inversed_conformer
elif conformer['FF energy'] <= inversed_conformer['FF energy']:
pruned_enantiomers_dict[chirality_tuples] = conformer
else:
pruned_enantiomers_dict[inversed_chirality_tuples] = inversed_conformer
else:
# the mirror image does not exist
pruned_enantiomers_dict[chirality_tuples] = conformer
return pruned_enantiomers_dict
[docs]def inverse_chirality_symbol(symbol):
"""
Inverses a chirality symbol, e.g., the 'R' character to 'S', or 'NS' to 'NR'.
Note that chiral double bonds ('E' and 'Z') must not be inversed (they are not mirror images of each other).
Args:
symbol (str): The chirality symbol.
Raises:
InputError: If ``symbol`` could not be recognized.
Returns:
str: The inverse chirality symbol.
"""
inversion_dict = {'R': 'S', 'S': 'R', 'NR': 'NS', 'NS': 'NR', 'E': 'E', 'Z': 'Z'}
if symbol not in list(inversion_dict.keys()):
raise InputError(f"Recognized chirality symbols are 'R', 'S', 'NR', 'NS', 'E', and 'Z', got {symbol}.")
return inversion_dict[symbol]
[docs]def chirality_dict_to_tuple(chirality_dict):
"""
A helper function for using the chirality dictionary of a conformer as a key in the enantiomers_dict
by converting it to a tuple deterministically.
Args:
chirality_dict (dict): The chirality dictionary of a conformer.
Raises:
ConformerError: If the chirality values are wrong.
Returns:
tuple: A deterministic tuple representation of the chirality dictionary.
"""
# extract carbon sites (values are either 'R' or 'S'), nitrogen sites (values are either 'NR' or 'NS')
# and chiral double bonds (values are either 'E' or 'Z')
c_sites, n_sites, bonds, result = list(), list(), list(), list()
for site, chirality in chirality_dict.items():
if chirality in ['R', 'S']:
c_sites.append((site, chirality))
elif chirality in ['NR', 'NS']:
n_sites.append((site, chirality))
elif chirality in ['E', 'Z']:
bond_site = site if site[0] < site[1] else (site[1], site[0])
bonds.append((bond_site, chirality))
else:
raise ConformerError(f'Chiralities could either be R, S, NR, NS, E, or Z. Got: {chirality}.')
# sort the lists
c_sites.sort(key=lambda entry: entry[0])
n_sites.sort(key=lambda entry: entry[0])
bonds.sort(key=lambda entry: entry[0])
# combine by order
for entry in c_sites + n_sites + bonds:
result.append(entry)
return tuple(result)
[docs]def determine_chirality(conformers, label, mol, force=False):
"""
Determines the Cahn–Ingold–Prelog (CIP) chirality (R or S) of atoms in the conformer,
as well as the CIP chirality of double bonds (E or Z).
Args:
conformers (list): Entries are conformer dictionaries.
label (str): The species' label.
mol (RMG Molecule or RDKit RDMol): The molecule object with connectivity and bond order information.
force (bool, optional): Whether to override data, ``True`` to override, default is ``False``.
Returns:
list: Conformer dictionaries with updated with 'chirality'. ``conformer['chirality']`` is a dictionary.
Keys are either a 1-length tuple of atom indices (for chiral atom centers) or a 2-length tuple of atom
indices (for chiral double bonds), values are either 'R' or 'S' for chiral atom centers
(or 'NR' or 'NS' for chiral nitrogen centers), or 'E' or 'Z' for chiral double bonds.
All atom indices are 0-indexed.
"""
chiral_nitrogen_centers = identify_chiral_nitrogen_centers(mol)
new_mol, elements_to_insert = replace_n_with_c_in_mol(mol, chiral_nitrogen_centers)
for conformer in conformers:
if 'chirality' not in conformer:
# keys are either 1-length atom indices (for chiral atom centers)
# or 2-length atom indices (for chiral double bonds)
# values are either 'R', 'S', 'NR', 'NS', 'E', or 'Z'
conformer['chirality'] = dict()
elif conformer['chirality'] != dict() and not force:
# don't override data
continue
new_xyz = replace_n_with_c_in_xyz(label, mol, conformer['xyz'], chiral_nitrogen_centers, elements_to_insert)
rd_mol = embed_rdkit(label, new_mol, xyz=new_xyz)
if rd_mol is None or not rd_mol.GetNumConformers():
continue
rd_mol.UpdatePropertyCache(strict=False)
Chem.rdmolops.AssignStereochemistryFrom3D(rd_mol, 0)
for i, rd_atom in enumerate(rd_mol.GetAtoms()):
rd_atom_props_dict = rd_atom.GetPropsAsDict()
if '_CIPCode' in list(rd_atom_props_dict.keys()):
if mol.atoms[i].is_nitrogen():
# this is a nitrogen site in the original molecule, mark accordingly
conformer['chirality'][(i,)] = 'N' + rd_atom_props_dict['_CIPCode']
else:
conformer['chirality'][(i,)] = rd_atom_props_dict['_CIPCode']
for rd_bond in rd_mol.GetBonds():
stereo = str(rd_bond.GetStereo())
if stereo in ['STEREOE', 'STEREOZ']:
# possible values are 'STEREOANY', 'STEREOCIS', 'STEREOE', 'STEREONONE', 'STEREOTRANS', and 'STEREOZ'
rd_atoms = [rd_bond.GetBeginAtomIdx(), rd_bond.GetEndAtomIdx()] # indices of atoms bonded by this bond
conformer['chirality'][tuple(rd_atom for rd_atom in rd_atoms)] = stereo[-1]
return conformers
[docs]def identify_chiral_nitrogen_centers(mol):
"""
Identify the atom indices corresponding to a chiral nitrogen centers in a molecule (umbrella modes).
Args:
mol (Molecule): The molecule to be analyzed.
Raises:
TypeError: If ``mol`` is of wrong type.
Returns:
list: Atom numbers (0-indexed) representing chiral nitrogen centers in the molecule (umbrella modes).
"""
if not isinstance(mol, Molecule):
raise TypeError(f'mol must be a Molecule instance, got: {type(mol)}')
chiral_nitrogen_centers = list()
for atom1 in mol.atoms:
if atom1.is_nitrogen() and atom1.lone_pairs == 1 and atom1.radical_electrons == 0 \
and (len(list(atom1.edges.keys())) == 3
or (atom1.radical_electrons == 1 and len(list(atom1.edges.keys())) == 2)):
groups, tops, top_element_counts = list(), list(), list()
for atom2 in atom1.edges.keys():
top = determine_top_group_indices(mol, atom1, atom2, index=0)[0]
tops.append(top)
top_element_counts.append(get_top_element_count(mol, top))
groups.append(to_group(mol, top))
if (top_element_counts[0] != top_element_counts[1] and top_element_counts[1] != top_element_counts[2]) \
or all([not groups[0].is_isomorphic(group, save_order=True) for group in groups[1:]] +
[not groups[-1].is_isomorphic(group, save_order=True) for group in groups[:-1]]):
# if we can say that TWO groups, each separately considered, isn't isomorphic to the others,
# then this nitrogen has all different groups.
chiral_nitrogen_centers.append(mol.atoms.index(atom1))
return chiral_nitrogen_centers
[docs]def replace_n_with_c_in_mol(mol, chiral_nitrogen_centers):
"""
Replace nitrogen atoms (pre-identified as chiral centers) with carbon atoms, replacing the lone electron pair
(assuming just one exists) with a hydrogen or a halogen atom, preserving any radical electrons on the nitrogen atom.
Args:
mol (Molecule): The molecule to be analyzed.
chiral_nitrogen_centers (list): The 0-index of chiral (umbrella mode) nitrogen atoms in the molecule.
Raises:
ConformerError: If any of the atoms indicated by ``chiral_nitrogen_centers`` could not be a chiral nitrogen atom.
Returns:
Tuple[Molecule, list]:
- A copy of the molecule with replaced N atoms.
- Elements inserted in addition to the C atom, ordered as in ``chiral_nitrogen_centers``.
"""
new_mol = mol.copy(deep=True)
inserted_elements = list()
for n_index in chiral_nitrogen_centers:
if not mol.atoms[n_index].is_nitrogen():
raise ConformerError(f'Cannot replace a nitrogen atom index {n_index} if it is not a nitrogen element.')
if mol.atoms[n_index].lone_pairs != 1:
raise ConformerError(f'Cannot replace a nitrogen atom index {n_index} with number of lone pairs '
f'different than one (got: {mol.atoms[n_index].lone_pairs}).')
if mol.atoms[n_index].radical_electrons > 1:
raise ConformerError(f'Cannot replace a nitrogen atom index {n_index} if it has more than one radical '
f'electrons (got: {mol.atoms[n_index].radical_electrons}).')
if any([not bond.is_single() for bond in mol.atoms[n_index].edges.values()]):
raise ConformerError(f'Cannot replace a nitrogen atom index {n_index} if not all of its bonds are single '
f'(got: {[bond.order for bond in mol.atoms[n_index].edges.values()]}).')
new_c_atom = Atom(element=C_ELEMENT, radical_electrons=mol.atoms[n_index].radical_electrons,
charge=mol.atoms[n_index].charge, lone_pairs=0, id=mol.atoms[n_index].id)
new_c_atom.edges = dict()
for atom2 in mol.atoms[n_index].edges.keys():
# delete bonds from all other atoms connected to the atom represented by n_index
del new_mol.atoms[mol.atoms.index(atom2)].edges[new_mol.atoms[n_index]]
new_mol.vertices[n_index] = new_c_atom
h, f, cl = False, False, False # mark hydrogen, fluorine, and chlorine neighbors of the original atom
for atom2 in mol.atoms[n_index].edges.keys():
new_mol.add_bond(Bond(atom1=new_c_atom, atom2=new_mol.atoms[mol.atoms.index(atom2)], order=1))
if atom2.is_hydrogen():
h = True
elif atom2.is_fluorine():
f = True
elif atom2.is_chlorine():
cl = True
if not h:
additional_element = H_ELEMENT
inserted_elements.append('H')
elif not f:
additional_element = F_ELEMENT
inserted_elements.append('F')
elif not cl:
additional_element = Cl_ELEMENT
inserted_elements.append('Cl')
else:
# this can only happen if the molecule is NHFCl (ammonia substituted with one F and one Cl), use iodine
additional_element = I_ELEMENT
inserted_elements.append('I')
new_atom = Atom(element=additional_element, radical_electrons=0, charge=0,
lone_pairs=0 if additional_element.number == 1 else 3)
new_atom.edges = dict()
# new_mol.add_atom(new_atom)
new_mol.vertices.append(new_atom)
new_bond = Bond(atom1=new_c_atom, atom2=new_atom, order=1)
new_mol.add_bond(new_bond)
return new_mol, inserted_elements
[docs]def replace_n_with_c_in_xyz(label, mol, xyz, chiral_nitrogen_centers, elements_to_insert):
"""
Replace nitrogen atoms (pre-identified as chiral centers) with carbon atoms, replacing the lone electron pair
(assuming just one exists) with a hydrogen or a halogen atom.
Args:
label (str): The species label.
mol (Molecule): The respective molecule object.
xyz (dict): The 3D coordinates to process.
chiral_nitrogen_centers (list): The 0-index of chiral (umbrella mode) nitrogen atoms in the molecule.
elements_to_insert (list): The element (H/F/Cl/I) to insert in addition to C per nitrogen center.
Returns:
dict: The coordinates with replaced N atoms.
"""
symbols = list(copy.copy(xyz['symbols']))
isotopes = list(copy.copy(xyz['isotopes'])) if 'isotopes' in xyz else None
coords = converter.xyz_to_coords_list(xyz)
for n_index, element_to_insert in zip(chiral_nitrogen_centers, elements_to_insert):
symbols[n_index] = 'C'
if isotopes is not None:
isotopes[n_index] = 12
if element_to_insert == 'H':
symbol, isotope, distance = 'H', 1, 1.1
elif element_to_insert == 'F':
symbol, isotope, distance = 'F', 19, 2.0
elif element_to_insert == 'Cl':
symbol, isotope, distance = 'Cl', 35, 1.77
elif element_to_insert == 'I':
symbol, isotope, distance = 'I', 127, 2.14
else:
raise ConformerError(f'Element to insert must be either H, F, Cl, or I. Got: {element_to_insert}')
symbols.append(symbol)
if isotopes is not None:
isotopes.append(isotope)
lp_vector = vectors.set_vector_length(vectors.get_lp_vector(label, mol, xyz, n_index), distance)
lp_vector[0] += coords[n_index][0]
lp_vector[1] += coords[n_index][1]
lp_vector[2] += coords[n_index][2]
coords.append(lp_vector)
new_xyz = converter.xyz_from_data(coords=coords, symbols=symbols, isotopes=isotopes)
return new_xyz
[docs]def get_top_element_count(mol, top):
"""
Returns the element count for the molecule considering only the atom indices in ``top``.
Args:
mol (Molecule): The molecule to consider.
top (list): The atom indices to consider.
Returns:
dict: The element count, keys are tuples of (element symbol, isotope number), values are counts.
"""
if not isinstance(top, list):
top = list(top)
element_count = {}
for i, atom in enumerate(mol.atoms):
if i in top:
key = (atom.element.symbol, atom.element.isotope)
if key in element_count:
element_count[key] += 1
else:
element_count[key] = 1
return element_count
[docs]def initialize_log(verbose=logging.INFO):
"""
Set up a simple logger for stdout printing (not saving into as log file).
Args:
verbose (int, optional): Specify the amount of log text seen.
"""
logger.setLevel(verbose)
logger.propagate = False
# Use custom level names for cleaner log output
logging.addLevelName(logging.CRITICAL, 'Critical: ')
logging.addLevelName(logging.ERROR, 'Error: ')
logging.addLevelName(logging.WARNING, 'Warning: ')
logging.addLevelName(logging.INFO, '')
logging.addLevelName(logging.DEBUG, '')
logging.addLevelName(0, '')
# Create formatter and add to handlers
formatter = logging.Formatter('%(levelname)s%(message)s')
# Remove old handlers before adding ours
while logger.handlers:
logger.removeHandler(logger.handlers[0])
# Create console handler; send everything to stdout rather than stderr
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(verbose)
ch.setFormatter(formatter)
logger.addHandler(ch)