Source code for arc.reaction

"""
A module for representing a reaction.
"""

from typing import TYPE_CHECKING, List, Optional, Tuple, Union

from qcelemental.exceptions import ValidationError
from qcelemental.models.molecule import Molecule as QCMolecule

from rmgpy.reaction import Reaction
from rmgpy.species import Species

import arc.rmgdb as rmgdb
from arc.common import extremum_list, get_logger
from arc.exceptions import ReactionError, InputError
from arc.imports import settings
from arc.species.converter import check_xyz_dict, sort_xyz_using_indices, xyz_to_str
from arc.species.species import ARCSpecies, check_atom_balance, check_label

if TYPE_CHECKING:
    from rmgpy.data.rmg import RMGDatabase


logger = get_logger()


default_ts_methods = settings['default_ts_methods']


[docs]class ARCReaction(object): """ A class for representing a chemical reaction. Either give reactants and products (just list of labels corresponding to :ref:`ARCSpecies <species>`), a reaction label, or an RMG Reaction object. If the reactants and products in the RMG Reaction aren't ARCSpecies, they will be created. The ARCReaction object stores the labels corresponding to the reactants, products and TS ARCSpecies objects as self.reactants, self.products, and self.ts_label, respectively. Args: label (str, optional): The reaction's label in the format `r1 + r2 <=> p1 + p2` (or unimolecular on either side, as appropriate). reactants (list, optional): A list of reactant *labels* corresponding to an :ref:`ARCSpecies <species>`. products (list, optional): A list of product *labels* corresponding to an :ref:`ARCSpecies <species>`. r_species (list, optional): A list of reactants :ref:`ARCSpecies <species>` objects. p_species (list, optional): A list of products :ref:`ARCSpecies <species>` objects. ts_label (str, optional): The :ref:`ARCSpecies <species>` label of the respective TS. rmg_reaction (Reaction, optional): An RMG Reaction class. ts_methods (list, optional): Methods to try for generating TS guesses. If an ARCSpecies is a TS and ts_methods is empty (passing an empty list), then xyz (user guess) must be given. ts_xyz_guess (list, optional): A list of TS XYZ user guesses, each in a string format. multiplicity (int, optional): The reaction surface multiplicity. A trivial guess will be made unless provided. charge (int, optional): The reaction surface charge. reaction_dict (dict, optional): A dictionary to create this object from (used when restarting ARC). species_list (list, optional): A list of ARCSpecies entries for matching reactants and products to existing species. preserve_param_in_scan (list, optional): Entries are length two iterables of atom indices (1-indexed) between which distances and dihedrals of these pivots must be preserved. Used for identification of rotors which break a TS. Attributes: label (str): The reaction's label in the format `r1 + r2 <=> p1 + p2` (or unimolecular on either side, as appropriate). family (KineticsFamily): The RMG kinetic family, if applicable. family_own_reverse (bool): Whether the RMG family is its own reverse. reactants (list): A list of reactants labels corresponding to an :ref:`ARCSpecies <species>`. products (list): A list of products labels corresponding to an :ref:`ARCSpecies <species>`. r_species (list): A list of reactants :ref:`ARCSpecies <species>` objects. p_species (list): A list of products :ref:`ARCSpecies <species>` objects. ts_species (ARCSpecies): The :ref:`ARCSpecies <species>` corresponding to the reaction's TS. dh_rxn298 (float): The heat of reaction at 298K. kinetics (Arrhenius): The high pressure limit rate coefficient calculated by ARC. rmg_kinetics (Arrhenius): The kinetics generated by RMG, for reality-check. rmg_reaction (Reaction): An RMG Reaction class. rmg_reactions (list): A list of RMG Reaction objects with RMG rates for comparisons. long_kinetic_description (str): A description for the species entry in the thermo library outputted. ts_methods (list): Methods to try for generating TS guesses. If an ARCSpecies is a TS and ts_methods is empty (passing an empty list), then xyz (user guess) must be given. ts_xyz_guess (list): A list of TS XYZ user guesses, each in a string format. multiplicity (int): The reaction surface multiplicity. A trivial guess will be made unless provided. charge (int): The reaction surface charge. index (int): An auto-generated index associating the ARCReaction object with the corresponding TS :ref:`ARCSpecies <species>` object. ts_label (str): The :ref:`ARCSpecies <species>` label of the respective TS. preserve_param_in_scan (list): Entries are length two iterables of atom indices (1-indexed) between which distances and dihedrals of these pivots must be preserved. atom_map (List[int]): An atom map, mapping the reactant atoms to the product atoms. I.e., an atom map of [0, 2, 1] means that reactant atom 0 matches product atom 0, reactant atom 1 matches product atom 2, and reactant atom 2 matches product atom 1. done_opt_r_n_p (bool): Whether the optimization of all reactants and products is complete. """ def __init__(self, label: str = '', reactants: Optional[List[str]] = None, products: Optional[List[str]] = None, r_species: Optional[List[ARCSpecies]] = None, p_species: Optional[List[ARCSpecies]] = None, ts_label: Optional[str] = None, rmg_reaction: Optional[Reaction] = None, ts_methods: Optional[List[str]] = None, ts_xyz_guess: Optional[list] = None, multiplicity: Optional[int] = None, charge: Optional[int] = None, reaction_dict: Optional[dict] = None, species_list: Optional[List[ARCSpecies]] = None, preserve_param_in_scan: Optional[list] = None, ): self.arrow = ' <=> ' self.plus = ' + ' self.r_species = r_species or list() self.p_species = p_species or list() self.kinetics = None self.rmg_kinetics = None self.long_kinetic_description = '' self.family = None self.family_own_reverse = 0 self.ts_label = ts_label self.dh_rxn298 = None self.rmg_reactions = None self.ts_xyz_guess = ts_xyz_guess or list() self.preserve_param_in_scan = preserve_param_in_scan self._atom_map = None self._multiplicity = multiplicity self._charge = charge if reaction_dict is not None: # Reading from a dictionary self.from_dict(reaction_dict=reaction_dict, species_list=species_list) else: # Not reading from a dictionary self.label = label self.index = None self.ts_species = None reactants = reactants or [spc.label for spc in self.r_species] self.reactants = [check_label(reactant)[0] for reactant in reactants] if reactants else list() products = products or [spc.label for spc in self.p_species] or None self.products = [check_label(product)[0] for product in products] if products else list() self.rmg_reaction = rmg_reaction if self.rmg_reaction is None and not self.label \ and not (len(self.reactants) * len(self.products)) \ and not (len(self.r_species) * len(self.p_species)): raise InputError(f'Cannot determine reactants and/or products labels for reaction {self.label}') self.set_label_reactants_products() self.ts_methods = ts_methods if ts_methods is not None else default_ts_methods self.ts_methods = [tsm.lower() for tsm in self.ts_methods] self.ts_xyz_guess = ts_xyz_guess if ts_xyz_guess is not None else list() self.done_opt_r_n_p = None if len(self.reactants) > 3 or len(self.products) > 3: raise ReactionError(f'An ARC Reaction can have up to three reactants / products. got {len(self.reactants)} ' f'reactants and {len(self.products)} products for reaction {self.label}.') if self.ts_xyz_guess is not None and not isinstance(self.ts_xyz_guess, list): self.ts_xyz_guess = [self.ts_xyz_guess] self.arc_species_from_rmg_reaction() self.remove_dup_species() self.check_atom_balance() @property def atom_map(self): """The reactants to products atom map""" if self._atom_map is None \ and all(species.get_xyz(generate=False) is not None for species in self.r_species + self.p_species): self._atom_map = self.get_atom_map() return self._atom_map @atom_map.setter def atom_map(self, value): """Allow setting the atom map""" self._atom_map = value @property def charge(self): """The net electric charge of the reaction PES""" if self._charge is None: if len(self.r_species): self._charge = self.get_rxn_charge() elif self.rmg_reaction is not None: self._charge = sum([spc.molecule[0].get_net_charge() for spc in self.rmg_reaction.reactants + self.rmg_reaction.products]) return self._charge @charge.setter def charge(self, value): """Allow setting the reaction charge""" if value is not None and not isinstance(value, int): raise InputError(f'Reaction charge must be an integer, got {value} which is a {type(value)}.') self._charge = value @property def multiplicity(self): """The electron spin multiplicity of the reaction PES""" if self._multiplicity is None: self._multiplicity = self.get_rxn_multiplicity() return self._multiplicity @multiplicity.setter def multiplicity(self, value): """Allow setting the reaction multiplicity""" self._multiplicity = value if value is not None: if not isinstance(value, int): raise InputError(f'Reaction multiplicity must be an integer, got {value} which is a {type(value)}.') logger.info(f'Setting multiplicity of reaction {self.label} to {self._multiplicity}') def __str__(self) -> str: """Return a string representation of the object""" str_representation = f'ARCReaction(' str_representation += f'label="{self.label}", ' str_representation += f'rmg_reaction="{self.rmg_reaction}", ' if self.preserve_param_in_scan is not None: str_representation += f'preserve_param_in_scan="{self.preserve_param_in_scan}", ' str_representation += f'multiplicity={self.multiplicity}, ' str_representation += f'charge={self.charge})' return str_representation
[docs] def as_dict(self) -> dict: """A helper function for dumping this object as a dictionary in a YAML file for restarting ARC""" reaction_dict = dict() reaction_dict['label'] = self.label reaction_dict['index'] = self.index reaction_dict['multiplicity'] = self.multiplicity reaction_dict['charge'] = self.charge reaction_dict['reactants'] = self.reactants reaction_dict['products'] = self.products reaction_dict['r_species'] = [spc.as_dict() for spc in self.r_species] reaction_dict['p_species'] = [spc.as_dict() for spc in self.p_species] if self.ts_species is not None: reaction_dict['ts_species'] = self.ts_species.as_dict() if self._atom_map is not None: reaction_dict['atom_map'] = self._atom_map if self.done_opt_r_n_p is not None: reaction_dict['done_opt_r_n_p'] = self.done_opt_r_n_p if self.preserve_param_in_scan is not None: reaction_dict['preserve_param_in_scan'] = self.preserve_param_in_scan if 'rmg_reaction' in reaction_dict: reaction_dict['rmg_reaction'] = self.rmg_reaction_to_str() if self.family is not None: reaction_dict['family'] = self.family.label reaction_dict['family_own_reverse'] = self.family_own_reverse reaction_dict['long_kinetic_description'] = self.long_kinetic_description reaction_dict['label'] = self.label reaction_dict['ts_methods'] = self.ts_methods reaction_dict['ts_xyz_guess'] = self.ts_xyz_guess reaction_dict['ts_label'] = self.ts_label return reaction_dict
[docs] def from_dict(self, reaction_dict: dict, species_list: Optional[list] = None, ): """ A helper function for loading this object from a dictionary in a YAML file for restarting ARC. """ self.index = reaction_dict['index'] if 'index' in reaction_dict else None self.label = reaction_dict['label'] if 'label' in reaction_dict else '' self.multiplicity = reaction_dict['multiplicity'] if 'multiplicity' in reaction_dict else None self.charge = reaction_dict['charge'] if 'charge' in reaction_dict else 0 self.reactants = reaction_dict.get('reactants') or list() self.products = reaction_dict.get('products') or list() if 'family' in reaction_dict and reaction_dict['family'] is not None: db = rmgdb.make_rmg_database_object() rmgdb.load_families_only(db) self.family = rmgdb.get_family(rmgdb=db, label=reaction_dict['family']) self.family.save_order = True else: self.family = None self.family_own_reverse = reaction_dict['family_own_reverse'] if 'family_own_reverse' in reaction_dict else 0 if 'rmg_reaction' in reaction_dict: self.rmg_reaction_from_str(reaction_string=reaction_dict['rmg_reaction']) else: self.rmg_reaction = None if self.rmg_reaction is None and not (len(self.reactants) * len(self.products)): raise InputError(f'Cannot determine reactants and/or products labels for reaction {self.label}') if not (len(self.reactants) * len(self.products)): if not all([spc.label for spc in self.rmg_reaction.reactants + self.rmg_reaction.products]): raise InputError(f'All species in a reaction must be labeled (and the labels must correspond ' f'to respective Species in ARC). If an RMG Reaction object was passes, make ' f'sure that all species in the reactants and products are correctly labeled. ' f'Problematic reaction: {self.label}') self.reactants = [check_label(spc.label)[0] for spc in self.rmg_reaction.reactants] self.products = [check_label(spc.label)[0] for spc in self.rmg_reaction.products] self.set_label_reactants_products(species_list) if self.ts_label is None: self.ts_label = reaction_dict['ts_label'] if 'ts_label' in reaction_dict else None self.r_species = [ARCSpecies(species_dict=r_dict) for r_dict in reaction_dict['r_species']] \ if 'r_species' in reaction_dict else list() self.p_species = [ARCSpecies(species_dict=p_dict) for p_dict in reaction_dict['p_species']] \ if 'p_species' in reaction_dict else list() self.reactants = self.reactants or [spc.label for spc in self.r_species] self.products = self.products or [spc.label for spc in self.p_species] self.ts_species = reaction_dict['ts_species'].from_dict() if 'ts_species' in reaction_dict else None self.long_kinetic_description = reaction_dict['long_kinetic_description'] \ if 'long_kinetic_description' in reaction_dict else '' self.ts_methods = reaction_dict['ts_methods'] if 'ts_methods' in reaction_dict else default_ts_methods self.ts_methods = [tsm.lower() for tsm in self.ts_methods] self.ts_xyz_guess = reaction_dict['ts_xyz_guess'] if 'ts_xyz_guess' in reaction_dict else list() self.preserve_param_in_scan = reaction_dict['preserve_param_in_scan'] \ if 'preserve_param_in_scan' in reaction_dict else None self.atom_map = reaction_dict['atom_map'] if 'atom_map' in reaction_dict else None self.done_opt_r_n_p = reaction_dict['done_opt_r_n_p'] if 'done_opt_r_n_p' in reaction_dict else None
[docs] def is_isomerization(self): """ Determine whether this is an isomerization reaction. Returns: bool: Whether this is an isomerization reaction. """ return True if len(self.r_species) == 1 and len(self.p_species) == 1 else False
[docs] def set_label_reactants_products(self, species_list: Optional[List[ARCSpecies]] = None): """A helper function for settings the label, reactants, and products attributes for a Reaction""" # First make sure that reactants and products labels are defined (most often used). if not len(self.reactants) or not len(self.products): if self.label: if self.arrow not in self.label: raise ReactionError(f'A reaction label must contain an arrow ("{self.arrow}")') reactants, products = self.label.split(self.arrow) if self.plus in reactants: self.reactants = reactants.split(self.plus) else: self.reactants = [reactants] if self.plus in products: self.products = products.split(self.plus) else: self.products = [products] if species_list is not None: if len(self.reactants) and len(self.products): labels = [spc.label for spc in species_list] for spc_label in self.reactants + self.products: if spc_label not in labels: raise ValueError(f'The species {spc_label} appears in the reaction label\n' f'{self.label}\n' f'Yet no species with a corresponding label was defined in ARC.') if not len(self.r_species) and len(self.reactants): self.r_species = [spc for spc in species_list if spc.label in self.reactants] if not len(self.p_species) and len(self.products): self.p_species = [spc for spc in species_list if spc.label in self.products] elif self.rmg_reaction is not None: self.reactants = [r.label for r in self.rmg_reaction.reactants] self.products = [p.label for p in self.rmg_reaction.products] elif len(self.r_species) and len(self.p_species): self.reactants = [r.label for r in self.r_species] self.products = [p.label for p in self.p_species] if not self.label: if len(self.reactants) and len(self.products): self.label = self.arrow.join([self.plus.join(r for r in self.reactants), self.plus.join(p for p in self.products)]) elif len(self.r_species) and len(self.p_species): self.label = self.arrow.join([self.plus.join(r.label for r in self.r_species), self.plus.join(p.label for p in self.p_species)]) elif self.rmg_reaction is not None: # This will probably never be executed, but OK to keep. self.label = self.arrow.join([self.plus.join(r.label for r in self.rmg_reaction.reactants), self.plus.join(p.label for p in self.rmg_reaction.products)]) if self.rmg_reaction is None: self.rmg_reaction_from_arc_species() elif not self.label and not (len(self.reactants) * len(self.products)): raise ReactionError('Either a label or reactants and products lists must be specified') self.reactants = [check_label(reactant)[0] for reactant in self.reactants] self.products = [check_label(product)[0] for product in self.products] if bool(len(self.reactants)) ^ bool(len(self.products)): raise ReactionError(f'Both the reactants and products must be specified for a reaction, ' f'got: reactants = {self.reactants}, products = {self.products}.')
[docs] def rmg_reaction_to_str(self) -> str: """A helper function for dumping the RMG Reaction object as a string for the YAML restart dictionary""" return self.arrow.join([self.plus.join(r.molecule[0].copy(deep=True).to_smiles() for r in self.rmg_reaction.reactants), self.plus.join(p.molecule[0].copy(deep=True).to_smiles() for p in self.rmg_reaction.products)])
[docs] def rmg_reaction_from_str(self, reaction_string: str): """A helper function for regenerating the RMG Reaction object from a string representation""" reactants, products = reaction_string.split(self.arrow) reactants = [Species(smiles=smiles) for smiles in reactants.split(self.plus)] products = [Species(smiles=smiles) for smiles in products.split(self.plus)] self.rmg_reaction = Reaction(reactants=reactants, products=products)
[docs] def rmg_reaction_from_arc_species(self): """ A helper function for generating the RMG Reaction object from ARCSpecies Used for determining the family. """ if self.rmg_reaction is None and len(self.r_species) and len(self.p_species) and \ all([arc_spc.mol is not None for arc_spc in self.r_species + self.p_species]): reactants, products = self.get_reactants_and_products(arc=False) # Returns RMG Species. self.rmg_reaction = Reaction(reactants=reactants, products=products)
[docs] def arc_species_from_rmg_reaction(self): """ A helper function for generating the ARC Species (.r_species and .p_species) from the RMG Reaction object """ if self.rmg_reaction is not None and not len(self.r_species) and not len(self.p_species): self.r_species, self.p_species = list(), list() for i, rmg_reactant in enumerate(self.rmg_reaction.reactants): if len(self.reactants) > i: label = self.reactants[i] else: label = rmg_reactant.label or rmg_reactant.molecule[0].to_smiles() label = check_label(label)[0] self.r_species.append(ARCSpecies(label=label, mol=rmg_reactant.molecule[0])) for i, rmg_product in enumerate(self.rmg_reaction.products): if len(self.products) > i: label = self.products[i] else: label = rmg_product.label or rmg_product.molecule[0].to_smiles() label = check_label(label)[0] self.p_species.append(ARCSpecies(label=label, mol=rmg_product.molecule[0]))
[docs] def get_rxn_charge(self): """A helper function for determining the surface charge""" if len(self.r_species): return sum([r.charge for r in self.r_species])
[docs] def get_rxn_multiplicity(self): """A helper function for determining the surface multiplicity""" reactants, products = self.get_reactants_and_products(arc=True) multiplicity = None ordered_r_mult_list, ordered_p_mult_list = list(), list() if len(reactants): if len(reactants) == 1: return reactants[0].multiplicity if len(products) == 1: return products[0].multiplicity ordered_r_mult_list = sorted([r_spc.multiplicity for r_spc in reactants]) ordered_p_mult_list = sorted([p_spc.multiplicity for p_spc in products]) elif self.rmg_reaction is not None: if len(self.rmg_reaction.reactants) == 1: return self.rmg_reaction.reactants[0].molecule[0].multiplicity if len(self.rmg_reaction.products) == 1: return self.rmg_reaction.products[0].molecule[0].multiplicity ordered_r_mult_list = sorted([r_spc.molecule[0].multiplicity for r_spc in self.rmg_reaction.reactants]) ordered_p_mult_list = sorted([p_spc.molecule[0].multiplicity for p_spc in self.rmg_reaction.products]) for list_1, list_2 in [(ordered_r_mult_list, ordered_p_mult_list), (ordered_p_mult_list, ordered_r_mult_list)]: if all(m == 1 for m in list_1) and multiplicity is None: multiplicity = 1 # S + S = S or T break if all(m == 2 for m in list_1) and len(list_1) == 2 \ and all(m == 2 for m in list_2) and len(list_2) == 2 and multiplicity is None: multiplicity = 1 # D + D = S or T break if 2 in list_1 and all(m == 1 for i, m in enumerate(list_1) if i != list_1.index(2)): multiplicity = 2 # S + D = D break if 3 in list_1 and all(m == 1 for i, m in enumerate(list_1) if i != list_1.index(3)): multiplicity = 3 # S + T = T break if 4 in list_1 and all(m == 1 for i, m in enumerate(list_1) if i != list_1.index(4)): multiplicity = 4 # S + Q = Q break if all(m == 2 for m in list_1): # D + D = S or T # D + D + D = D or Q if len(list_1) % 2 == 0: # even number of D's in list_1, m must be an odd number if any(m > 2 for m in list_2): multiplicity = max(list_2) if max(list_2) % 2 == 1 else max(list_2) - 1 else: # odd number of D's in list_1, m must be even multiplicity = max(list_2) if max(list_2) % 2 == 0 else max(list_2) - 1 if all(m == 3 for m in list_1): # T + T = S or P # T + T + T = T or 7 if len(list_1) % 2 == 0: # even number of T's in list_1, m must be 1 or 5 multiplicity = 1 logger.warning(f'ASSUMING a multiplicity of 1 (singlet) for reaction {self.label}') else: # odd number of D's in list_1, m must be 3 or 7 multiplicity = 3 logger.warning(f'ASSUMING a multiplicity of 3 (triplet) for reaction {self.label}') if list_1 == [2, 3] and 4 not in list_2: # D + T = D or Q multiplicity = 2 logger.warning(f'ASSUMING a multiplicity of 2 (doublet) for reaction {self.label}') if multiplicity is None: logger.error(f'Could not determine multiplicity for reaction {self.label}') return None return multiplicity
[docs] def determine_family(self, rmg_database: 'RMGDatabase', save_order: bool = True, ): """ Determine the RMG family. Populates the .family, and .family_own_reverse attributes. A wrapper for rmgdb determine_reaction_family() function. Args: rmg_database (RMGDatabase): The RMG database instance. save_order (bool, optional): Whether to retain atomic order of the RMG ``reaction`` object instance. """ if self.rmg_reaction is not None: self.family, self.family_own_reverse = rmgdb.determine_reaction_family(rmgdb=rmg_database, reaction=self.rmg_reaction.copy(), save_order=save_order, )
[docs] def check_ts(self, verbose: bool = True, parameter: str = 'E0', ) -> bool: """ Check that the TS E0 or electronic energy is above both reactant and product wells. By default E0 is checked first. If it is not available for all species and TS, the electronic energy is checked. If the check cannot be performed, the method still returns ``True``. Args: verbose (bool, optional): Whether to print logging messages. parameter (str, optional): The energy parameter to consider ('E0' or 'e_elect'). Returns: bool: Whether the TS E0 or electronic energy is above both reactant and product wells, ``True`` if it is. """ if parameter not in ['E0', 'e_elect']: raise ValueError(f"The energy parameter must be either 'E0' or 'e_elect', got: {parameter}") r_e0 = None if any([spc.e0 is None for spc in self.r_species]) \ else sum(spc.e0 * self.get_species_count(species=spc, well=0) for spc in self.r_species) p_e0 = None if any([spc.e0 is None for spc in self.p_species]) \ else sum(spc.e0 * self.get_species_count(species=spc, well=1) for spc in self.p_species) ts_e0 = self.ts_species.e0 r_e_elect = None if any([spc.e_elect is None for spc in self.r_species]) \ else sum(spc.e_elect * self.get_species_count(species=spc, well=0) for spc in self.r_species) p_e_elect = None if any([spc.e_elect is None for spc in self.p_species]) \ else sum(spc.e_elect * self.get_species_count(species=spc, well=1) for spc in self.p_species) ts_e_elect = self.ts_species.e_elect r_e = r_e0 if parameter == 'E0' else r_e_elect p_e = p_e0 if parameter == 'E0' else p_e_elect ts_e = ts_e0 if parameter == 'E0' else ts_e_elect min_e = extremum_list([r_e, p_e, ts_e], return_min=True) e_str = 'E0' if parameter == 'E0' else 'electronic energy' if any([val is None for val in [r_e, p_e, ts_e]]): if verbose: if e_str != 'E0': logger.info('\n') logger.error(f"Could not get {e_str} of all species in reaction {self.label}. Cannot check TS.\n") r_text = f'{r_e:.2f} kJ/mol' if r_e is not None else 'None' ts_text = f'{ts_e:.2f} kJ/mol' if ts_e is not None else 'None' p_text = f'{p_e:.2f} kJ/mol' if p_e is not None else 'None' logger.info(f"Reactants {e_str}: {r_text}\n" f"TS {e_str}: {ts_text}\n" f"Products {e_str}: {p_text}") if parameter == 'E0': # Use e_elect instead: return self.check_ts(verbose=verbose, parameter='e_elect') return True if ts_e < r_e or ts_e < p_e: if verbose: logger.error(f'\nTS of reaction {self.label} has a lower E0 value than expected:\n') logger.info(f'Reactants: {r_e - min_e:.2f} kJ/mol\n' f'TS: {ts_e - min_e:.2f} kJ/mol' f'\nProducts: {p_e - min_e:.2f} kJ/mol') return False if verbose: logger.info(f'\nReaction {self.label} has the following path E0 energies:\n' f'Reactants: {r_e - min_e:.2f} kJ/mol\n' f'TS: {ts_e - min_e:.2f} kJ/mol\n' f'Products: {p_e - min_e:.2f} kJ/mol') return True
[docs] def check_attributes(self): """Check that the Reaction object is defined correctly""" self.set_label_reactants_products() if not self.label: raise ReactionError('A reaction seems to not be defined correctly') if self.arrow not in self.label: raise ReactionError(f'A reaction label must include a double ended arrow with spaces on both ' f'sides: "{self.arrow}". Got:{self.label}') if '+' in self.label and self.plus not in self.label: raise ReactionError(f'Reactants or products in a reaction label must separated with {self.plus} ' f'(has spaces on both sides). Got:{self.label}') species_labels = self.label.split(self.arrow) reactants = [check_label(reactant)[0] for reactant in species_labels[0].split(self.plus)] products = [check_label(product)[0] for product in species_labels[1].split(self.plus)] if len(self.reactants): for reactant in reactants: if reactant not in self.reactants: raise ReactionError(f'Reactant {reactant} from the reaction label {self.label} ' f'is not in self.reactants ({self.reactants})') for reactant in self.reactants: if reactant not in reactants: raise ReactionError(f'Reactant {reactant} is not in the reaction label ({self.label})') if len(self.products): for product in products: if product not in self.products: raise ReactionError(f'Product {product} from the reaction {self.label} ' f'is not in self.products ({self.products})') for product in self.products: if product not in products: raise ReactionError(f'Product {product} is not in the reaction label ({self.label})') if len(self.r_species): for reactant in self.r_species: if reactant.label not in self.reactants: raise ReactionError(f'Reactant {reactant.label} from {self.label} ' f'is not in self.reactants ({self.reactants})') for reactant in reactants: if reactant not in [r.label for r in self.r_species]: raise ReactionError(f'Reactant {reactant} from the reaction label {self.label} ' f'is not in self.r_species ({[r.label for r in self.r_species]})') for reactant in self.reactants: if reactant not in [r.label for r in self.r_species]: raise ReactionError(f'Reactant {reactant} is not in ' f'self.r_species ({[r.label for r in self.r_species]})') if len(self.p_species): for product in self.p_species: if product.label not in self.products: raise ReactionError(f'Product {product.label} from {self.label} ' f'is not in self.products ({self.reactants})') for product in products: if product not in [p.label for p in self.p_species]: raise ReactionError(f'Product {product} from the reaction label {self.label} ' f'is not in self.p_species ({[p.label for p in self.p_species]})') for product in self.products: if product not in [p.label for p in self.p_species]: raise ReactionError(f'Product {product} is not in ' f'self.p_species ({[p.label for p in self.p_species]})')
[docs] def remove_dup_species(self): """ Make sure each species is consider only once in reactants, products, r_species, and p_species. The same species in the reactants/products is considered through get_species_count(). """ self.reactants = sorted(list(set(self.reactants))) self.products = sorted(list(set(self.products))) self.r_species = remove_dup_species(self.r_species) self.p_species = remove_dup_species(self.p_species)
[docs] def check_done_opt_r_n_p(self): """ Check whether the ``final_xyz`` attributes of all ``r_species`` and ``p_species`` are populated, and flag ``self.done_opt_r_n_p`` as ``True`` if they are. Useful to know when to spawn TS search jobs. """ if not self.done_opt_r_n_p: self.done_opt_r_n_p = all(spc.final_xyz is not None for spc in self.r_species + self.p_species)
[docs] def check_atom_balance(self, ts_xyz: Optional[dict] = None, raise_error: bool = True, ) -> bool: """ Check atom balance between reactants, TSs, and product wells. Args: ts_xyz (Optional[dict]): An alternative TS xyz to check. If unspecified, user guesses and the ts_species will be checked. raise_error (bool, optional): Whether to raise an error if an imbalance is found. Raises: ReactionError: If not all wells and TSs are atom balanced. The exception is not raised if ``raise_error`` is ``False``. Returns: bool: Whether all wells and TSs are atom balanced. """ balanced_wells, balanced_ts_xyz, balanced_xyz_guess, balanced_ts_species_mol, balanced_ts_species_xyz = \ True, True, True, True, True r_well, p_well = '', '' for reactant in self.r_species: count = self.get_species_count(species=reactant, well=0) xyz = reactant.get_xyz(generate=True) if xyz is not None and xyz: r_well += (xyz_to_str(xyz) + '\n') * count else: r_well = '' break for product in self.p_species: count = self.get_species_count(species=product, well=1) xyz = product.get_xyz(generate=True) if xyz is not None and xyz: p_well += (xyz_to_str(xyz) + '\n') * count else: p_well = '' break if r_well: for xyz_guess in self.ts_xyz_guess: balanced_xyz_guess *= check_atom_balance(entry_1=xyz_guess, entry_2=r_well) if p_well: balanced_wells = check_atom_balance(entry_1=r_well, entry_2=p_well) if ts_xyz: balanced_ts_xyz = check_atom_balance(entry_1=ts_xyz, entry_2=r_well) if self.ts_species is not None: if self.ts_species.mol is not None: balanced_ts_species_mol = check_atom_balance(entry_1=self.ts_species.mol, entry_2=r_well) ts_xyz = self.ts_species.get_xyz() if ts_xyz is not None: balanced_ts_species_xyz = check_atom_balance(entry_1=self.ts_species.get_xyz(), entry_2=r_well) if not balanced_wells: logger.error(f'The reactant(s) and product(s) wells of reaction {self.label}, are not atom balanced.') if not balanced_ts_xyz: logger.error(f'The generated TS xyz for reaction {self.label} ' f'is not atom balances with the reactant(s) well.') if not balanced_ts_species_mol: logger.error(f'The TS mol for reaction {self.label} is not atom balances with the reactant(s) well.') if not balanced_ts_species_xyz: logger.error(f'The TS coordinates for reaction {self.label} ' f'are not atom balances with the reactant(s) well.') if not balanced_xyz_guess: logger.error(f'Check TS xyz user guesses of reaction {self.label}, ' f'some are not atom balances with the reactant(s) well.') if not all([balanced_wells, balanced_ts_xyz, balanced_ts_species_mol, balanced_ts_species_xyz, balanced_xyz_guess]): if raise_error: raise ReactionError(f'The Reaction {self.label} is not atom balanced.\n' f'balanced wells: {balanced_wells}\n' f'balanced ts xyz: {balanced_ts_xyz}\n' f'balanced ts species mol: {balanced_ts_species_mol}\n' f'balanced ts species xyz: {balanced_ts_species_xyz}\n' f'balanced xyz guess: {balanced_xyz_guess}') return False return True
[docs] def get_species_count(self, species: Optional[ARCSpecies] = None, label: Optional[str] = None, well: int = 0, ) -> int: """ Get the number of times a species participates in the reactants or products well. Either ``species`` or ``label`` must be given. Args: species (ARCSpecies, optional): The species to check. label (str, optional): The species label. well (int, optional): Either ``0`` or ``1`` for the reactants or products well, respectively. Returns: Optional[int]: The number of occurrences of this species in the respective well. """ if species is None and label is None: raise ValueError('Called get_species_count without a species nor its label.') if well not in [0, 1]: raise ValueError(f'Got well = {well}, expected either 0 or 1.') label = species.label if species is not None else label well_str = self.label.split('<=>')[well] wells = [check_label(spc_label)[0] for spc_label in well_str.strip().split(self.plus)] count = sum([label == spc_label for spc_label in wells]) return count
[docs] def get_reactants_and_products(self, arc: bool = True, ) -> Tuple[List[Union[ARCSpecies, Species]], List[Union[ARCSpecies, Species]]]: """ Get a list of reactant and product species including duplicate species, if any. The species could either be ``ARCSpecies`` or ``RMGSpecies`` object instance. Args: arc (bool, optional): Whether to return the species as ARCSpecies (``True``) or as RMG Species (``False``). Returns: Tuple[List[Union[ARCSpecies, Species]], List[Union[ARCSpecies, Species]]]: The reactants and products. """ reactants, products = list(), list() for r_spc in self.r_species: if arc: reactants.extend([r_spc] * self.get_species_count(species=r_spc, well=0)) else: reactants.extend([Species(label=r_spc.label, molecule=[r_spc.mol])] * self.get_species_count(species=r_spc, well=0)) for p_spc in self.p_species: if arc: products.extend([p_spc] * self.get_species_count(species=p_spc, well=1)) else: products.extend([Species(label=p_spc.label, molecule=[p_spc.mol])] * self.get_species_count(species=p_spc, well=1)) return reactants, products
[docs] def get_atom_map(self, verbose: int = 0) -> Optional[List[int]]: """ Get the atom mapping of the reactant atoms to the product atoms. I.e., an atom map of [0, 2, 1] means that reactant atom 0 matches product atom 0, reactant atom 1 matches product atom 2, and reactant atom 2 matches product atom 1. All indices are 0-indexed. Employs the Kabsch, Hungarian, and Uno algorithms to exhaustively locate the best alignment for non-oriented, non-ordered 3D structures. Args: verbose (int): The verbosity level (0-4). Returns: Optional[List[int]] The atom map, entry indices correspond to reactant indices, entry values correspond to product indices. """ atom_map = None try: reactants = QCMolecule.from_data( data='\n--\n'.join(xyz_to_str(reactant.get_xyz()) for reactant in self.r_species), molecular_charge=self.charge, molecular_multiplicity=self.multiplicity, fragment_charges=[reactant.charge for reactant in self.r_species], fragment_multiplicities=[reactant.multiplicity for reactant in self.r_species], orient=False, ) products = QCMolecule.from_data( data='\n--\n'.join(xyz_to_str(product.get_xyz()) for product in self.p_species), molecular_charge=self.charge, molecular_multiplicity=self.multiplicity, fragment_charges=[product.charge for product in self.p_species], fragment_multiplicities=[product.multiplicity for product in self.p_species], orient=False, ) except ValidationError as e: logger.warning(f'Could not get atom map for {self}, got:\n{e}') else: data = products.align(ref_mol=reactants, verbose=verbose)[1] atom_map = data['mill'].atommap.tolist() return atom_map
[docs] def get_mapped_product_xyz(self): """ Uses the mapping from product onto reactant to create a new ARC Species for the mapped product. For now, only functional for A <=> B reactions. Returns: Tuple[dict, ARCSpecies]: dict: Mapped product atoms in ARC dictionary format. ARCSpecies: ARCSpecies object created from mapped coordinates. """ if len(self.p_species) > 1: raise ValueError(f'Can only return a mapped product for reactions with a single product, ' f'got {len(self.p_species)}.') mapped_xyz = sort_xyz_using_indices(xyz_dict=self.p_species[0].get_xyz(), indices=self.atom_map) mapped_product = ARCSpecies(label=self.p_species[0].label, mol=self.p_species[0].mol.copy(deep=True), multiplicity=self.p_species[0].multiplicity, charge=self.p_species[0].charge, xyz=mapped_xyz, ) return mapped_xyz, mapped_product
[docs] def get_reactants_xyz(self, return_format='str') -> Union[dict, str]: """ Get a combined string/dict representation of the cartesian coordinates of all reactant species. Args: return_format (str): Either ``'dict'`` to return a dict format or ``'str'`` to return a string format. Default: ``'str'``. Returns: Union[dict, str] The combined cartesian coordinates. Todo: identify flux pairs like in RMG orient a line: cm1 - X -- Y - cm2 if there are two reactants """ xyz_dict = dict() if len(self.r_species) == 1: xyz_dict = self.r_species[0].get_xyz() elif len(self.r_species) >= 2: xyz_dict = {'symbols': tuple(), 'isotopes': tuple(), 'coords': tuple()} for reactant in self.r_species: xyz = reactant.get_xyz() xyz_dict['symbols'] += xyz['symbols'] xyz_dict['isotopes'] += xyz['isotopes'] xyz_dict['coords'] += xyz['coords'] xyz_dict = check_xyz_dict(xyz_dict) xyz_dict = xyz_to_str(xyz_dict) if return_format == 'str' else xyz_dict return xyz_dict
[docs] def get_products_xyz(self, return_format='str') -> Union[dict, str]: """ Get a combined string/dict representation of the cartesian coordinates of all product species. The resulting coordinates are ordered as the reactants using an atom map. Args: return_format (str): Either ``'dict'`` to return a dict format or ``'str'`` to return a string format. Default: ``'str'``. Returns: Union[dict, str] The combined cartesian coordinates. Todo: - identify flux pairs like in RMG - orient a line: cm1 - X - Y - cm2 if there are two reactants - Combine with get_mapped_product_xyz() """ xyz_dict = mapped_xyz_dict = {'symbols': tuple(), 'isotopes': tuple(), 'coords': tuple()} for product in self.p_species: xyz = product.get_xyz() xyz_dict['symbols'] += xyz['symbols'] xyz_dict['isotopes'] += xyz['isotopes'] xyz_dict['coords'] += xyz['coords'] for i in range(len(xyz_dict['symbols'])): mapped_xyz_dict['symbols'] += (xyz_dict['symbols'][self.atom_map[i]],) mapped_xyz_dict['isotopes'] += (xyz_dict['isotopes'][self.atom_map[i]],) mapped_xyz_dict['coords'] += (xyz_dict['coords'][self.atom_map[i]],) mapped_xyz_dict = check_xyz_dict(mapped_xyz_dict) if return_format == 'str': mapped_xyz_dict = xyz_to_str(mapped_xyz_dict) return mapped_xyz_dict
[docs]def remove_dup_species(species_list: List[ARCSpecies]) -> List[ARCSpecies]: """ Remove duplicate species from a species list. Used when assigning r_species and p_species. Args: species_list (List[ARCSpecies]): The species list to process. Returns: List[ARCSpecies]: A list of species without duplicates. """ if species_list is None or not(len(species_list)): return list() new_species_list = list() for species in species_list: if species.label not in [spc.label for spc in new_species_list]: new_species_list.append(species) return new_species_list