Source code for arc.rmgdb

"""
A module for working with the RMG database.
"""

import os
from typing import TYPE_CHECKING, List, Optional, Tuple, Union

from rmgpy import settings as rmg_settings
from rmgpy.data.kinetics.common import find_degenerate_reactions
from rmgpy.data.rmg import RMGDatabase
from rmgpy.exceptions import KineticsError
from rmgpy.reaction import same_species_lists, Reaction

from arc.common import get_logger, generate_resonance_structures
from arc.exceptions import InputError

if TYPE_CHECKING:
    from rmgpy.data.kinetics.family import KineticsFamily
    from arc.reaction import ARCReaction


logger = get_logger()

db_path = rmg_settings['database.directory']

rmg_database_instance = None


[docs]def make_rmg_database_object() -> RMGDatabase: """ Make a clean RMGDatabase object. Returns: RMGDatabase A clean RMG database object. """ global rmg_database_instance rmg_database_instance = rmg_database_instance or RMGDatabase() return rmg_database_instance
[docs]def load_families_only(rmgdb: RMGDatabase, kinetics_families: Union[str, list] = 'all'): """ A helper function for loading kinetic families from RMG's database. Args: rmgdb (RMGDatabase): The RMG database instance. kinetics_families (Union[str, list], optional): Specific kinetics families to load. """ if kinetics_families not in ('default', 'all', 'none') and not isinstance(kinetics_families, list): raise InputError(f"kinetics families should be either 'default', 'all', 'none', or a list of names, e.g.," f" ['H_Abstraction','R_Recombination'] or ['!Intra_Disproportionation']. " f"Got:\n{kinetics_families}") if rmgdb.kinetics is None or rmgdb.kinetics.families is None: rmgdb.load( path=db_path, thermo_libraries=list(), transport_libraries='none', reaction_libraries=list(), seed_mechanisms=list(), kinetics_families=kinetics_families, kinetics_depositories=['training'], depository=False, )
[docs]def load_rmg_database(rmgdb: RMGDatabase, thermo_libraries: list = None, reaction_libraries: list = None, kinetics_families: Union[str, list] = 'default', load_thermo_libs: bool = True, load_kinetic_libs: bool = True, include_nist: bool = False, ) -> None: """ A helper function for loading the RMG database. Args: rmgdb (RMGDatabase): The RMG database instance. thermo_libraries (list, optional): Specific thermodynamic libraries to load (``None`` will load all). reaction_libraries (list, optional): Specific kinetics libraries to load (``None`` will load all). kinetics_families (list, str, optional): Specific kinetics families to load (either a list or 'default', 'all', 'none') load_thermo_libs (bool, optional): Whether to load thermodynamic libraries, ``True`` to load. load_kinetic_libs (bool, optional): Whether to load kinetics libraries, ``True`` to load. include_nist (bool, optional): Whether to include the NIST kinetics libraries, ``True`` to include, default is ``False`` """ thermo_libraries = thermo_libraries or list() reaction_libraries = reaction_libraries or list() if isinstance(thermo_libraries, str): thermo_libraries.replace(' ', '') thermo_libraries = [lib for lib in thermo_libraries.split(',')] if isinstance(reaction_libraries, str): reaction_libraries.replace(' ', '') reaction_libraries = [lib for lib in reaction_libraries.split(',')] reaction_libraries = [reaction_libraries] if kinetics_families not in ('default', 'all', 'none'): if not isinstance(kinetics_families, list): raise InputError("kinetics_families should be either 'default', 'all', 'none', or a list of names, e.g.," " ['H_Abstraction','R_Recombination'] or ['!Intra_Disproportionation'].") if not thermo_libraries: thermo_path = os.path.join(db_path, 'thermo', 'libraries') for thermo_library_path in os.listdir(thermo_path): # Avoid reading .DS_store files for compatibility with macOS if not thermo_library_path.startswith('.'): thermo_library, _ = os.path.splitext(os.path.basename(thermo_library_path)) thermo_libraries.append(thermo_library) # prioritize libraries thermo_priority = ['BurkeH2O2', 'thermo_DFT_CCSDTF12_BAC', 'DFT_QCI_thermo', 'Klippenstein_Glarborg2016', 'primaryThermoLibrary', 'primaryNS', 'NitrogenCurran', 'NOx2018', 'FFCM1(-)', 'SulfurLibrary', 'SulfurGlarborgH2S'] indices_to_pop = [] for i, lib in enumerate(thermo_libraries): if lib in thermo_priority: indices_to_pop.append(i) for i in reversed(range(len(thermo_libraries))): # pop starting from the end, so other indices won't change if i in indices_to_pop: thermo_libraries.pop(i) thermo_libraries = thermo_priority + thermo_libraries if not reaction_libraries: libraries_path = os.path.join(db_path, 'kinetics', 'libraries') for path, folders, _ in os.walk(libraries_path): if 'Surface' in path: continue for folder in folders: if os.path.isfile(os.path.join(path, folder, 'reactions.py')): prefix = path.split(libraries_path)[1] library = os.path.join(prefix, folder) library = library[1:] if library[0] == '/' else library reaction_libraries.append(library) if not load_kinetic_libs: reaction_libraries = list() if not load_thermo_libs: thermo_libraries = list() # reaction_libraries = list() # empty library list for debugging logger.info('\n\nLoading the RMG database...') kinetics_depositories = ['training', 'NIST'] if include_nist else ['training'] rmgdb.load( path=db_path, thermo_libraries=thermo_libraries, transport_libraries=['PrimaryTransportLibrary', 'NOx2018', 'GRI-Mech'], reaction_libraries=reaction_libraries, seed_mechanisms=list(), kinetics_families=kinetics_families, kinetics_depositories=kinetics_depositories, depository=False, ) for family in rmgdb.kinetics.families.values(): try: family.add_rules_from_training(thermo_database=rmgdb.thermo) except KineticsError: logger.info(f'Could not train family {family}') else: family.fill_rules_by_averaging_up(verbose=False) family.save_order = True logger.info('\n\n')
[docs]def determine_family(reaction: 'ARCReaction', db: Optional[RMGDatabase] = None, ): """ Determine the RMG reaction family for an ARC reaction. A wrapper for ARCReaction.determine_family(). This wrapper is useful because it makes a new instance of the rmgdb if needed. Args: reaction ('ARCReaction'): An ARCReaction object instance. db (RMGDatabase, optional): The RMG database instance. """ global rmg_database_instance if reaction.family is None: db = db or rmg_database_instance if db is None: db = make_rmg_database_object() load_families_only(db) if reaction.rmg_reaction is None: reaction.rmg_reaction_from_arc_species() reaction.determine_family(db)
[docs]def determine_reaction_family(rmgdb: RMGDatabase, reaction: Reaction, save_order: bool = True, ) -> Tuple[Optional['KineticsFamily'], bool]: """ Determine the RMG kinetic family for a given ``ARCReaction`` object instance. Returns ``None`` if no family found or more than one family found. Args: rmgdb (RMGDatabase): The RMG database instance. reaction (Reaction): The RMG Reaction object. save_order (bool, optional): Whether to retain atomic order of the RMG ``reaction`` object instance. Returns: Tuple[Optional[KineticsFamily], bool] - The corresponding RMG reaction's family. ``None`` if no family was found or more than one family were found. - Whether the family is its own reverse. ``True`` if it is. """ for rmg_spc in reaction.reactants + reaction.products: generate_resonance_structures(rmg_spc) fam_list = loop_families(rmgdb=rmgdb, reaction=reaction) families = [fam_l[0] for fam_l in fam_list] if len(set(families)) == 1: family = families[0] if save_order: family.save_order = True return family, family.own_reverse else: return None, False
[docs]def loop_families(rmgdb: RMGDatabase, reaction: Reaction, ) -> List[Tuple['KineticsFamily', list]]: """ Loop through kinetic families and return a list of tuples of (family, degenerate_reactions) corresponding to ``reaction``. Args: rmgdb (RMGDatabase): The RMG database instance. reaction (Reaction): The RMG Reaction object instance. Returns: List[Tuple['KineticsFamily', list]] Entries are tuples of a corresponding RMG KineticsFamily instance and a list of degenerate reactions. """ reaction = reaction.copy() # Use a copy to avoid changing atom order in the molecules by RMG. for spc in reaction.reactants + reaction.products: generate_resonance_structures(spc) fam_list = list() for family in rmgdb.kinetics.families.values(): family.save_order = True degenerate_reactions = list() family_reactions_by_r = list() # Family reactions for the specified reactants. family_reactions_by_rnp = list() # Family reactions for the specified reactants and products. if len(reaction.reactants) == 1: for reactant0 in reaction.reactants[0].molecule: fam_rxn = family.generate_reactions(reactants=[reactant0], products=reaction.products, delete_labels=False, ) if fam_rxn: family_reactions_by_r.extend(fam_rxn) elif len(reaction.reactants) == 2: for reactant0 in reaction.reactants[0].molecule: for reactant1 in reaction.reactants[1].molecule: fam_rxn = family.generate_reactions(reactants=[reactant0, reactant1], products=reaction.products, delete_labels=False, ) if fam_rxn: family_reactions_by_r.extend(fam_rxn) elif len(reaction.reactants) == 3: for reactant0 in reaction.reactants[0].molecule: for reactant1 in reaction.reactants[1].molecule: for reactant2 in reaction.reactants[2].molecule: fam_rxn = family.generate_reactions(reactants=[reactant0, reactant1, reactant2], products=reaction.products, delete_labels=False, ) if fam_rxn: family_reactions_by_r.extend(fam_rxn) if len(reaction.products) == 1: for fam_rxn in family_reactions_by_r: for product0 in reaction.products[0].molecule: if same_species_lists([product0], fam_rxn.products, save_order=True): family_reactions_by_rnp.append(fam_rxn) degenerate_reactions = find_degenerate_reactions(rxn_list=family_reactions_by_rnp, same_reactants=False, kinetics_database=rmgdb.kinetics, save_order=True ) elif len(reaction.products) == 2: for fam_rxn in family_reactions_by_r: for product0 in reaction.products[0].molecule: for product1 in reaction.products[1].molecule: if same_species_lists([product0, product1], fam_rxn.products, save_order=True): family_reactions_by_rnp.append(fam_rxn) degenerate_reactions = find_degenerate_reactions(rxn_list=family_reactions_by_rnp, same_reactants=False, kinetics_database=rmgdb.kinetics, save_order=True ) elif len(reaction.products) == 3: for fam_rxn in family_reactions_by_r: for product0 in reaction.products[0].molecule: for product1 in reaction.products[1].molecule: for product2 in reaction.products[2].molecule: if same_species_lists([product0, product1, product2], fam_rxn.products, save_order=True): family_reactions_by_rnp.append(fam_rxn) degenerate_reactions = find_degenerate_reactions(rxn_list=family_reactions_by_rnp, same_reactants=False, kinetics_database=rmgdb.kinetics, save_order=True ) if degenerate_reactions: fam_list.append((family, degenerate_reactions)) return fam_list
[docs]def determine_rmg_kinetics(rmgdb: RMGDatabase, reaction: Reaction, dh_rxn298: float, ) -> List[Reaction]: """ Determine kinetics for `reaction` (an RMG Reaction object) from RMG's database, if possible. Assigns a list of all matching entries from both libraries and families. Args: rmgdb (RMGDatabase): The RMG database instance. reaction (Reaction): The RMG Reaction object. dh_rxn298 (float): The heat of reaction at 298 K in J/mol. Returns: List[Reaction] All matching RMG reactions (both libraries and families) with a populated ``.kinetics`` attribute. """ reaction = reaction.copy() rmg_reactions = list() # Libraries: for library in rmgdb.kinetics.libraries.values(): library_reactions = library.get_library_reactions() for library_reaction in library_reactions: if reaction.is_isomorphic(library_reaction): library_reaction.comment = f'Library: {library.label}' rmg_reactions.append(library_reaction) break # Families: fam_list = loop_families(rmgdb, reaction) for family, degenerate_reactions in fam_list: for deg_rxn in degenerate_reactions: template = family.retrieve_template(deg_rxn.template) kinetics = family.estimate_kinetics_using_rate_rules(template)[0] kinetics.change_rate(deg_rxn.degeneracy) kinetics = kinetics.to_arrhenius(dh_rxn298) # Convert ArrheniusEP to Arrhenius using the dHrxn at 298K deg_rxn.kinetics = kinetics deg_rxn.comment = f'Family: {deg_rxn.family}' deg_rxn.reactants = reaction.reactants deg_rxn.products = reaction.products rmg_reactions.extend(degenerate_reactions) worked_through_nist_fams = [] # NIST: for family, degenerate_reactions in fam_list: if family not in worked_through_nist_fams: worked_through_nist_fams.append(family) for depo in family.depositories: if 'NIST' in depo.label: for entry in depo.entries.values(): rxn = entry.item rxn.kinetics = entry.data rxn.comment = f'NIST: {entry.index}' if entry.reference is not None: rxn.comment += f'{entry.reference.authors[0]} {entry.reference.year}' rmg_reactions.append(rxn) return rmg_reactions
[docs]def get_family(rmgdb: RMGDatabase, label: Optional[str], ) -> Optional['KineticsFamily']: """ Get a KineticsFamily object instance corresponding to the input family ``label``. Args: rmgdb (RMGDatabase): The RMG database instance. label (str): The label of the desired family. Returns: Optional['KineticsFamily']: The desired family object instance. """ if label is None: return None label = label.lower() for family_label, family in rmgdb.kinetics.families.items(): if family_label.lower() == label: family.save_order = True return family return None
[docs]def clean_rmg_database_object(db: RMGDatabase) -> RMGDatabase: """ clears the global RMG database object. Args: db (RMGDatabase): the database object Returns: RMGDatabase: the database object """ global rmg_database_instance del rmg_database_instance rmg_database_instance = None return make_rmg_database_object()