"""
Processor module for computing thermodynamic properties and rate coefficients using statistical mechanics.
"""
import os
import shutil
from typing import Optional
import arc.plotter as plotter
from arc.common import ARC_PATH, get_logger, read_yaml_file, save_yaml_file
from arc.level import Level
from arc.job.local import execute_command
from arc.statmech.factory import statmech_factory
logger = get_logger()
THERMO_SCRIPT_PATH = os.path.join(ARC_PATH, 'arc', 'scripts', 'rmg_thermo.py')
KINETICS_SCRIPT_PATH = os.path.join(ARC_PATH, 'arc', 'scripts', 'rmg_kinetics.py')
R = 8.31446261815324 # J/(mol*K)
EA_UNIT_CONVERSION = {'J/mol': 1, 'kJ/mol': 1e+3, 'cal/mol': 4.184, 'kcal/mol': 4.184e+3}
[docs]def process_arc_project(thermo_adapter: str,
kinetics_adapter: str,
project: str,
project_directory: str,
species_dict: dict,
reactions: list,
output_dict: dict,
bac_type: Optional[str] = None,
sp_level: Optional[Level] = None,
freq_scale_factor: float = 1.0,
compute_thermo: bool = True,
compute_rates: bool = True,
compute_transport: bool = False,
T_min: tuple = None,
T_max: tuple = None,
T_count: int = 50,
lib_long_desc: str = '',
compare_to_rmg: bool = True,
three_params: bool = True,
skip_nmd: bool = False,
) -> None:
"""
Process an ARC project, generate thermo and rate coefficients using statistical mechanics (statmech).
Args:
thermo_adapter (str): The software to use for calculating thermodynamic data.
kinetics_adapter (str): The software to use for calculating rate coefficients.
project (str): The ARC project name.
project_directory (str): The path to the ARC project directory.
species_dict (dict): Keys are labels, values are ARCSpecies objects.
reactions (list): Entries are ARCReaction objects.
output_dict (dict): Keys are labels, values are output file paths.
See Scheduler for a description of this dictionary.
bac_type (str, optional): The bond additivity correction type. 'p' for Petersson- or 'm' for Melius-type BAC.
``None`` to not use BAC.
sp_level (Level, optional): The level of theory used for energy corrections.
freq_scale_factor (float, optional): The harmonic frequencies scaling factor.
compute_thermo (bool, optional): Whether to compute thermodynamic properties for the provided species.
compute_rates (bool, optional): Whether to compute high pressure limit rate coefficients.
compute_transport (bool, optional): Whether to compute transport properties.
T_min (tuple, optional): The minimum temperature for kinetics computations, e.g., (500, 'K').
T_max (tuple, optional): The maximum temperature for kinetics computations, e.g., (3000, 'K').
T_count (int, optional): The number of temperature points between ``T_min`` and ``T_max``.
lib_long_desc (str, optional): A multiline description of levels of theory for the resulting RMG libraries.
compare_to_rmg (bool, optional): If ``True``, ARC's calculations will be compared against estimations
from RMG's database.
three_params (bool, optional): Compute rate coefficients using the modified three-parameter Arrhenius equation
format (``True``, default) or classical two-parameter Arrhenius equation format
(``False``).
skip_nmd (bool, optional): Whether to skip the normal mode displacement check analysis. Defaults to ``False``.
"""
T_min = T_min or (300, 'K')
T_max = T_max or (3000, 'K')
if isinstance(T_min, (int, float)):
T_min = (T_min, 'K')
if isinstance(T_max, (int, float)):
T_max = (T_max, 'K')
T_count = T_count or 50
species_for_thermo_lib, unconverged_species = list(), list()
rxns_for_kinetics_lib, unconverged_rxns = list(), list()
species_for_transport_lib = list()
bde_report = dict()
output_directory = os.path.join(project_directory, 'output')
libraries_path = os.path.join(output_directory, 'RMG libraries')
if not os.path.isdir(output_directory):
os.makedirs(output_directory)
# 1. Rates
if compute_rates:
for reaction in reactions:
if reaction.ts_species.ts_guesses_exhausted:
continue
species_converged = True
considered_labels = list() # Species labels considered in this reaction.
if output_dict[reaction.ts_label]['convergence']:
for species in reaction.r_species + reaction.p_species:
if species.label in considered_labels:
# Consider cases where the same species appears in a reaction both as a reactant
# and as a product (e.g., H2O that catalyzes a reaction).
continue
considered_labels.append(species.label)
if output_dict[species.label]['convergence']:
statmech_adapter = statmech_factory(statmech_adapter_label=kinetics_adapter,
output_directory=output_directory,
output_dict=output_dict,
bac_type=None,
sp_level=sp_level,
freq_scale_factor=freq_scale_factor,
species=species,
)
statmech_adapter.compute_thermo(kinetics_flag=True)
else:
logger.error(f'Species {species.label} did not converge, cannot compute a rate coefficient '
f'for {reaction.label}')
unconverged_species.append(species)
species_converged = False
if species_converged:
statmech_adapter = statmech_factory(statmech_adapter_label=kinetics_adapter,
output_directory=output_directory,
output_dict=output_dict,
bac_type=None,
sp_level=sp_level,
freq_scale_factor=freq_scale_factor,
reaction=reaction,
species_dict=species_dict,
T_min=T_min,
T_max=T_max,
T_count=T_count,
three_params=three_params,
skip_nmd=skip_nmd,
)
statmech_adapter.compute_high_p_rate_coefficient()
if reaction.kinetics is not None:
rxns_for_kinetics_lib.append(reaction)
else:
unconverged_rxns.append(reaction)
else:
unconverged_rxns.append(reaction)
if rxns_for_kinetics_lib:
plotter.save_kinetics_lib(rxn_list=rxns_for_kinetics_lib,
path=libraries_path,
name=project,
lib_long_desc=lib_long_desc,
T_min=T_min[0],
T_max=T_max[0],
)
# 2. Thermo
if compute_thermo:
for species in species_dict.values():
if (species.compute_thermo or species.e0_only) and output_dict[species.label]['convergence']:
statmech_adapter = statmech_factory(statmech_adapter_label=thermo_adapter,
output_directory=output_directory,
output_dict=output_dict,
bac_type=bac_type,
sp_level=sp_level,
freq_scale_factor=freq_scale_factor,
species=species,
)
statmech_adapter.compute_thermo(kinetics_flag=False, e0_only=species.e0_only)
if species.thermo is not None:
species_for_thermo_lib.append(species)
elif not species.e0_only and species not in unconverged_species:
unconverged_species.append(species)
plotter.augment_arkane_yml_file_with_mol_repr(species, output_directory)
elif species.compute_thermo and not output_dict[species.label]['convergence'] \
and species not in unconverged_species:
unconverged_species.append(species)
if species_for_thermo_lib:
plotter.save_thermo_lib(species_list=species_for_thermo_lib,
path=libraries_path,
name=project,
lib_long_desc=lib_long_desc)
# 3. Transport
if compute_transport:
for species in species_dict.values():
if output_dict[species.label]['job_types']['onedmin'] and output_dict[species.label]['convergence']:
pass
# todo
if species_for_transport_lib:
plotter.save_transport_lib(species_list=species_for_thermo_lib,
path=libraries_path,
name=project,
lib_long_desc=lib_long_desc)
# 4. BDE
for species in species_dict.values():
# looping again to make sure all relevant Species.e0 attributes were set
if species.bdes is not None:
bde_report[species.label] = process_bdes(label=species.label, species_dict=species_dict)
if bde_report:
bde_path = os.path.join(project_directory, 'output', 'BDE_report.txt')
plotter.log_bde_report(path=bde_path, bde_report=bde_report, spc_dict=species_dict)
# Comparisons
if compare_to_rmg:
compare_thermo(species_for_thermo_lib=species_for_thermo_lib,
output_directory=output_directory)
compare_rates(rxns_for_kinetics_lib=rxns_for_kinetics_lib,
output_directory=output_directory,
T_min=T_min,
T_max=T_max,
T_count=T_count)
compare_transport(species_for_transport_lib,
output_directory=output_directory)
write_unconverged_log(unconverged_species=unconverged_species,
unconverged_rxns=unconverged_rxns,
log_file_path=os.path.join(output_directory, 'unconverged_species.log'))
clean_output_directory(project_directory)
[docs]def compare_thermo(species_for_thermo_lib: list,
output_directory: str,
) -> None:
"""
Compare the calculated thermo with RMG's estimations or libraries.
Args:
species_for_thermo_lib (list): Species for which thermochemical properties were computed.
output_directory (str): The path to the project's output folder.
"""
species_to_compare = list() # species for which thermo was both calculated and estimated.
species_thermo_path = os.path.join(output_directory, 'RMG_thermo.yml')
save_yaml_file(path=species_thermo_path,
content=[{'label': spc.label, 'adjlist': spc.mol.copy(deep=True).to_adjacency_list()} for spc in species_for_thermo_lib])
command = f'python {THERMO_SCRIPT_PATH} {species_thermo_path}'
execute_command(command=command, no_fail=True)
species_list = read_yaml_file(path=species_thermo_path)
for original_spc, rmg_spc in zip(species_for_thermo_lib, species_list):
h298, s298, comment = rmg_spc.get('h298', None), rmg_spc.get('s298', None), rmg_spc.get('comment', None)
if h298 is not None and s298 is not None:
original_spc.rmg_thermo = {'H298': h298, 'S298': s298, 'comment': comment}
species_to_compare.append(original_spc)
if len(species_to_compare):
plotter.draw_thermo_parity_plots(species_list=species_to_compare, path=output_directory)
[docs]def compare_rates(rxns_for_kinetics_lib: list,
output_directory: str,
T_min: tuple = None,
T_max: tuple = None,
T_count: int = 50,
) -> list:
"""
Compare the calculated rates with RMG's estimations or libraries.
Args:
rxns_for_kinetics_lib (list): Reactions for which rate coefficients were computed.
output_directory (str): The path to the project's output folder.
T_min (tuple, optional): The minimum temperature for kinetics computations, e.g., (500, 'K').
T_max (tuple, optional): The maximum temperature for kinetics computations, e.g., (3000, 'K').
T_count (int, optional): The number of temperature points between ``T_min`` and ``T_max``.
Returns:
list: Reactions for which a rate was both calculated and estimated.
Returning this list for testing purposes.
"""
reactions_to_compare = list() # reactions for which a rate was both calculated and estimated.
reactions_kinetics_path = os.path.join(output_directory, 'RMG_kinetics.yml')
save_yaml_file(path=reactions_kinetics_path,
content=[{'label': rxn.label,
'reactants': [spc.mol.to_adjacency_list() for spc in rxn.r_species],
'products': [spc.mol.to_adjacency_list() for spc in rxn.p_species],
'dh_rxn298': rxn.dh_rxn298,
'family': rxn.family,
} for rxn in rxns_for_kinetics_lib],
)
command = f'python {KINETICS_SCRIPT_PATH} {reactions_kinetics_path}'
o, e = execute_command(command=command, no_fail=True)
# print(f'output: {o}\nerror: {e}')
reactions_list_w_rmg_kinetics = read_yaml_file(path=reactions_kinetics_path)
for original_rxn, rxn_w_rmg_kinetics in zip(rxns_for_kinetics_lib, reactions_list_w_rmg_kinetics):
original_rxn.rmg_kinetics = original_rxn.rmg_kinetics or list()
if 'kinetics' not in rxn_w_rmg_kinetics:
continue
for kinetics_entry in rxn_w_rmg_kinetics['kinetics']:
if 'A' in kinetics_entry and 'n' in kinetics_entry and 'Ea' in kinetics_entry:
original_rxn.rmg_kinetics.append({'A': kinetics_entry['A'],
'n': kinetics_entry['n'],
'Ea': kinetics_entry['Ea'],
'comment': kinetics_entry['comment'],
})
reactions_to_compare.append(original_rxn)
if reactions_to_compare:
plotter.draw_kinetics_plots(reactions_to_compare,
T_min=T_min,
T_max=T_max,
T_count=T_count,
path=output_directory)
return reactions_to_compare
[docs]def compare_transport(species_for_transport_lib: list,
output_directory: str,
) -> None:
"""
Compare the calculates transport data with RMG's estimations.
Args:
species_for_transport_lib (list): Species for which thermochemical properties were computed.
output_directory (str): The path to the project's output folder.
"""
pass
# todo
[docs]def process_bdes(label: str,
species_dict: dict,
) -> dict:
"""
Process bond dissociation energies for a single parent species represented by `label`.
Args:
label (str): The species label.
species_dict (dict): Keys are labels, values are ARCSpecies objects.
Returns:
dict: The BDE report for a single species. Keys are pivots, values are energies in kJ/mol.
"""
source = species_dict[label]
bde_report = dict()
if source.e0 is None:
logger.error(f'Cannot calculate BDEs without E0 for {label}. Make sure freq and sp jobs ran successfully '
f'for this species.')
return bde_report
for bde_indices in source.bdes:
found_a_label, cyclic = False, False
# Index 0 of the tuple:
if source.mol.atoms[bde_indices[0] - 1].is_hydrogen():
e1 = species_dict['H'].e0
else:
bde_label = f'{label}_BDE_{bde_indices[0]}_{bde_indices[1]}_A'
bde_cyclic_label = f'{label}_BDE_{bde_indices[0]}_{bde_indices[1]}_cyclic'
cyclic = bde_cyclic_label in species_dict.keys()
if not cyclic and bde_label not in species_dict.keys():
logger.error(f'Could not find BDE species {bde_label} for generating a BDE report for {label}. '
f'Not generating a BDE report for this species.')
return dict()
found_a_label = True
e1 = species_dict[bde_cyclic_label if cyclic else bde_label].e0
# Index 1 of the tuple:
if cyclic:
e2 = 0
elif source.mol.atoms[bde_indices[1] - 1].is_hydrogen():
e2 = species_dict['H'].e0
else:
letter = 'B' if found_a_label else 'A'
bde_label = f'{label}_BDE_{bde_indices[0]}_{bde_indices[1]}_{letter}'
if bde_label not in species_dict.keys():
logger.error(f'Could not find BDE species {bde_label} for generating a BDE report for {label}. '
f'Not generating a BDE report for this species.')
return dict()
e2 = species_dict[bde_label].e0
if e1 is not None and e2 is not None:
bde_report[bde_indices] = e1 + e2 - source.e0 # products - reactant
else:
bde_report[bde_indices] = 'N/A'
logger.error(f'Could not calculate BDE for {label} between atoms '
f'{bde_indices[0]} ({source.mol.atoms[bde_indices[0] - 1].element.symbol}) '
f'and {bde_indices[1]} ({source.mol.atoms[bde_indices[1] - 1].element.symbol})')
return bde_report
[docs]def write_unconverged_log(unconverged_species: list,
unconverged_rxns: list,
log_file_path: str,
) -> None:
"""
Write a log file of unconverged species and reactions.
Args:
unconverged_species (list): List of unconverged species to report.
unconverged_rxns (list): List of unconverged reactions to report.
log_file_path (str): The path to the log file to write.
"""
if unconverged_species or unconverged_rxns:
with open(log_file_path, 'w') as f:
if unconverged_species:
f.write('Species:')
for species in unconverged_species:
f.write(species.label)
if species.is_ts:
f.write(f' rxn: {species.rxn_label}')
elif species.mol is not None:
f.write(f' SMILES: {species.mol.copy(deep=True).to_smiles()}')
f.write('\n')
if unconverged_rxns:
f.write('Reactions:')
for reaction in unconverged_rxns:
f.write(reaction.label)
[docs]def clean_output_directory(project_directory: str) -> None:
"""
A helper function to organize the output directory.
- remove redundant rotor.txt files (from kinetics jobs)
- move remaining rotor files to the rotor directory
- move the Arkane YAML file from the `species` directory to the base directory, and delete `species`
Args:
project_directory (str): The path to the ARC project directory.
"""
for base_folder in ['Species', 'rxns']:
base_path = os.path.join(project_directory, 'output', base_folder)
dir_names = list()
for (_, dirs, _) in os.walk(base_path):
dir_names.extend(dirs)
break # don't continue to explore subdirectories
for species_label in dir_names:
species_path = os.path.join(base_path, species_label)
file_names = list()
for (_, _, files) in os.walk(species_path):
file_names.extend(files)
break # don't continue to explore subdirectories
if any(['rotor' in file_name for file_name in file_names]) \
and not os.path.exists(os.path.join(species_path, 'rotors')):
os.makedirs(os.path.join(species_path, 'rotors'))
for file_name in file_names:
if '_rotor' in file_name: # move to the rotor directory
shutil.move(src=os.path.join(species_path, file_name),
dst=os.path.join(species_path, 'rotors', file_name))