"""
A module for representing and manipulating `Z matrices <https://en.wikipedia.org/wiki/Z-matrix_(chemistry)>`_
(internal coordinates).
An example for a (consolidated) zmat representation for methane::
{'symbols': ('C', 'H', 'H', 'H', 'H'),
'coords': ((None, None, None),
('R_1_0', None, None),
('R_2|3|4_1|2|3', 'A_2|3|4_1|2|3_0|0|0', None),
('R_2|3|4_1|2|3', 'A_2|3|4_1|2|3_0|0|0', 'D_3_2_0_1'),
('R_2|3|4_1|2|3', 'A_2|3|4_1|2|3_0|0|0', 'D_4_3_0_2')),
'vars': {'R_1_0': 1.09125,
'D_3_2_0_1': 120.0,
'D_4_3_0_2': 240.0,
'R_2|3|4_1|2|3': 1.78200,
'A_2|3|4_1|2|3_0|0|0': 35.26439},
'map': {0: 0, 1: 1, 2: 2, 3: 3, 4: 4},
}
Isotope information is not saved in the zmat, it exists in the xyz dict,
and can be attained using the zmat map if needed.
Note that a 180 (or 0) degree for an angle in a Z matrix is not allowed, since a GIC (Generalized Internal Coordinates)
optimization has no defined derivative at 180 degrees. Instead, a dummy atom 'X' is used.
Dihedral angles may have any value.
"""
import math
import numpy as np
import operator
import re
from copy import deepcopy
from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union
from arc.common import get_logger, is_angle_linear, key_by_val
from arc.exceptions import ZMatError, VectorsError
from arc.molecule.molecule import Molecule
from arc.species.vectors import calculate_param, get_vector_length
if TYPE_CHECKING:
from arc.molecule.molecule import Atom
logger = get_logger()
DEFAULT_CONSOLIDATION_R_TOL = 1e-4
DEFAULT_CONSOLIDATION_A_TOL = 1e-3
DEFAULT_CONSOLIDATION_D_TOL = 1e-3
DEFAULT_COMPARISON_R_TOL = 0.01 # Angstrom
DEFAULT_COMPARISON_A_TOL = 2.0 # degrees
DEFAULT_COMPARISON_D_TOL = 2.0 # degrees
TOL_180 = 0.9 # degrees
KEY_FROM_LEN = {2: 'R', 3: 'A', 4: 'D'}
[docs]
def xyz_to_zmat(xyz: Dict[str, tuple],
mol: Optional[Molecule] = None,
constraints: Optional[Dict[str, List[Tuple[int, ...]]]] = None,
consolidate: bool = True,
consolidation_tols: Dict[str, float] = None,
fragments: Optional[List[List[int]]] = None,
) -> Dict[str, tuple]:
"""
Generate a z-matrix from cartesian coordinates.
The zmat is a dictionary with the following keys:
- 'symbols': a tuple of strings representing atomic symbols; defined as a list and converted to a tuple
- 'coords': a tuple of tuples representing internal coordinates; defined as a list and converted to a tuple
- 'vars': a dictionary of all variables defined for the coordinates
- 'map': a dictionary connecting atom indices in the zmat (keys) to atom indices in the mol/coords (values)
This function assumes ``xyz`` has no dummy atoms.
This function does not attempt to resolve constrain locks, and assumes only few non-circular constraints were given.
Args:
xyz (dict): The xyz coordinates.
mol (Molecule, optional): The corresponding RMG Molecule. If given, the bonding information will be used
to generate a more meaningful zmat.
constraints (dict, optional): Accepted keys are:
'R_atom', 'R_group',
'A_atom', 'A_group',
'D_atom', 'D_group'.
'R', 'A', and 'D' are constrain distances, angles, and dihedrals, respectively.
Values are lists of atom index tuples (0-indexed). The atom indices order matters.
Specifying '_atom' will cause only the first atom in the specified list values
to translate/rotate if the corresponding zmat parameter is changed.
Specifying '_group' will cause the entire group connected to the first atom
to translate/rotate if the corresponding zmat parameter is changed.
Specifying '_groups' (only valid for D) will cause the groups connected to
the first two atoms to translate/rotate if the corresponding parameter is changed.
Note that 'D_groups' should not be passed directly to this function, but to
arc.species.converter.modify_coords() instead (it is a composite constraint).
consolidate (bool, optional): Whether to consolidate the zmat after generation, ``True`` to consolidate.
consolidation_tols (dict, optional): Keys are 'R', 'A', 'D', values are floats representing absolute tolerance
for consolidating almost equal internal coordinates.
fragments (List[List[int]], optional):
Fragments represented by the species, i.e., as in a VdW well or a TS.
Entries are atom index lists of all atoms in a fragment, each list represents a different fragment.
indices are 0-indexed.
Raises:
ZMatError: If the zmat could not be generated.
Returns: Dict[str, tuple]
The z-matrix.
"""
fragments = fragments or [list(range(len(xyz['symbols'])))]
constraints = constraints or dict()
if mol is None and any('group' in constraint_key for constraint_key in constraints.keys()):
raise ZMatError(f'Cannot generate a constrained zmat without mol. Got mol=None and constraints=\n{constraints}')
for constraint_list in constraints.values():
for constraint_tuple in constraint_list:
for index in constraint_tuple:
if mol is not None and index >= len(mol.atoms):
raise ZMatError(f'The following constraints (containing atom index {index}) are invalid for '
f'a molecule with only {len(mol.atoms)} atoms:\n{constraints}')
if index >= len(xyz['symbols']):
raise ZMatError(f'The following constraints (containing atom index {index}) are invalid for '
f'coordinates with only {len(xyz["symbols"])} atoms:\n{constraints}')
xyz = xyz.copy()
zmat = {'symbols': list(), 'coords': list(), 'vars': dict(), 'map': dict()}
atom_order = get_atom_order(xyz=xyz, mol=mol, constraints_dict=constraints, fragments=fragments)
connectivity = get_connectivity(mol=mol) if mol is not None else None
skipped_atoms = list() # atoms for which constrains are applied
for atom_index in atom_order:
zmat, xyz, skipped = _add_nth_atom_to_zmat(
zmat=zmat,
xyz=xyz,
connectivity=connectivity,
n=len(list(zmat['symbols'])),
atom_index=atom_index,
constraints=constraints,
fragments=fragments,
)
skipped_atoms.extend(skipped)
while len(skipped_atoms):
num_of_skipped_atoms = len(skipped_atoms)
indices_to_pop = list()
for i, atom_index in enumerate(skipped_atoms):
zmat, xyz, skipped = _add_nth_atom_to_zmat(
zmat=zmat,
xyz=xyz,
connectivity=connectivity,
n=len(list(zmat['symbols'])),
atom_index=atom_index,
constraints=constraints,
fragments=fragments,
)
if not len(skipped):
# This atom was not skipped this time, remove it from the skipped atoms list....
indices_to_pop.append(i)
for i in reversed(indices_to_pop):
skipped_atoms.pop(i)
if num_of_skipped_atoms == len(skipped_atoms):
# No atoms were popped from the skipped atoms list when iterating through all skipped atoms.
raise ZMatError(f"Could not generate the zmat, skipped atoms could not be assigned, there's probably "
f"a constraint lock. The partial zmat is:\n{zmat}\n\nskipped atoms are:\n{skipped_atoms}.\n"
f"constraints were:\n{constraints}")
if consolidate and not constraints:
try:
zmat = consolidate_zmat(zmat, mol, consolidation_tols)
except (KeyError, ZMatError) as e:
logger.error(f'Could not consolidate zmat, got:\n{e.__class__}: {str(e)}')
logger.error(f'Generating zmat without consolidation.')
zmat['symbols'] = tuple(zmat['symbols'])
zmat['coords'] = tuple(zmat['coords'])
return zmat
[docs]
def determine_r_atoms(zmat: Dict[str, Union[dict, tuple]],
xyz: Dict[str, tuple],
connectivity: Dict[int, List[int]],
n: int,
atom_index: int,
r_constraint: Optional[Tuple[int]] = None,
a_constraint: Optional[Tuple[int, int]] = None,
d_constraint: Optional[Tuple[int, int, int]] = None,
trivial_assignment: bool = False,
fragments: Optional[List[List[int]]] = None,
) -> Optional[List[int]]:
"""
Determine the atoms for defining the distance R.
This should be in the form: [n, <some other atom already in the zmat>]
Args:
zmat (dict): The zmat.
xyz (dict): The xyz dict.
connectivity (dict): The atoms connectivity (keys are indices in the mol/xyz).
n (int): The 0-index of the atom in the zmat to be added.
atom_index (int): The 0-index of the atom in the molecule or cartesian coordinates to be added.
(``n`` and ``atom_index`` refer to the same atom, but it might have different indices
in the zmat and the molecule/xyz)
r_constraint (tuple, optional): R-type constraints. The atom index to which the atom being checked is
constrained. ``None`` if it is not constrained.
a_constraint (tuple, optional): A-type constraints. The atom indices to which the atom being checked is
constrained. ``None`` if it is not constrained.
d_constraint (tuple, optional): D-type constraints. The atom indices to which the atom being checked is
constrained. ``None`` if it is not constrained.
trivial_assignment (bool, optional): Whether to attempt assigning atoms without considering connectivity
if the connectivity assignment fails.
fragments (List[List[int]], optional):
Fragments represented by the species, i.e., as in a VdW well or a TS.
Entries are atom index lists of all atoms in a fragment, each list represents a different fragment.
indices are 0-indexed.
Raises:
ZMatError: If the R atoms could not be determined.
Returns: Optional[List[int]]
The 0-indexed z-mat R atoms.
"""
if is_atom_in_new_fragment(atom_index=atom_index, zmat=zmat, fragments=fragments):
connectivity = None
if len(zmat['coords']) == 0:
# This is the 1st atom added to the zmat, there's no distance definition here.
r_atoms = None
elif any(constraint is not None for constraint in [r_constraint, a_constraint, d_constraint]):
# 1. Always use the constraint if given.
if r_constraint is not None:
r_atoms = [n] + [key_by_val(zmat['map'], r_constraint[1])]
elif a_constraint is not None:
r_atoms = [n] + [key_by_val(zmat['map'], a_constraint[1])]
elif d_constraint is not None:
r_atoms = [n] + [key_by_val(zmat['map'], d_constraint[1])]
elif connectivity is not None:
# 2. Use connectivity if the atom is not constrained.
r_atoms = [n]
atom_dict = dict() # Keys are neighbor atom indices, values are tuples of depth and linearity.
for atom_c in connectivity[atom_index]:
# Code adopted from arc.common.determine_top_group_indices():.
linear = True # Assume the chain from atom_index towards atom_c is linear unless otherwise proven.
explored_atom_list, atom_list_to_explore1, atom_list_to_explore2, top = [atom_index], [atom_c], [], []
while len(atom_list_to_explore1 + atom_list_to_explore2):
for atom3 in atom_list_to_explore1:
top.append(atom3)
if atom3 in connectivity:
for atom4 in connectivity[atom3]:
if atom4 not in explored_atom_list and atom4 not in atom_list_to_explore2:
if xyz['symbols'][atom4] in ['H', 'F', 'Cl', 'Br', 'I', 'X']:
# Append w/o further exploration.
top.append(atom4)
else:
atom_list_to_explore2.append(atom4) # Explore it further.
explored_atom_list.append(atom3) # Mark as explored.
atom_list_to_explore1, atom_list_to_explore2 = atom_list_to_explore2, []
if len(top) >= 2:
# Calculate the angle formed with the index_atom.
angle = calculate_param(coords=xyz['coords'], atoms=[atom_index] + top[-2:], index=0)
if not is_angle_linear(angle, tolerance=TOL_180):
linear = False
if len(top) >= 3:
# If it's not linear and there are 3 or more atoms, we have all the info that we need.
break
atom_dict[atom_c] = (len(top), linear)
long_non_linear, long_linear, two_non_linear, two_linear, one = list(), list(), list(), list(), list()
for atom_c, val in atom_dict.items():
if atom_c in list(zmat['map'].values()):
zmat_index_c = key_by_val(zmat['map'], atom_c)
if not is_dummy(zmat, zmat_index_c):
if val[0] >= 3 and not val[1]:
long_non_linear.append(zmat_index_c)
elif val[0] >= 3 and val[1]:
long_linear.append(zmat_index_c)
elif val[0] == 2 and not val[1]:
two_non_linear.append(zmat_index_c)
elif val[0] == 2 and val[1]:
two_linear.append(zmat_index_c)
elif val[0] == 1:
one.append(zmat_index_c)
if len(long_non_linear):
r_atoms.append(long_non_linear[0])
elif len(two_non_linear):
r_atoms.append(two_non_linear[0])
elif len(long_linear):
r_atoms.append(long_linear[0])
elif len(two_linear):
r_atoms.append(two_linear[0])
elif len(one):
r_atoms.append(one[0])
if len(set(r_atoms)) != 2:
trivial_assignment = True
else:
trivial_assignment = True
r_atoms = list()
if trivial_assignment and isinstance(r_atoms, list) and len(r_atoms) != 2:
# 3. Use trivial atom assigment if constraint and connectivity were not given.
r_atoms = [n]
if len(zmat['coords']) in [1, 2]:
r_atoms.append(len(zmat['coords']) - 1)
else:
for i in reversed(range(n)):
if not is_dummy(zmat, i):
r_atoms.append(i)
break
if len(set(r_atoms)) != 2:
raise ZMatError(f'Could not come up with two unique non-dummy r_atoms (r_atoms = {r_atoms}).')
if r_atoms is not None and r_atoms[-1] not in list(zmat['map'].keys()):
raise ZMatError(f'The reference R atom {r_atoms[-1]} for the index atom {atom_index} has not been '
f'added to the zmat yet. Added atoms are (zmat index: xyz index): {zmat["map"]}.')
if r_atoms is not None and len(set(r_atoms)) != 2:
raise ZMatError(f'Could not come up with two unique r_atoms (r_atoms = {r_atoms}).')
return r_atoms
[docs]
def determine_a_atoms(zmat: Dict[str, Union[dict, tuple]],
coords: Union[list, tuple],
connectivity: Dict[int, List[int]],
r_atoms: Optional[List[int]],
n: int,
atom_index: int,
a_constraint: Optional[Tuple[int, int]] = None,
d_constraint: Optional[Tuple[int, int, int]] = None,
a_constraint_type: Optional[str] = None,
trivial_assignment: bool = False,
fragments: Optional[List[List[int]]] = None,
) -> Optional[List[int]]:
"""
Determine the atoms for defining the angle A.
This should be in the form: [n, r_atoms[1], <some other atom already in the zmat>]
Args:
zmat (dict): The zmat.
coords (list, tuple): Just the 'coords' part of the xyz dict.
connectivity (dict): The atoms connectivity (keys are indices in the mol/xyz).
r_atoms (list): The determined r_atoms.
n (int): The 0-index of the atom in the zmat to be added.
atom_index (int): The 0-index of the atom in the molecule or cartesian coordinates to be added.
(``n`` and ``atom_index`` refer to the same atom, but it might have different indices
in the zmat and the molecule/xyz)
a_constraint (tuple, optional): A-type constraints. The atom indices to which the atom being checked is
constrained. ``None`` if it is not constrained.
d_constraint (tuple, optional): D-type constraints. The atom indices to which the atom being checked is
constrained. ``None`` if it is not constrained.
a_constraint_type (str, optional): The A constraint type ('A_atom', or 'A_group').
trivial_assignment (bool, optional): Whether to attempt assigning atoms without considering connectivity
if the connectivity assignment fails.
fragments (List[List[int]], optional):
Fragments represented by the species, i.e., as in a VdW well or a TS.
Entries are atom index lists of all atoms in a fragment, each list represents a different fragment.
Raises:
ZMatError: If the A atoms could not be determined.
indices are 0-indexed.
Returns: Optional[List[int]]
The 0-indexed z-mat A atoms.
"""
if r_atoms is not None and is_atom_in_new_fragment(atom_index=atom_index, zmat=zmat,
fragments=fragments, skip_atoms=r_atoms):
connectivity = None
if r_atoms is not None and len(r_atoms) != 2:
raise ZMatError(f'r_atoms must be a list of length 2, got {r_atoms}')
if len(zmat['coords']) <= 1:
# This is the 1st or 2nd atom added to the zmat, there's no angle definition here.
a_atoms = None
elif a_constraint is not None:
# always use the constraint if given
if a_constraint_type not in ['A_atom', 'A_group', None]:
raise ZMatError(f'Got an invalid A constraint type "{a_constraint_type}" for {a_constraint}')
a_atoms = [n] + [key_by_val(zmat['map'], atom) for atom in a_constraint[1:]]
elif d_constraint is not None:
# consider d_constraint for consistency if a_constraint is None
a_atoms = [n] + [key_by_val(zmat['map'], atom) for atom in d_constraint[1:3]]
elif connectivity is not None:
a_atoms = [atom for atom in r_atoms]
for atom in connectivity[zmat['map'][a_atoms[-1]]] + connectivity[atom_index]:
# An angle should be defined between an atom, its neighbor, and its neighbor's neighbor.
if atom in list(zmat['map'].values()):
zmat_index = key_by_val(zmat['map'], atom)
if atom != atom_index and zmat_index not in a_atoms and not is_dummy(zmat, zmat_index):
r"""
Check whether this atom (B) is part of a linear chain. If it is, try to correctly determine
dihedrals in this molecule w/o this atom, otherwise it's meaningless, and the zmat looses info.
D (atom_index, r_atoms[0])
/
E -- A -- B -- C (r_atoms[1])
/
F (B is the atom considered here, corresponding to 'atom' / 'zmat_index')
"""
i = 0
atom_b, atom_c = atom, zmat['map'][r_atoms[1]]
while i < len(list(connectivity.keys())):
num_of_neighbors = len(list(connectivity[atom_b]))
if num_of_neighbors == 1:
# Atom B is only connected to C, no need to intervene.
break
elif num_of_neighbors == 2:
# Atom B might be in a linear chain, determine the A -- B -- C angle.
b_neighbors = connectivity[atom_b]
atom_a = b_neighbors[0] if b_neighbors[0] != atom_c else b_neighbors[1]
zmat_index = key_by_val(zmat['map'], atom_b)
elif num_of_neighbors > 2:
# Atom B does not necessarily lead to a linear A -- B -- C chain, no need to intervene.
zmat_index = key_by_val(zmat['map'], atom_b)
break
i += 1 # Don't loop forever.
a_atoms.append(zmat_index)
break
if len(set(a_atoms)) != 3:
trivial_assignment = True
else:
trivial_assignment = True
a_atoms = list()
if trivial_assignment and isinstance(a_atoms, list) and len(a_atoms) != 3:
a_atoms = [atom for atom in r_atoms]
for i in reversed(range(n)):
r"""
Check whether this atom (B) is part of a linear chain. If it is, try to correctly determine
dihedrals in this molecule w/o this atom, otherwise it's meaningless, and the zmat looses info.
D (atom_index, r_atoms[0])
/
E -- A -- B -- C (r_atoms[1])
/
F (B is the atom considered here)
"""
zmat_index = i
if i not in a_atoms and i in list(zmat['map'].keys()) and not is_dummy(zmat, i):
zmat_index, j = i, n - 1
atom_b, atom_c = zmat['map'][i], zmat['map'][r_atoms[1]]
while j > 0:
atom_a = zmat['map'][j]
if j != i and atom_a not in [atom_b, atom_c] \
and (j in list(zmat['map'].keys()) and not is_dummy(zmat, j)
or j not in list(zmat['map'].keys())):
zmat_index = key_by_val(zmat['map'], atom_b)
a_atoms.append(zmat_index)
break
j -= 1 # Don't loop forever.
if len(a_atoms) == 3:
break
if len(a_atoms) == 2 and zmat_index not in a_atoms:
a_atoms.append(zmat_index)
if a_atoms is not None and any([a_atom not in list(zmat['map'].keys()) for a_atom in a_atoms[1:]]):
raise ZMatError(f'The reference A atom in {a_atoms} for the index atom {atom_index} has not been '
f'added to the zmat yet. Added atoms are (zmat index: xyz index): {zmat["map"]}.')
if a_atoms is not None and len(set(a_atoms)) != 3:
# fallback to adding an atom that is not in a_atoms and not in the zmat already
for i in reversed(range(len(coords))):
if i not in a_atoms and i in list(zmat['map'].keys()) and not is_dummy(zmat, i):
a_atoms.append(i)
break
if a_atoms is not None and len(set(a_atoms)) != 3:
raise ZMatError(f'Could not come up with three unique a_atoms (a_atoms = {a_atoms}).')
return a_atoms
[docs]
def determine_d_atoms(zmat: Dict[str, Union[dict, tuple]],
xyz: Dict[str, tuple],
coords: Union[list, tuple],
connectivity: Dict[int, List[int]],
a_atoms: Optional[List[int]],
n: int,
atom_index: int,
d_constraint: Optional[Tuple[int, int, int]] = None,
d_constraint_type: Optional[str] = None,
specific_atom: Optional[int] = None,
dummy: bool = False,
fragments: Optional[List[List[int]]] = None,
) -> Optional[List[int]]:
"""
Determine the atoms for defining the dihedral angle D.
This should be in the form: [n, a_atoms[1], a_atoms[2], <some other atom already in the zmat>]
Args:
zmat (dict): The zmat.
xyz (dict): The xyz dict.
coords (list, tuple): Just the 'coords' part of the xyz dict.
connectivity (dict): The atoms connectivity (keys are indices in the mol/xyz).
a_atoms (list): The determined a_atoms.
n (int): The 0-index of the atom in the zmat to be added.
atom_index (int): The 0-index of the atom in the molecule or cartesian coordinates to be added.
(``n`` and ``atom_index`` refer to the same atom, but it might have different indices
in the zmat and the molecule/xyz)
d_constraint (tuple, optional): D-type constraints. The atom indices to which the atom being checked is
constrained. ``None`` if it is not constrained.
d_constraint_type (str, optional): The D constraint type ('D_atom', or 'D_group').
specific_atom (int, optional): A 0-index of the zmat atom to be added to a_atoms to create d_atoms.
dummy (bool, optional): Whether the atom being added (n) represents a dummy atom. ``True`` if it does.
fragments (List[List[int]], optional):
Fragments represented by the species, i.e., as in a VdW well or a TS.
Entries are atom index lists of all atoms in a fragment, each list represents a different fragment.
Raises:
ZMatError: If the A atoms could not be determined.
indices are 0-indexed.
Returns:
Optional[List[int]]: The 0-indexed z-mat D atoms.
"""
if a_atoms is not None and is_atom_in_new_fragment(atom_index=atom_index, zmat=zmat,
fragments=fragments, skip_atoms=a_atoms):
connectivity = None
if a_atoms is not None and len(a_atoms) != 3:
raise ZMatError(f'a_atoms must be a list of length 3, got {a_atoms}')
if len(zmat['coords']) <= 2:
# This is the 1st, 2nd, or 3rd atom added to the zmat, there's no dihedral angle definition here.
d_atoms = None
elif d_constraint is not None:
if d_constraint_type not in ['D_atom', 'D_group']:
raise ZMatError(f'Got an invalid D constraint type "{d_constraint_type}" for {d_constraint}')
d_atoms = [n] + [key_by_val(zmat['map'], atom) for atom in d_constraint[1:]]
elif specific_atom is not None and a_atoms is not None:
# A specific atom was specified (e.g., a dummy atom was added to assist defining this atom), consider it.
if not isinstance(specific_atom, int):
raise ZMatError(f'specific atom must be of type int, got {type(specific_atom)}')
d_atoms = a_atoms + [specific_atom]
elif connectivity is not None:
d_atoms = determine_d_atoms_from_connectivity(zmat, xyz, coords, connectivity, a_atoms, atom_index,
dummy=dummy, allow_a_to_be_dummy=False)
if len(d_atoms) < 4:
d_atoms = determine_d_atoms_without_connectivity(zmat, coords, a_atoms, n)
if len(d_atoms) < 4:
d_atoms = determine_d_atoms_from_connectivity(zmat, xyz, coords, connectivity, a_atoms, atom_index,
dummy=dummy, allow_a_to_be_dummy=True)
else:
d_atoms = determine_d_atoms_without_connectivity(zmat, coords, a_atoms, n)
if d_atoms is not None:
if len(d_atoms) < 4:
for i in reversed(range(len(xyz['symbols']))):
if i not in d_atoms and i in list(zmat['map'].keys()):
angle = calculate_param(coords=coords, atoms=[zmat['map'][z_index]
for z_index in d_atoms[1:] + [i]])
if not is_angle_linear(angle, tolerance=TOL_180):
d_atoms.append(i)
break
if len(set(d_atoms)) != 4:
logger.error(f'Could not come up with four unique d_atoms (d_atoms = {d_atoms}). '
f'Setting d_atoms to [{n}, 2, 1, 0]')
d_atoms = [n, 2, 1, 0]
if any([d_atom not in list(zmat['map'].keys()) for d_atom in d_atoms[1:]]):
raise ZMatError(f'A reference D atom in {d_atoms} for the index atom {atom_index} has not been '
f'added to the zmat yet. Added atoms are (zmat index: xyz index): {zmat["map"]}.')
return d_atoms
[docs]
def determine_d_atoms_without_connectivity(zmat: dict,
coords: Union[list, tuple],
a_atoms: list,
n: int,
) -> list:
"""
A helper function to determine d_atoms without connectivity information.
Args:
zmat (dict): The zmat.
coords (Union[list, tuple]): Just the 'coords' part of the xyz dict.
a_atoms (list): The determined a_atoms.
n (int): The 0-index of the atom in the zmat to be added.
Returns:
list: The d_atoms.
"""
d_atoms = [atom for atom in a_atoms]
for i in reversed(range(n)):
if i not in d_atoms and i in list(zmat['map'].keys()) and (i >= len(zmat['symbols']) or not is_dummy(zmat, i)):
try:
angle = calculate_param(coords=coords, atoms=[zmat['map'][z_index] for z_index in d_atoms[1:] + [i]])
except VectorsError:
continue
if not is_angle_linear(angle, tolerance=TOL_180):
d_atoms.append(i)
break
if len(d_atoms) < 4:
# Try again and consider dummies.
for i in reversed(range(n)):
if i not in d_atoms and i in list(zmat['map'].keys()):
try:
angle = calculate_param(coords=coords, atoms=[zmat['map'][z_index] for z_index in d_atoms[1:] + [i]])
except VectorsError:
continue
if not is_angle_linear(angle, tolerance=TOL_180):
d_atoms.append(i)
break
return d_atoms
[docs]
def determine_d_atoms_from_connectivity(zmat: dict,
xyz: dict,
coords: Union[list, tuple],
connectivity: dict,
a_atoms: list,
atom_index: int,
dummy: bool = False,
allow_a_to_be_dummy: bool = False,
) -> list:
"""
A helper function to determine d_atoms from the connectivity information.
Args:
zmat (dict): The zmat.
xyz (dict): The xyz dict.
coords (Union[list, tuple]): Just the 'coords' part of the xyz dict.
connectivity (dict): The atoms connectivity (keys are indices in the mol/xyz).
a_atoms (list): The determined a_atoms.
atom_index (int): The 0-index of the atom in the molecule or cartesian coordinates to be added.
(``n`` and ``atom_index`` refer to the same atom, but it might have different indices
in the zmat and the molecule/xyz)
dummy (bool, optional): Whether the atom being added (n) represents a dummy atom. ``True`` if it does.
allow_a_to_be_dummy (bool, optional): Whether the last atom ('A') in d_atoms is allowed to by a dummy atom.
``True`` if it is, ``False`` by default.
Returns:
list: The d_atoms.
"""
d_atoms = [atom for atom in a_atoms]
# We need two *integer* z-matrix indices to drive the neighbor search.
# If for some reason one of them is a dummy key ("X..."), skip the connectivity‐based logic.
int_refs = [i for i in d_atoms if isinstance(i, int)]
if len(int_refs) < 2:
return d_atoms
# pick the last two integer refs
ref_last, ref_penult = int_refs[-1], int_refs[-2]
for atom in (connectivity[zmat['map'][ref_last]] +
connectivity[zmat['map'][ref_penult]] +
connectivity[atom_index]):
if atom != atom_index and atom in list(zmat['map'].values()) \
and (not is_dummy(zmat, key_by_val(zmat['map'], atom)) or (not dummy and allow_a_to_be_dummy)):
# Atom A is allowed to be a dummy atom only if the atom represented by n is not.
zmat_index = None
if atom not in list([zmat['map'][d_atom] for d_atom in d_atoms[1:]]):
r"""
Check whether this atom (A) is part of a linear chain. If it is, try to correctly determine
dihedrals in this molecule w/o this atom, otherwise it's meaningless, and the zmat looses info.
X D (atom_index)
\ /
E -- A -- B -- C (r_atoms[1])
/
F (A is the atom considered here, corresponding to 'atom' / 'zmat_index')
"""
i = 0
atom_a, atom_b, atom_c = atom, zmat['map'][d_atoms[2]], zmat['map'][d_atoms[1]]
while i < len(list(connectivity.keys())):
angle = calculate_param(coords=coords, atoms=[atom_a, atom_b, atom_c])
if is_angle_linear(angle, tolerance=TOL_180):
num_of_neighbors = len(list(connectivity[atom_a]))
if num_of_neighbors == 1:
# Atom A is only connected to B, use the dummy atom on atom B as atom A.
b_neighbors = connectivity[atom_b]
x_neighbor = [neighbor for neighbor in b_neighbors
if xyz['symbols'][neighbor] == 'X'][0]
if key_by_val(zmat['map'], f'X{x_neighbor}') not in d_atoms:
zmat_index = key_by_val(zmat['map'], f'X{x_neighbor}')
break
elif num_of_neighbors == 2:
# atom A is only connected to B and E, check the E -- B -- C angle.
a_neighbors = connectivity[atom_a]
atom_e = a_neighbors[0] if a_neighbors[0] != atom_b else a_neighbors[1]
if atom_e in list(zmat['map'].values()):
angle = calculate_param(coords=coords, atoms=[atom_e, atom_b, atom_c])
if is_angle_linear(angle, tolerance=TOL_180):
# E -- B -- C is linear, change indices and test angle F -- B -- C.
atom_a = atom_e
elif key_by_val(zmat['map'], atom_e) not in d_atoms:
# E -- B -- C is not linear, use atom E.
zmat_index = key_by_val(zmat['map'], atom_e)
break
elif num_of_neighbors > 2:
# Atom A is connected to at least one other atom not in this linear chain.
for a_neighbor in connectivity[atom_a]:
if a_neighbor != atom_b:
angle = calculate_param(coords=coords, atoms=[a_neighbor, atom_b, atom_c])
if not is_angle_linear(angle, tolerance=TOL_180) \
and a_neighbor in list(zmat['map'].values()) \
and key_by_val(zmat['map'], a_neighbor) not in d_atoms:
# E -- B -- C is not linear, use atom E (a_neighbor).
zmat_index = key_by_val(zmat['map'], a_neighbor)
break
elif atom_a in list(zmat['map'].values()):
zmat_index = key_by_val(zmat['map'], atom_a)
break
i += 1 # Don't loop forever.
if zmat_index is None and len(d_atoms) == 3 and 'X' in zmat['symbols'] \
and not dummy and allow_a_to_be_dummy:
# Still could not find a non-linear forth d atom, look for dummy atoms.
dummies = [(key, int(val[1:])) for key, val in zmat['map'].items() if re.match(r'X\d', str(val))]
for dummy in dummies:
zmat_index = dummy[0]
if zmat_index is not None:
d_atoms.append(zmat_index)
break
if len(d_atoms) == 3 and len(connectivity[atom_index]) > 2 \
and connectivity[atom_index][2] in list(zmat['map'].values()) \
and connectivity[atom_index][2] not in [zmat['map'][d_atom] for d_atom in d_atoms[1:]]:
angle = calculate_param(coords=coords, atoms=[zmat['map'][d_atom] for d_atom in d_atoms[1:]]
+ [connectivity[atom_index][2]])
if not is_angle_linear(angle, tolerance=TOL_180) \
and connectivity[atom_index][2] in list(zmat['map'].values()) \
and key_by_val(zmat['map'], connectivity[atom_index][2]) not in d_atoms:
d_atoms.append(key_by_val(zmat['map'], connectivity[atom_index][2]))
return d_atoms
def _add_nth_atom_to_zmat(zmat: Dict[str, Union[dict, tuple]],
xyz: Dict[str, tuple],
connectivity: Dict[int, List[int]],
n: int,
atom_index: int,
constraints: Dict[str, List[Tuple[int]]],
fragments: List[List[int]],
) -> Tuple[Dict[str, tuple], Dict[str, tuple], List[int]]:
"""
Add the n-th atom to the zmat (n >= 0).
Also considers the special cases where ``n`` is the first, second, or third atom to be added to the zmat.
Adds a dummy atom if an angle (not a dihedral angle) is 180 (or 0) degrees.
Args:
zmat (dict): The zmat.
xyz (dict): The coordinates.
connectivity (dict): The atoms connectivity (keys are indices in the mol/xyz).
n (int): The 0-index of the atom in the zmat to be added.
atom_index (int): The 0-index of the atom in the molecule or cartesian coordinates to be added.
(``n`` and ``atom_index`` refer to the same atom, but it might have different indices
in the zmat and the molecule/xyz)
constraints (dict): Constraints to consider.
Accepted keys are:
'R_atom', 'R_group',
'A_atom', 'A_group',
'D_atom', 'D_group'.
fragments (List[List[int]]):
Fragments represented by the species, i.e., as in a VdW well or a TS.
Entries are atom index lists of all atoms in a fragment, each list represents a different fragment.
indices are 0-indexed.
Raises:
ZMatError: If the zmat could not be generated.
Returns:
Tuple[Dict[str, tuple], Dict[str, tuple], List[int]]:
- The updated zmat.
- The xyz coordinates updated with dummy atoms.
- A 0- or 1-length list with the skipped atom index.
"""
num_initial_atoms = len(xyz['symbols'])
coords = xyz['coords']
skipped_atoms = list()
specific_last_d_atom = None
r_constraint, r_constraint_type = check_atom_r_constraints(atom_index, constraints)
a_constraint, a_constraint_type = check_atom_a_constraints(atom_index, constraints)
d_constraint, d_constraint_type = check_atom_d_constraints(atom_index, constraints)
if sum([constraint is not None for constraint in [r_constraint, a_constraint, d_constraint]]) > 1 \
and not (r_constraint is not None and a_constraint is not None
and r_constraint == tuple(a_constraint[:2])) \
and not (a_constraint is not None and d_constraint is not None
and a_constraint == tuple(d_constraint[:3])):
raise ZMatError(f'A single atom cannot be constrained by more than one constraint type, got:\n'
f'R {r_constraint_type}: {r_constraint}\n'
f'A {a_constraint_type}: {a_constraint}\n'
f'D {d_constraint_type}: {d_constraint}')
r_constraints_passed, a_constraints_passed, d_constraints_passed = \
[constraint is None or all([entry in list(zmat['map'].values()) for entry in constraint[1:]])
for constraint in [r_constraint, a_constraint, d_constraint]]
if all([passed for passed in [r_constraints_passed, a_constraints_passed, d_constraints_passed]]):
# Add the n-th atom to the zmat.
# If an '_atom' constraint was specified, only consider this atom if n is the last atom to consider.
if (r_constraint_type == 'R_atom' or a_constraint_type == 'A_atom' or d_constraint_type == 'D_atom') \
and n != num_initial_atoms - 1:
logger.debug(f'Skipping atom index {atom_index} when creating a zmat due to a specified _atom constraint.')
skipped_atoms.append(atom_index)
return zmat, xyz, skipped_atoms
# Determine the atoms for defining the distance, R; this should be [n, <some other atom already in the zmat>].
r_atoms = determine_r_atoms(
zmat,
xyz,
connectivity,
n,
atom_index,
r_constraint,
a_constraint,
d_constraint,
trivial_assignment=any('_atom' in constraint_key for constraint_key in constraints.keys()),
fragments=fragments,
)
# Determine the atoms for defining the angle, A.
if a_constraint is None and d_constraint is not None:
# If a D constraint is given, the A constraint must obey it as well.
a_constraint = d_constraint[:-1]
a_atoms = determine_a_atoms(
zmat,
coords,
connectivity,
r_atoms,
n,
atom_index,
a_constraint,
d_constraint,
a_constraint_type,
trivial_assignment=any('_atom' in constraint_key for constraint_key in constraints.keys()),
fragments=fragments,
)
# Calculate the angle, add a dummy atom if needed.
added_dummy = False
if a_atoms is not None and all([not re.match(r'X\d', str(zmat['map'][atom])) for atom in a_atoms[1:]]):
angle = calculate_param(coords=coords, atoms=[atom_index] + [zmat['map'][atom] for atom in a_atoms[1:]])
if is_angle_linear(angle, tolerance=TOL_180):
# The angle is too close to 180 (or 0) degrees, add a dummy atom.
zmat, coords, n, r_atoms, a_atoms, specific_last_d_atom = \
add_dummy_atom(zmat, xyz, coords, connectivity, r_atoms, a_atoms, n, atom_index)
added_dummy = True
# Determine the atoms for defining the dihedral angle, D.
d_atoms = determine_d_atoms(
zmat,
xyz,
coords,
connectivity,
a_atoms,
n,
atom_index,
d_constraint,
d_constraint_type, specific_atom=specific_last_d_atom,
fragments=fragments,
)
# Update the zmat.
zmat = update_zmat_with_new_atom(zmat, xyz, coords, n, atom_index, r_atoms, a_atoms, d_atoms, added_dummy)
else:
# Some constraints did not "pass": some atoms were not added to the zmat yet; skip this atom until they are.
skipped_atoms.append(atom_index)
xyz['coords'] = coords # Update xyz with the updated coords.
return zmat, xyz, skipped_atoms
[docs]
def update_zmat_with_new_atom(zmat: dict,
xyz: dict,
coords: Union[list, tuple],
n: int,
atom_index: int,
r_atoms: list,
a_atoms: list,
d_atoms: list,
added_dummy: bool = False,
) -> dict:
"""
Update the zmat with a new atom.
Args:
zmat (dict): The zmat.
xyz (dict): The xyz dict.
coords (Union[list, tuple]): Just the 'coords' part of the xyz dict.
n (int): The 0-index of the atom in the zmat to be added.
atom_index (int): The 0-index of the atom in the molecule or cartesian coordinates to be added.
(``n`` and ``atom_index`` refer to the same atom, but it might have different indices
in the zmat and the molecule/xyz)
r_atoms (list): The R atom index descriptors.
a_atoms (list): The A atom index descriptors.
d_atoms (list): The D atom index descriptors.
added_dummy (bool, optional): Whether a dummy atom was added as the last index of a_atoms.
Returns:
dict: The updated zmat.
"""
zmat['symbols'].append(xyz['symbols'][atom_index])
if atom_index in zmat['map'].values():
raise ZMatError(f'Cannot assign atom {atom_index} to key {n}, it is already in the zmat map: {zmat["map"]}')
zmat['map'][n] = atom_index
if r_atoms is None:
r_str = None
else:
x = 'X' if any(['X' in list([symbol for symbol in zmat['symbols'][atom]]) for atom in r_atoms]) else ''
r_str = f'R{x}_{r_atoms[0]}_{r_atoms[1]}'
if a_atoms is None:
a_str = None
else:
x = 'X' if any(['X' in list([symbol for symbol in zmat['symbols'][atom]]) for atom in a_atoms]) else ''
a_str = f'A{x}_{a_atoms[0]}_{a_atoms[1]}_{a_atoms[2]}'
if d_atoms is None:
d_str = None
else:
x = 'X' if any(['X' in list([symbol for symbol in zmat['symbols'][atom]]) for atom in d_atoms]) else ''
d_str = f'D{x}_{d_atoms[0]}_{d_atoms[1]}_{d_atoms[2]}_{d_atoms[3]}'
for string in [r_str, a_str, d_str]:
if string is not None and string in zmat['vars']:
raise ZMatError(f'{string} is already in vars: {zmat["vars"]}')
zmat['coords'].append((r_str, a_str, d_str))
if any([atoms is not None and any([atoms.count(atom) > 1 for atom in atoms])
for atoms in [r_atoms, a_atoms, d_atoms]]):
raise ZMatError(f'zmat atom specifications must not have repetitions, got:\n'
f'r_atoms = {r_atoms}, a_atoms = {a_atoms}, d_atoms ={d_atoms}')
if r_atoms is not None:
zmat['vars'][r_str] = calculate_param(coords=coords, atoms=[zmat['map'][atom] for atom in r_atoms])
if added_dummy:
zmat['vars'][a_str] = 90.0
# The dihedral angle could be either 0 or 180 degrees, depends on the relative position of atom D and B, C
# d_atoms represent the zmat indices of atoms D, C, X, and B.
bcd_angle = calculate_param(coords=coords, atoms=[zmat['map'][d_atoms[3]], zmat['map'][d_atoms[1]],
zmat['map'][d_atoms[0]]], index=0)
if 180 - TOL_180 < bcd_angle <= 180:
zmat['vars'][d_str] = 180.0
elif 0 <= bcd_angle < TOL_180:
zmat['vars'][d_str] = 0.0
else:
raise ZMatError(f'Atoms {d_atoms} for a non-linear sequence with an angle of {bcd_angle}. '
f'Expected a linear sequence when using a dummy atom.')
else:
if a_atoms is not None:
zmat['vars'][a_str] = calculate_param(coords=coords, atoms=[zmat['map'][atom] for atom in a_atoms])
if d_atoms is not None:
zmat['vars'][d_str] = calculate_param(coords=coords, atoms=[zmat['map'][atom]
for atom in d_atoms])
return zmat
[docs]
def add_dummy_atom(zmat: dict,
xyz: dict,
coords: Union[list, tuple],
connectivity: dict,
r_atoms: list,
a_atoms: list,
n: int,
atom_index: int,
) -> Tuple[dict, list, int, list, list, int]:
"""
Add a dummy atom 'X' to the zmat.
Also updates the r_atoms and a_atoms lists for the original (non-dummy) atom.
Args:
zmat (dict): The zmat.
xyz (dict): The xyz dict.
coords (Union[list, tuple]): Just the 'coords' part of the xyz dict.
connectivity (dict): The atoms connectivity (keys are indices in the mol/xyz).
r_atoms (list): The determined r_atoms.
a_atoms (list): The determined a_atoms.
n (int): The 0-index of the atom in the zmat to be added.
atom_index (int): The 0-index of the atom in the molecule or cartesian coordinates to be added.
(``n`` and ``atom_index`` refer to the same atom, but it might have different indices
in the zmat and the molecule/xyz)
Returns:
Tuple[dict, list, int, list, list, int]:
- The zmat.
- The coordinates (list of tuples).
- The updated atom index in the zmat.
- The R atom indices.
- The A atom indices.
- A specific atom index to be used as the last entry of the D atom indices.
"""
zmat['symbols'].append('X')
zmat['map'][n] = f'X{len(xyz["symbols"])}'
xyz['symbols'] += ('X',)
xyz['isotopes'] += ('None',)
# Determine the atoms for defining the dihedral angle, D, **for the dummy atom, X**.
d_atoms = determine_d_atoms(zmat, xyz, coords, connectivity, a_atoms, n, atom_index, dummy=True)
r_str = f'RX_{r_atoms[0]}_{r_atoms[1]}'
a_str = f'AX_{a_atoms[0]}_{a_atoms[1]}_{a_atoms[2]}'
d_str = f'DX_{d_atoms[0]}_{d_atoms[1]}_{d_atoms[2]}_{d_atoms[3]}' if d_atoms is not None else None
zmat['coords'].append((r_str, a_str, d_str)) # the coords of the dummy atom
zmat['vars'][r_str] = 1.0
zmat['vars'][a_str] = 90.0
if d_str is not None:
zmat['vars'][d_str] = 180
# Update xyz with the dummy atom (useful when this atom is used to define dihedrals of other atoms).
coords = _add_nth_atom_to_coords(zmat=zmat, coords=list(coords), i=n)
if connectivity is not None:
# Update the connectivity dict to reflect that X is connected to the respective atom (r_atoms[1]),
# this will help later in avoiding linear angles in the last three indices of a dihedral.
connectivity[zmat['map'][r_atoms[1]]].append(int(zmat['map'][n][1:])) # Take from 'X15'.
connectivity[int(zmat['map'][n][1:])] = [zmat['map'][r_atoms[1]]]
# Before adding the original (non-dummy) atom, increase n due to the increased number of atoms.
n += 1
# Store atom B's index for the dihedral of atom D.
specific_last_d_atom = a_atoms[-1]
# Update the r_atoms and a_atoms for the original (non-dummy) atom (d_atoms is set below).
#
# X (dummy atom)
# |
# B -- C -- D (original atom)
# /
# A (optional)
r_atoms = [n, r_atoms[1]] # make this (D, C)
a_atoms = r_atoms + [n - 1] # make this (D, C, X)
return zmat, coords, n, r_atoms, a_atoms, specific_last_d_atom
[docs]
def zmat_to_coords(zmat: dict,
keep_dummy: bool = False,
skip_undefined: bool = False,
) -> Tuple[List[dict], List[str]]:
"""
Generate the cartesian coordinates from a zmat dict.
Considers the zmat atomic map so the returned coordinates is ordered correctly.
Most common isotopes assumed, if this is not the case, then isotopes should be reassigned to the xyz.
This function assumes that all zmat variables relate to already defined atoms with a lower index in the zmat.
This function implements the SN-NeRF algorithm as described in:
J. Parsons, J.B. Holmes, J.M Rojas, J. Tsai, C.E.M. Strauss, "Practical Conversion from Torsion Space to Cartesian
Space for In Silico Protein Synthesis", Journal of Computational Chemistry 2005, 26 (10), 1063-1068,
https://doi.org/10.1002/jcc.20237
Tested in converterTest.py rather than zmatTest
Args:
zmat (dict): The zmat.
keep_dummy (bool): Whether to keep dummy atoms ('X'), ``True`` to keep, default is ``False``.
skip_undefined (bool): Whether to skip atoms with undefined variables, instead of raising an error.
``True`` to skip, default is ``False``.
Raises:
ZMatError: If zmat is of wrong type or does not contain all keys.
Returns: Tuple[List[dict], List[str]]
- The cartesian coordinates.
- The atomic symbols corresponding to the coordinates.
"""
if not isinstance(zmat, dict):
raise ZMatError(f'zmat has to be a dictionary, got {type(zmat)}')
if 'symbols' not in zmat or 'coords' not in zmat or 'vars' not in zmat or 'map' not in zmat:
raise ZMatError(f'Expected to find symbols, coords, vars, and map in zmat, got instead: {list(zmat.keys())}.')
if not len(zmat['symbols']) == len(zmat['coords']) == len(zmat['map']):
raise ZMatError(f'zmat sections symbols, coords, and map have different lengths: {len(zmat["symbols"])}, '
f'{len(zmat["coords"])}, and {len(zmat["map"])}, respectively.')
for key, value in zmat['vars'].items():
if value is None:
raise ZMatError(f'Got ``None`` for var {key} in zmat:\n{zmat}')
var_list = list(zmat['vars'].keys())
coords_to_skip = list()
for i, coords in enumerate(zmat['coords']):
for coord in coords:
if coord is not None and coord not in var_list:
if skip_undefined:
coords_to_skip.append(i)
else:
raise ZMatError(f'The parameter {coord} was not found in the "vars" section of '
f'the zmat:\n{zmat["vars"]}')
coords = list()
for i in range(len(zmat['symbols'])):
coords = _add_nth_atom_to_coords(zmat=zmat, coords=coords, i=i, coords_to_skip=coords_to_skip)
# Reorder the xyz according to the zmat map and remove dummy atoms if requested.
ordered_coords, ordered_symbols = list(), list()
for i in range(len(zmat['symbols'])):
zmat_index = key_by_val(zmat['map'], i)
if zmat_index < len(coords) and i not in coords_to_skip and (zmat['symbols'][zmat_index] != 'X' or keep_dummy):
ordered_coords.append(coords[zmat_index])
ordered_symbols.append(zmat['symbols'][zmat_index])
return ordered_coords, ordered_symbols
def _add_nth_atom_to_coords(zmat: dict,
coords: list,
i: int,
coords_to_skip: Optional[list] = None,
) -> list:
"""
Add the n-th atom to the coords (n >= 0).
Args:
zmat (dict): The zmat.
coords (list): The coordinates to be updated (not the entire xyz dict).
i (int): The atom number in the zmat to be added to the coords (0-indexed)
coords_to_skip (list, optional): Entries are indices to skip.
Returns:
list: The updated coords.
"""
coords_to_skip = coords_to_skip or list()
if i == 0:
# Add the 1st atom.
coords.append((0.0, 0.0, 0.0)) # atom A is placed at the origin
elif i == 1:
# Add the 2nd atom.
r_key = zmat['coords'][i][0]
coords.append((0.0, 0.0, zmat['vars'][r_key])) # atom B is placed on axis Z, distant by the AB bond length
elif i == 2:
# Add the 3rd atom (atom "C").
r_key, a_key = zmat['coords'][i][0], zmat['coords'][i][1]
bc_length = zmat['vars'][r_key]
alpha = zmat['vars'][a_key]
alpha = math.radians(alpha if alpha < 180 else 360 - alpha)
b_index = [indices for indices in get_atom_indices_from_zmat_parameter(r_key) if indices[0] == i][0][1]
b_z = coords[b_index][2]
c_y = bc_length * math.sin(alpha)
r"""
We differentiate between two cases for c_z:
Either atom A is at the origin (case 1), or atom B is at the origin (case 2).
One of them has to be at the origin (0, 0, 0), since we're adding the 3rd atom (so either A or B were 1st).
y
^ C C
| (1) \ or (2) /
L__ > z A -- B B -- A
In case 1, we need to deduct len(B-C) from the z coordinate of atom B,
but in case 2 we need to take the positive value of len(B-C).
The above is also true if alpha(A-B-C) is > 90 degrees.
"""
c_z = b_z - bc_length * math.cos(alpha) if b_z else bc_length * math.cos(alpha)
coords.append((0.0, c_y, c_z))
elif i not in coords_to_skip:
d_indices = [indices for indices in get_atom_indices_from_zmat_parameter(zmat['coords'][i][2])
if indices[0] == i][0]
a_index, b_index, c_index = d_indices[3], d_indices[2], d_indices[1]
# Atoms B and C aren't necessarily connected in the zmat, calculate from coords.
bc_length = get_vector_length([coords[c_index][0] - coords[b_index][0],
coords[c_index][1] - coords[b_index][1],
coords[c_index][2] - coords[b_index][2]])
cd_length = zmat['vars'][zmat['coords'][i][0]]
bcd_angle = math.radians(zmat['vars'][zmat['coords'][i][1]])
abcd_dihedral = math.radians(zmat['vars'][zmat['coords'][i][2]])
# A vector pointing from atom A to atom B:
ab = [(coords[b_index][0] - coords[a_index][0]),
(coords[b_index][1] - coords[a_index][1]),
(coords[b_index][2] - coords[a_index][2])]
# A normalized vector pointing from atom B to atom C:
ubc = [(coords[c_index][0] - coords[b_index][0]) / bc_length,
(coords[c_index][1] - coords[b_index][1]) / bc_length,
(coords[c_index][2] - coords[b_index][2]) / bc_length]
n = np.cross(ab, ubc)
un = n / get_vector_length(n)
un_cross_ubc = np.cross(un, ubc)
# The transformation matrix:
m = np.array([[ubc[0], un_cross_ubc[0], un[0]],
[ubc[1], un_cross_ubc[1], un[1]],
[ubc[2], un_cross_ubc[2], un[2]]], np.float64)
# Place atom D in a default coordinate system.
d = np.array([- cd_length * math.cos(bcd_angle),
cd_length * math.sin(bcd_angle) * math.cos(abcd_dihedral),
cd_length * math.sin(bcd_angle) * math.sin(abcd_dihedral)])
d = m.dot(d) # Rotate the coordinate system into the reference frame of orientation defined by A, B, C.
# Add the coordinates of atom C to the resulting atom D:
coords.append((float(d[0] + coords[c_index][0]), float(d[1] + coords[c_index][1]), float(d[2] + coords[c_index][2])))
return coords
[docs]
def check_atom_r_constraints(atom_index: int,
constraints: dict,
) -> Tuple[Optional[tuple], Optional[str]]:
"""
Check distance constraints for an atom.
'R' constraints are a list of tuples with length 2.
The first atom in an R constraint is considered "constraint" for zmat generation:
its distance will be relative to the second atom.
Args:
atom_index (int): The 0-indexed atom index to check.
constraints (dict): The 'R', 'A', 'D' constraints dict. Values are lists of constraints.
Raises:
ZMatError: If the R constraint lengths do not equal two, or if the atom is constrained more than once.
Returns:
Tuple[Optional[tuple], Optional[str]]:
- The atom index to which the atom being checked is constrained. ``None`` if it is not constrained.
- The constraint type ('R_atom', or 'R_group').
"""
if not any(['R' in key for key in constraints.keys()]):
return None, None
r_constraints = {key: val for key, val in constraints.items() if 'R' in key}
for r_constraint_list in r_constraints.values():
if any([len(r_constraint_tuple) != 2 for r_constraint_tuple in r_constraint_list]):
raise ZMatError(f'"R" constraints must contain only tuples of length two, got: {r_constraints}.')
if any([r_constraint_type not in ['R_atom', 'R_group'] for r_constraint_type in r_constraints.keys()]):
raise ZMatError(f'"R" constraints must be either "R_atom" or "R_group", got: {r_constraints}.')
occurrences = 0
for r_constraint_list in r_constraints.values():
occurrences += sum([r_constraint[0] == atom_index for r_constraint in r_constraint_list])
if not occurrences:
return None, None
if occurrences > 1:
raise ZMatError(f'A single atom cannot be constrained more than once. Atom {atom_index} is constrained '
f'{occurrences} times in "R" constraints.')
# At this point there's only one occurrence of this constraint, find it and report the constraining atom.
for constraint_type, r_constraint_list in r_constraints.items():
for r_constraint in r_constraint_list:
if r_constraint[0] == atom_index:
return r_constraint, constraint_type
[docs]
def check_atom_a_constraints(atom_index: int,
constraints: dict,
) -> Tuple[Optional[tuple], Optional[str]]:
"""
Check angle constraints for an atom.
'A' constraints are a list of tuples with length 3.
The first atom in an A constraint is considered "constraint" for zmat generation:
its angle will be relative to the second and third atoms.
Args:
atom_index (int): The 0-indexed atom index to check.
constraints (dict): The 'R', 'A', 'D' constraints dict. Values are lists of constraints.
Raises:
ZMatError: If the A constraint lengths do not equal three, or if the atom is constrained more than once.
Returns:
Tuple[Optional[tuple], Optional[str]]:
- The atom indices to which the atom being checked is constrained. ``None`` if it is not constrained.
- The constraint type ('A_atom', or 'A_group'). ``None`` if it is not constrained.
"""
if not any(['A' in key for key in constraints.keys()]):
return None, None
a_constraints = {key: val for key, val in constraints.items() if 'A' in key}
for a_constraint_list in a_constraints.values():
if any([len(a_constraint_tuple) != 3 for a_constraint_tuple in a_constraint_list]):
raise ZMatError(f'"A" constraints must contain only tuples of length three, got: {a_constraints}.')
if any([a_constraint_type not in ['A_atom', 'A_group'] for a_constraint_type in a_constraints.keys()]):
raise ZMatError(f'"A" constraints must be either "A_atom" or "A_group", got: {a_constraints}.')
occurrences = 0
for a_constraint_list in a_constraints.values():
occurrences += sum([a_constraint[0] == atom_index for a_constraint in a_constraint_list])
if not occurrences:
return None, None
if occurrences > 1:
raise ZMatError(f'A single atom cannot be constrained more than once. Atom {atom_index} is constrained '
f'{occurrences} times in "A" constraints.')
# At this point there's only one occurrence of this constraint, find it and report the constraining atoms.
for constraint_type, a_constraint_list in a_constraints.items():
for a_constraint in a_constraint_list:
if a_constraint[0] == atom_index:
return a_constraint, constraint_type
[docs]
def check_atom_d_constraints(atom_index: int,
constraints: dict,
) -> Tuple[Optional[tuple], Optional[str]]:
"""
Check dihedral angle constraints for an atom.
'D' constraints are a list of tuples with length 4.
The first atom in a D constraint is considered "constraint" for zmat generation:
its dihedral angle will be relative to the second, third, and forth atoms.
Args:
atom_index (int): The 0-indexed atom index to check.
constraints (dict): The 'R', 'A', 'D' constraints dict. Values are lists of constraints.
Raises:
ZMatError: If the A constraint lengths do not equal three, or if the atom is constrained more than once.
Returns:
Tuple[Optional[tuple], Optional[str]]:
- The atom indices to which the atom being checked is constrained. ``None`` if it is not constrained.
- The constraint type ('D_atom', 'D_group').
"""
if not any(['D' in key for key in constraints.keys()]):
return None, None
d_constraints = {key: val for key, val in constraints.items() if 'D' in key}
for d_constraint_list in d_constraints.values():
if any([len(d_constraint_tuple) != 4 for d_constraint_tuple in d_constraint_list]):
raise ZMatError(f'"D" constraints must contain only tuples of length four, got: {d_constraints}.')
if any([d_constraint_type not in ['D_atom', 'D_group'] for d_constraint_type in d_constraints.keys()]):
raise ZMatError(f'"D" constraints must be either "D_atom", or "D_group", got: {d_constraints}.')
occurrences = 0
for d_constraint_list in d_constraints.values():
occurrences += sum([d_constraint[0] == atom_index for d_constraint in d_constraint_list])
if not occurrences:
return None, None
if occurrences > 1:
raise ZMatError(f'A single atom cannot be constrained more than once. Atom {atom_index} is constrained '
f'{occurrences} times in "D" constraints.')
# At this point there's only one occurrence of this constraint, find it and report the constraining atoms.
for constraint_type, d_constraint_list in d_constraints.items():
for d_constraint in d_constraint_list:
if d_constraint[0] == atom_index:
return d_constraint, constraint_type
[docs]
def is_dummy(zmat: dict,
zmat_index: int,
) -> bool:
"""
Determine whether an atom in a zmat is a dummy atom by its zmat index.
Args:
zmat (dict): The zmat with symbol and map information.
zmat_index (int): The atom index in the zmat.
Raises:
ZMatError: If the index is invalid.
Returns:
bool: Whether the atom represents a dummy atom 'X'. ``True`` if it does.
"""
if len(zmat['symbols']) <= zmat_index:
raise ZMatError(f'index {zmat_index} is invalid for a zmat with only {len(zmat["symbols"])} atoms')
return zmat['symbols'][zmat_index] == 'X'
[docs]
def get_atom_connectivity_from_mol(mol: Molecule,
atom1: 'Atom',
) -> List[int]:
"""
Get the connectivity of ``atom`` in ``mol``.
Returns heavy (non-H) atoms first.
Args:
mol (Molecule): The molecule with connectivity information.
atom1 (Atom): The atom to check connectivity for.
Returns:
List[int]: 0-indices of atoms in ``mol`` connected to ``atom``.
"""
return [mol.atoms.index(atom2) for atom2 in list(atom1.edges.keys()) if atom2.is_non_hydrogen()] \
+ [mol.atoms.index(atom2) for atom2 in list(atom1.edges.keys()) if atom2.is_hydrogen()]
[docs]
def get_connectivity(mol: Molecule) -> Dict[int, List[int]]:
"""
Get the connectivity information from the molecule object.
Args:
mol (Molecule): The Molecule object.
Returns:
Dict[int, List[int]]: The connectivity information.
Keys are atom indices, values are tuples of respective edges, ordered with heavy atoms first.
All indices are 0-indexed, corresponding to atom indices in ``mol`` (not in the zmat).
``None`` if ``xyz`` is given.
"""
connectivity = dict()
for atom in mol.atoms:
connectivity[mol.atoms.index(atom)] = get_atom_connectivity_from_mol(mol, atom)
return connectivity
[docs]
def order_fragments_by_constraints(fragments: List[List[int]],
constraints_dict: Optional[Dict[str, List[tuple]]] = None,
) -> List[List[int]]:
"""
Get the order in which atoms should be added to the zmat from a 2D or a 3D representation.
Args:
fragments (List[List[int]]):
Fragments represented by the species, i.e., as in a VdW well or a TS.
Entries are atom index lists of all atoms in a fragment, each list represents a different fragment.
constraints_dict (dict, optional):
A dictionary of atom constraints. The function will try to find an atom order in which all constrained atoms
are after the atoms they are constraint to.
Returns:
List[List[int]]: The ordered fragments list.
"""
if constraints_dict is None or not len(fragments):
return fragments
constraints_in_fragments = list()
for i in range(len(fragments)):
# Initialize with general constraint types and the original index.
constraints_in_fragments.append({'R': 0, 'A': 0, 'D': 0, 'i': i})
for constraint_type, constraint_list in constraints_dict.items():
for constraint in constraint_list:
for i, fragment in enumerate(fragments):
if all([c in fragment for c in constraint]):
constraints_in_fragments[i][constraint_type[0]] += 1
constraints_in_fragments.sort(key=operator.itemgetter('R', 'A', 'D'), reverse=False)
new_order = [constraint['i'] for constraint in constraints_in_fragments]
new_fragments = [[]] * len(fragments)
for fragment, i in zip(fragments, new_order):
new_fragments[i] = fragment
return new_fragments
[docs]
def get_atom_order(xyz: Optional[Dict[str, tuple]] = None,
mol: Optional[Molecule] = None,
fragments: Optional[List[List[int]]] = None,
constraints_dict: Optional[Dict[str, List[tuple]]] = None,
) -> List[int]:
"""
Get the order in which atoms should be added to the zmat from a 2D or a 3D representation.
Args:
xyz (dict, optional): The 3D coordinates.
mol (Molecule, optional): The Molecule object.
fragments (List[List[int]], optional):
Fragments represented by the species, i.e., as in a VdW well or a TS.
Entries are atom index lists of all atoms in a fragment, each list represents a different fragment.
constraints_dict (dict, optional):
A dictionary of atom constraints. The function will try to find an atom order in which all constrained atoms
are after the atoms they are constraint to.
Returns:
List[int]: The atom order, 0-indexed.
"""
if mol is None and xyz is None:
raise ValueError('Either mol or xyz must be given.')
if fragments is None or not len(fragments):
if mol is not None:
fragments = [list(range(len(mol.atoms)))]
if xyz is not None:
fragments = [list(range(len(xyz['symbols'])))]
else:
fragments = order_fragments_by_constraints(fragments=fragments, constraints_dict=constraints_dict)
atom_order = list()
if mol is not None:
for fragment in fragments:
sequence = get_atom_order_from_mol(mol=mol, fragment=fragment, constraints_dict=constraints_dict, xyz=xyz)
for i in sequence:
if i not in atom_order:
atom_order.append(i)
elif xyz is not None:
for fragment in fragments:
sequence = get_atom_order_from_xyz(xyz, fragment=fragment)
for i in sequence:
if i not in atom_order:
atom_order.append(i)
return atom_order
def _accumulated_atom_order(fragments: List[List[int]],
order_fn: Callable[[List[int]], List[int]]
) -> List[int]:
"""
Apply a cumulative offset to each fragment’s local atom‐order.
fragments: list of fragments (each is a list of atom‐indices)
order_fn: callable that takes one fragment and returns its local ordering
"""
atom_order: List[int] = list()
offset = 0
for frag in fragments:
local = order_fn(frag)
atom_order.extend(i + offset for i in local)
offset += len(frag)
return atom_order
[docs]
def get_atom_order_from_mol(mol: Molecule,
fragment: List[int] = None,
constraints_dict: Optional[Dict[str, List[tuple]]] = None,
xyz: Optional[dict] = None,
) -> List[int]:
"""
Determine Z-matrix atom order based on molecular connectivity and an optional single constraint.
Uses a BFS on the heavy-atom graph to establish a base ordering, then partitions
unconstrained atoms from constrained (moving) atoms/groups so that unconstrained
heavies come first, then unconstrained hydrogens, then constrained anchors/move atoms
(or their entire groups).
Args:
mol (Molecule): The Molecule object containing ``atoms`` and ``edges``.
fragment (List[int], optional): Optional list of 0-based atom indices to include; defaults to all atoms.
constraints_dict: Optional dict with a single constraint type:
'R_atom': [(move_idx, anchor_idx)]
'R_group': [(move_idx, anchor_idx)]
'A_atom': [(move_idx, ref1, ref2)]
'A_group': [(move_idx, ref1, ref2)]
'D_atom': [(move_idx, ref1, ref2, ref3)]
'D_group': [(move_idx, ref1, ref2, ref3)]
'D_groups': [(pivot2, pivot3, ref?, ref?)]
xyz (dict, optional): Optional 3D coordinates dict, used for checking linear motifs to avoid starting with them.
Returns:
A list of atom indices in Z-matrix order, where any unconstrained
heavy atoms come first, then unconstrained hydrogens, then the
constrained anchor(s) and moving atom(s) or their groups, so that
modifying the specified internal coordinate translates/rotates only
the intended atoms.
"""
# 1) Normalize inputs
fragment = set(fragment or range(len(mol.atoms)))
constraints = constraints_dict or {}
# only handle first constraint if multiple present
if not constraints:
active = None
else:
key, tpl_list = next(iter(constraints.items()))
active = (key, tpl_list[0])
# 2) Identify constrained set
constrained_set = set()
if active:
key, tpl = active
# add basic indices
constrained_set.update(tpl)
# for group types, BFS collect additional move-group
if key in ('R_group', 'A_group', 'D_group'):
root, anchor = tpl[0], tpl[1]
seen = {root}
queue = [root]
while queue:
i = queue.pop(0)
for nbr in mol.atoms[i].edges:
j = mol.atoms.index(nbr)
if j in fragment and j not in seen and not (i == root and j == anchor):
seen.add(j)
queue.append(j)
constrained_set |= seen
elif key == 'D_groups':
# tpl has 4 indices: (move?, pivot2, pivot3, ref?)
# BFS from pivot2 and pivot3
for root in tpl[:2]:
seen = {root}
queue = [root]
while queue:
i = queue.pop(0)
for nbr in mol.atoms[i].edges:
j = mol.atoms.index(nbr)
if j in fragment and j not in seen:
seen.add(j)
queue.append(j)
constrained_set |= seen
# 3) Base BFS ordering on heavy-atom graph
# find start: a heavy with <=1 heavy neighbor not in constrained_set if possible
# Avoid atoms that are A in a linear A–B–C motif
def find_start(avoid_linear: bool = True) -> Optional[int]:
"""Find a suitable start atom in the fragment."""
for atom in mol.atoms:
i = mol.atoms.index(atom)
if (
i in fragment and
not atom.is_hydrogen() and
i not in constrained_set and
(not avoid_linear or not is_atom_in_linear_angle(i=i, xyz=xyz, mol=mol)) and
sum(nb.is_non_hydrogen() for nb in atom.edges) <= 1
):
return i
for atom in mol.atoms:
i = mol.atoms.index(atom)
if (
i in fragment and
not atom.is_hydrogen() and
i not in constrained_set and
(not avoid_linear or not is_atom_in_linear_angle(i=i, xyz=xyz, mol=mol))
):
return i
for atom in mol.atoms:
i = mol.atoms.index(atom)
if i in fragment and not atom.is_hydrogen():
return i
return next(iter(fragment))
# Attempt to find a non-linear start; fall back if needed
start = find_start(avoid_linear=True)
visited = set()
base_heavies = list()
queue = [start]
while queue:
i = queue.pop(0)
if i in visited or i not in fragment:
continue
visited.add(i)
if not mol.atoms[i].is_hydrogen():
base_heavies.append(i)
# enqueue heavy neighbors
for nbr in mol.atoms[i].edges:
j = mol.atoms.index(nbr)
if j not in visited and not mol.atoms[j].is_hydrogen() and j in fragment:
queue.append(j)
# 4) Collect hydrogens in fragment
base_hydrogens = [i for i in range(len(mol.atoms))
if i in fragment and mol.atoms[i].is_hydrogen()]
# 5) Partition: unconstrained heavy atoms and hydrogens
heavy_uncon = [i for i in base_heavies if i not in constrained_set]
h_uncon = [i for i in base_hydrogens if i not in constrained_set]
# 6) Build tail ordering for the active constraint
tail = []
if active:
key, tpl = active
if key == 'R_atom':
# anchor then moving atom
tail = [tpl[1], tpl[0]]
elif key == 'A_atom':
# reverse tuple order: ref2, ref1, move
tail = [tpl[2], tpl[1], tpl[0]]
elif key == 'D_atom':
# reverse 4-tuple: ref3, ref2, ref1, move
tail = [tpl[3], tpl[2], tpl[1], tpl[0]]
elif key == 'R_group':
# BFS group from move atom excluding anchor
root, anchor = tpl
seen = {root}
queue = [root]
while queue:
i = queue.pop(0)
for nbr in mol.atoms[i].edges:
j = mol.atoms.index(nbr)
if j in fragment and j not in seen and not (i == root and j == anchor):
seen.add(j)
queue.append(j)
group = [root] + [x for x in seen if x != root]
tail = [anchor, root] + [x for x in group if x != root]
elif key == 'A_group':
root, ref1, ref2 = tpl
seen = {root}
queue = [root]
while queue:
i = queue.pop(0)
for nbr in mol.atoms[i].edges:
j = mol.atoms.index(nbr)
if j in fragment and j not in seen and not (i == root and j in (ref1, ref2)):
seen.add(j)
queue.append(j)
group = [root] + [x for x in seen if x != root]
tail = [ref2, ref1, root] + [x for x in group if x != root]
elif key == 'D_group':
root, ref1, ref2, ref3 = tpl
seen = {root}
queue = [root]
while queue:
i = queue.pop(0)
for nbr in mol.atoms[i].edges:
j = mol.atoms.index(nbr)
if j in fragment and j not in seen and not (i == root and j == ref1):
seen.add(j)
queue.append(j)
group = [root] + [x for x in seen if x != root]
tail = [ref3, ref2, ref1, root] + [x for x in group if x != root]
elif key == 'D_groups':
# tpl: (move?, pivot2, pivot3, ref?)
pivot2, pivot3, ref1, ref2 = tpl[:4]
# BFS from pivot2 and pivot3
seen_all = set()
for root in (pivot2, pivot3):
seen = {root}
queue = [root]
while queue:
i = queue.pop(0)
for nbr in mol.atoms[i].edges:
j = mol.atoms.index(nbr)
if j in fragment and j not in seen:
seen.add(j)
queue.append(j)
seen_all |= seen
tail = [ref2, ref1, pivot3, pivot2] + [x for x in seen_all if x not in (pivot2, pivot3)]
# 7) Final atom_order: unconstrained heavies, unconstrained H's, then tail
atom_order = heavy_uncon + h_uncon + tail
return atom_order
[docs]
def is_atom_in_linear_angle(i: int, xyz: Optional[dict], mol: Molecule, tol: float = 0.9) -> bool:
"""
Check if atom i is involved in a linear A–B–C motif (i.e., angle ~180),
whether as A, B, or C.
"""
if not xyz:
return False
for b in range(len(mol.atoms)):
b_neighbors = [mol.atoms.index(atom) for atom in mol.atoms[b].edges]
for a in b_neighbors:
for c in b_neighbors:
if a >= c:
continue # avoid redundant pairs and self-pairs
if i not in (a, b, c):
continue
angle = calculate_param(coords=xyz['coords'], atoms=[a, b, c])
if is_angle_linear(angle, tolerance=tol):
return True
return False
[docs]
def get_atom_order_from_xyz(xyz: Dict[str, tuple],
fragment: Optional[List[int]] = None,
) -> List[int]:
"""
Get the order in which atoms should be added to the zmat from the 3D geometry.
Args:
xyz (dict): The 3D coordinates.
fragment (List[int], optional): Entries are 0-indexed atom indices to consider in the molecule.
Only atoms within the fragment are considered.
Returns:
List[int]: The atom order, 0-indexed.
"""
fragment = fragment or list(range(len(xyz['symbols'])))
atom_order, hydrogens = list(), list()
for i, symbol in enumerate(xyz['symbols']):
if i in fragment:
if symbol == 'H':
hydrogens.append(i)
else:
atom_order.append(i)
atom_order.extend(hydrogens)
return atom_order
[docs]
def consolidate_zmat(zmat: dict,
mol: Optional[Molecule] = None,
consolidation_tols: Optional[dict] = None,
) -> dict:
"""
Consolidate (almost) identical vars in the zmat.
Args:
zmat (dict): The zmat.
mol (Molecule, optional): The RMG molecule, used for atom types if not None,
otherwise atom symbols from the zmat are used.
consolidation_tols (dict, optional): Keys are 'R', 'A', 'D', values are floats representing absolute tolerance
for consolidating almost equal internal coordinates.
Returns:
dict: The consolidated zmat.
"""
consolidation_tols = consolidation_tols or dict()
# Assign defaults if needed:
if 'R' not in consolidation_tols:
consolidation_tols['R'] = DEFAULT_CONSOLIDATION_R_TOL
if 'A' not in consolidation_tols:
consolidation_tols['A'] = DEFAULT_CONSOLIDATION_A_TOL
if 'D' not in consolidation_tols:
consolidation_tols['D'] = DEFAULT_CONSOLIDATION_D_TOL
zmat['coords'] = list(zmat['coords']) # Make sure it is mutable.
keys_to_consolidate1, keys_to_consolidate2 = {'R': list(), 'A': list(), 'D': list()}, \
{'R': list(), 'A': list(), 'D': list()}
# Identify potential keys to consolidate, save in keys_to_consolidate1.
for i, key1 in enumerate(zmat['vars'].keys()):
if key1 is not None and not any([key1 in keys for keys in keys_to_consolidate1[key1[0]]]):
dup_keys = list()
for j, key2 in enumerate(zmat['vars'].keys()):
if j > i and key2 is not None and key1[0] == key2[0] \
and abs(zmat['vars'][key1] - zmat['vars'][key2]) < consolidation_tols[key1[0]]:
for key in [key1, key2]:
if key not in dup_keys:
dup_keys.append(key)
if len(dup_keys):
appended = False
for dup_key in dup_keys:
for j in range(len(keys_to_consolidate1[key1[0]])):
if dup_key in keys_to_consolidate1[key1[0]][j]:
# There are two lists that must be combined during consolidation,
# but weren't identified as such (probably ony some variables are within the tolerance).
keys_to_consolidate1[key1[0]][j] = \
sorted(list(set(keys_to_consolidate1[key1[0]][j] + dup_keys)))
appended = True
if not appended:
keys_to_consolidate1[key1[0]].append(dup_keys)
# Check atom types before consolidating, save in keys_to_consolidate2.
for key_type in ['R', 'A', 'D']:
for keys in keys_to_consolidate1[key_type]:
atoms_dict, indices_to_pop = dict(), list()
for key in keys:
indices = [int(index) for index in key.split('_')[1:]]
if any([zmat['symbols'][index] == 'X' for index in indices]):
# This is a dummy atom, don't check atoms, always consolidate.
atoms_dict[key] = key[:2]
else:
if mol is not None and all([zmat['map'][index] != 'X' for index in indices]):
# Use atom types.
atoms_dict[key] = tuple(mol.atoms[zmat['map'][index]].atomtype.label for index in indices)
else:
# Use symbols.
atoms_dict[key] = tuple(zmat['symbols'][index] for index in indices)
atoms_sets = list(set(list(atoms_dict.values())))
for i, atoms_set in enumerate(atoms_sets):
# Check if the reverse is there as well, e.g., ('C', 'H') is the same as ('H', 'C')),
# but if popping the first, don't pop the second...
# also, avid popping symmetric tuples such as ('O', 'O'), or ('H', 'C', 'H').
if tuple(reversed(atoms_set)) in atoms_sets \
and tuple(reversed(atoms_set)) not in [atom_set for j, atom_set in enumerate(atoms_sets)
if j in indices_to_pop] \
and atoms_set != tuple(reversed(atoms_set)):
indices_to_pop.append(i)
for i in reversed(range(len(atoms_sets))):
if i in indices_to_pop:
atoms_sets.pop(i)
for atoms_tuple in atoms_sets:
keys_to_consolidate2[key_type].append([key for key, val in atoms_dict.items()
if val == atoms_tuple or tuple(reversed(val)) == atoms_tuple])
# Consolidate the zmat.
for key_type in ['R', 'A', 'D']:
for keys in keys_to_consolidate2[key_type]:
# Generate a new key name.
indices, new_indices = list(), list()
for i in range(len(keys[0].split('_')[1:])):
indices.append([key.split('_')[1:][i] for key in keys])
for i in range(len(keys[0].split('_')[1:])):
new_indices.append('|'.join(str(index) for index in indices[i]))
if any(['X' in list([zmat['symbols'][int(index)] for index in entry]) for entry in indices]):
new_key = '_'.join([key_type + 'X'] + new_indices)
else:
new_key = '_'.join([key_type] + new_indices)
# Replace all occurrences in zmat['coords'].
for i in range(len(zmat['coords'])):
if any([coord in keys for coord in zmat['coords'][i]]):
new_coord = list()
for coord in zmat['coords'][i]:
if coord not in keys:
new_coord.append(coord)
else:
new_coord.append(new_key)
zmat['coords'][i] = tuple(new_coord)
# Replace all occurrences in zmat['vars'] with the new key and an average value.
if all([key in list(zmat['vars'].keys()) for key in keys]):
values = [zmat['vars'][key] for key in keys]
new_value = sum(values) / len(values)
for key in keys:
del zmat['vars'][key]
else:
# Adding a new value to an existing key.
found_key = False
keys_as_indices = [get_atom_indices_from_zmat_parameter(key)[0] for key in keys]
for variable in zmat['vars'].keys():
if variable[0] == key_type:
var_indices = get_atom_indices_from_zmat_parameter(variable)
if any([key_indices in var_indices for key_indices in keys_as_indices]):
found_key = True
for key_indices in keys_as_indices:
if key_indices not in var_indices:
var_indices += (key_indices,)
new_consolidated_key = key_type
for i in range(len(var_indices[0])):
new_consolidated_key += '_' + \
'|'.join([str(var_index[i])
for var_index in var_indices])
zmat['vars'][new_consolidated_key] = zmat['vars'][variable]
break
if found_key:
del zmat['vars'][variable]
for key in keys:
if key in list(zmat['vars'].keys()):
del zmat['vars'][key]
else:
raise ZMatError('Could not consolidate zmat')
zmat['vars'][new_key] = new_value
zmat['coords'] = tuple(zmat['coords'])
return zmat
[docs]
def get_atom_indices_from_zmat_parameter(param: str) -> tuple:
"""
Get the atom indices from a zmat parameter.
Examples:
'R_0_2' --> ((0, 2),)
'A_0_1_2' --> ((0, 1, 2),) corresponding to angle 0-1-2
'D_0_1_2_4' --> ((0, 1, 2, 4),)
'R_0|0_3|4' --> ((0, 3), (0, 4))
'A_0|0|0_1|1|1_2|3|4' --> ((0, 1, 2), (0, 1, 3), (0, 1, 4)) corresponding to angles 0-1-2, 0-1-3, and 0-1-4
'D_0|0|0_1|1|1_2|3|4_5|6|9' --> ((0, 1, 2, 5), (0, 1, 3, 6), (0, 1, 4, 9))
'RX_0_2' --> ((0, 2),)
Args:
param (str): The zmat parameter.
Returns:
tuple: Entries are tuples of indices, each describing R, A, or D parameters.
The tuple entries for R, A, and D types are of lengths 2, 3, and 4, respectively.
The number of tuple entries depends on the number of consolidated parameters.
"""
result, index_groups = list(), list()
splits = param.split('_')[1:] # exclude the type char ('R', 'A', or 'D')
for split in splits:
index_groups.append(split.split('|'))
for i in range(len(index_groups[0])):
result.append(tuple(int(index_group[i]) for index_group in index_groups))
return tuple(result)
[docs]
def get_parameter_from_atom_indices(zmat: dict,
indices: Union[list, tuple],
xyz_indexed: bool = True,
) -> Union[str, tuple, list]:
"""
Get the zmat parameter from the atom indices.
If indices are of length two, three, or four, an R, A, or D parameter is returned, respectively.
If a requested parameter represents an angle split by a dummy atom,
combine the two dummy angles to get the original angle.
In this case, a list of the two corresponding parameters will be returned.
Examples:
[0, 2] --> 'R_0_2', or 'R_0|1_2_5', etc.
[0, 2, 4] --> 'A_0_2_4', or a respective consolidated key
[0, 2, 4, 9] --> 'D_0_2_4_9', or a respective consolidated key
Args:
zmat (dict): The zmat.
indices (Union[list, tuple]): Entries are 0-indices of atoms, list is of length 2, 3, or 4.
xyz_indexed (bool, optional): Whether the atom indices relate to the xyz (and the zmat map will be used)
or they already relate to the zmat. Default is ``True`` (relate to xyz).
Raises:
TypeError: If ``indices`` are of wrong type.
ZMatError: If ``indices`` has a wrong length, or not all indices are in the zmat map.
Returns: Union[str, tuple, list]
The corresponding zmat parameter.
"""
if not isinstance(indices, (list, tuple)):
raise TypeError(f'indices must be a list, got {indices} which is a {type(indices)}')
if len(indices) not in [2, 3, 4]:
raise ZMatError(f'indices must be of length 2, 3, or 4, got {indices} (length {len(indices)}.')
if xyz_indexed:
if any([index not in list(zmat['map'].values()) for index in indices]):
raise ZMatError(f'Not all indices ({indices}) are in the zmat map values ({list(zmat["map"].values())}).')
indices = [key_by_val(zmat['map'], index) for index in indices]
if any([index not in list(zmat['map'].keys()) for index in indices]):
raise ZMatError(f'Not all indices ({indices}) are in the zmat map keys ({list(zmat["map"].keys())}).')
key = '_'.join([KEY_FROM_LEN[len(indices)]] + [str(index) for index in indices])
if key in list(zmat['vars'].keys()):
# It's a non-consolidated key.
return key
# It's a consolidated key.
key = KEY_FROM_LEN[len(indices)]
for var in zmat['vars'].keys():
if var[0] == key and tuple(indices) in list(get_atom_indices_from_zmat_parameter(var)):
return var
# If no value found, check whether this is an angle split by a dummy atom.
var1, var2 = None, None
if len(indices) == 3:
# 180 degree angles aren't given explicitly in the zmat,
# they are separated in to two angles using a dummy atom, check if this is the case here.
dummy_indices = [str(key) for key, val in zmat['map'].items() if isinstance(val, str) and 'X' in val]
param1 = 'AX_{0}_{1}_{2}'
all_parameters = list(zmat['vars'].keys())
for dummy_str_index in dummy_indices:
var1 = param1.format(indices[0], indices[1], dummy_str_index) \
if param1.format(indices[0], indices[1], dummy_str_index) in all_parameters and var1 is None else var1
var1 = param1.format(dummy_str_index, indices[1], indices[0]) \
if param1.format(dummy_str_index, indices[1], indices[0]) in all_parameters and var1 is None else var1
if var1 is not None:
param2a = f'AX_{dummy_str_index}_{indices[1]}_{indices[2]}'
param2b = f'AX_{indices[2]}_{indices[1]}_{dummy_str_index}'
var2 = param2a if param2a in all_parameters and var2 is None else var2
var2 = param2b if param2b in all_parameters and var2 is None else var2
if var2 is not None:
break
var1 = None
if var1 is not None and var2 is not None:
return [var1, var2]
raise ZMatError(f'Could not find a key corresponding to {key} {indices}.')
def _compare_zmats(zmat1: dict,
zmat2: dict,
r_tol: Optional[float] = None,
a_tol: Optional[float] = None,
d_tol: Optional[float] = None,
symmetric_torsions: Optional[dict] = None,
verbose: bool = False,
) -> bool:
"""
Compare two zmats. The zmats must have identical variables (i.e., derived from the same connectivity or ordered xyz,
using the same constraints).
This function does not make use of the zmat map, but does check that it is identical.
Args:
zmat1 (dict): zmat1.
zmat2 (dict): zmat2.
r_tol (float, optional): A tolerance for comparing distances.
a_tol (float, optional): A tolerance for comparing angles.
d_tol (float, optional): A tolerance for comparing dihedral angles.
symmetric_torsions (dict, optional): Keys are tuples of 0-indexed scan indices, values are internal rotation
symmetry numbers (sigma). Conformers which only differ by an integer number
times 360 degrees / sigma are considered identical.
verbose (bool, optional): Whether to print a reason for determining the zmats are different if they are,
``True`` to print.
Raises:
ZMatError: If the zmats are of wrong type or don't have all attributes.
Returns:
bool: Whether the two zmats represent the same conformation to the desired tolerance. ``True`` if they do.
"""
if not isinstance(zmat1, dict) or not isinstance(zmat2, dict):
raise ZMatError(f'zmats must be dictionaries, got {type(zmat1)} and {type(zmat2)}')
if not len(list(zmat1.keys())):
raise ZMatError(f'zmat1 is empty! Got {zmat1}')
if not len(list(zmat2.keys())):
raise ZMatError(f'zmat2 is empty! Got {zmat2}')
if 'symbols' not in zmat1 or 'coords' not in zmat1 or 'vars' not in zmat1 \
or 'symbols' not in zmat2 or 'coords' not in zmat2 or 'vars' not in zmat2:
raise ZMatError(f'zmats must contain the "symbols", "coords", and "vars" keys, got: '
f'{list(zmat1.keys())} and {list(zmat2.keys())}.')
tol = {'R': r_tol or DEFAULT_COMPARISON_R_TOL,
'A': a_tol or DEFAULT_COMPARISON_A_TOL,
'D': d_tol or DEFAULT_COMPARISON_D_TOL}
if zmat1['map'] != zmat2['map']:
if verbose:
logger.info(f'zmats have different maps:\n{zmat1["map"]}\n{zmat2["map"]}')
return False
if len(zmat1['symbols']) != len(zmat2['symbols']):
if verbose:
logger.info(f'zmats have different symbols:\n{zmat1["symbols"]}\n{zmat2["symbols"]}')
return False
for coord in zmat1['coords']:
if coord not in zmat2['coords']:
if verbose:
logger.info(f'zmats differ since the coordinates {coord} are missing from zmat2.')
return False
if coord is None:
continue
for var in coord:
if var is not None:
val1, val2 = zmat1['vars'][var], zmat2['vars'][var]
key_type = var[0]
if abs(val1 - val2) > tol[key_type]:
# The dihedrals are not the same, but check whether they disagree by an integer times 360 / sigma.
sigma = None
if symmetric_torsions is not None and 'D' in var:
vars_ = get_atom_indices_from_zmat_parameter(var)
# vars is generated from var as 'D_0|0_1|1_2|3|_5|6' --> ((0, 1, 2, 5), (0, 1, 3, 6)).
for var_ in vars_:
for symmetric_torsion in symmetric_torsions.keys():
# Check the pivots only (not the entire four torsion indices).
if all([var_[i + 1] in symmetric_torsion[1:3] for i in range(2)]):
sigma = symmetric_torsions[symmetric_torsion]
if sigma is not None:
diff = abs(val1 - val2)
rotation_symmetry = 360 / sigma # The rotation symmetry in degrees.
if not any([diff - rotation_symmetry * i <= tol[key_type] for i in range(sigma)]):
sigma = None # Mark this test as unsuccessful to return False below.
if sigma is None:
if verbose:
logger.info(f'zmats differ since the respective values for var {var} in zmat1 and zmat2 '
f'({val1:.4f}, {val2:.4f}) differ by {abs(val1 - val2):.4f}, which is greater '
f'than the required tolerance of {tol[key_type]}')
return False
return True
[docs]
def get_all_neighbors(mol: Molecule,
atom_index: int,
) -> List[int]:
"""
Get atom indices of all neighbors of an atom in a molecule.
Args:
mol (Molecule): The RMG molecule with connectivity information.
atom_index (int): The index of the atom whose neighbors are requested.
Returns:
List[int]: Atom indices of all neighbors of the requested atom.
"""
neighbors = list()
for atom in mol.atoms[atom_index].edges.keys():
neighbors.append(mol.atoms.index(atom))
return neighbors
[docs]
def is_atom_in_new_fragment(atom_index: int,
zmat: Dict[str, Union[dict, tuple]],
fragments: Optional[List[List[int]]] = None,
skip_atoms: Optional[List[int]] = None,
) -> bool:
"""
Whether an atom is present in a new fragment that hasn't been added to the zmat yet,
and therefore atom assignment should not be done based on connectivity.
Args:
atom_index (int): The 0-index of the atom in the molecule or cartesian coordinates to be added.
(``n`` and ``atom_index`` refer to the same atom, but it might have different indices
in the zmat/molecule/xyz/fragments)
zmat (dict): The zmat.
skip_atoms (list): Atoms in the zmat map to ignore when checking fragments.
fragments (List[List[int]], optional):
Fragments represented by the species, i.e., as in a VdW well or a TS.
Entries are atom index lists of all atoms in a fragment, each list represents a different fragment.
indices are 0-indexed.
Returns:
bool: Whether to consider connectivity for assigning atoms in z-mat variables.
"""
skip_atoms = skip_atoms or list()
if fragments is not None and len(fragments) > 1:
for fragment in fragments:
if atom_index in fragment:
if all([z_index in skip_atoms or frag_index not in fragment
for z_index, frag_index in zmat['map'].items()]):
# All atoms considered thus far are not in the current fragment, connectivity is meaningless.
return True
break
return False
[docs]
def up_param(param: str,
increment: Optional[int] = None,
increment_list: Optional[List[int]] = None,
) -> str:
"""
Increase the indices represented by a zmat parameter.
Args:
param (str): The zmat parameter, e.g., 'D_4_2_1_0'.
increment (int, optional): Uniform value to add to each index.
increment_list (list, optional): Individual increments per index.
Raises:
ZMatError: If no increment was provided or resulting index is negative.
Returns: str
The new parameter with increased indices.
"""
if increment is None and increment_list is None:
raise ZMatError('Either increment or increment_list must be specified.')
indices = get_atom_indices_from_zmat_parameter(param)[0]
if increment_list:
if len(increment_list) != len(indices):
raise ZMatError(f'Increment list length ({len(increment_list)}) does not match index count ({len(indices)})')
new_indices = [i + inc for i, inc in zip(indices, increment_list)]
else:
new_indices = [i + increment for i in indices]
if any(i < 0 for i in new_indices):
raise ZMatError(f'Negative index in param "{param}" after increment.')
tag = param.split('_')[0]
return '_'.join([tag] + [str(i) for i in new_indices])
[docs]
def remove_zmat_atom_0(zmat: dict) -> dict:
"""
Remove atom 0 from a Z-matrix complete structure, dropping all references to it
and renumbering the remaining atoms, parameters, and map accordingly.
"""
if len(zmat['symbols']) <= 1:
return {'symbols': (), 'coords': (), 'vars': {}, 'map': {}}
if len(zmat['symbols']) == 2:
return {'symbols': (zmat['symbols'][1],), 'coords': ((None, None, None),), 'vars': {}, 'map': {0: 0}}
orig_map0 = zmat['map'][0]
dropped_idx = int(orig_map0[1:]) if isinstance(orig_map0, str) and orig_map0.startswith('X') else orig_map0
purged = purge_references_to_atom_0(zmat)
dropped = drop_symbol_and_coords_row_0(purged)
renumbered = renumber_params(dropped, delta=-1)
rebuilt = {**renumbered, 'map': rebuild_map(renumbered['map'], dropped_idx)}
return rebuilt
[docs]
def purge_references_to_atom_0(zmat: dict) -> dict:
"""
Replace any Z-matrix parameter referencing atom 0 with valid alternatives.
Ensures only atoms with index < current are used as references (Z-matrix rule).
Leaves map untouched. Atom 0 is still present and must be removed later.
"""
z0 = deepcopy(zmat)
xyz_coords, _ = zmat_to_coords(zmat=z0, keep_dummy=True)
new_vars, new_coords = dict(), list()
def safe_calc_param(atoms):
try:
return calculate_param(coords=xyz_coords, atoms=atoms)
except (ValueError, IndexError, VectorsError):
return None
all_param_names_used = set()
for i, row in enumerate(z0['coords']):
new_row = list()
for p in row:
if not isinstance(p, str):
new_row.append(p)
continue
all_param_names_used.add(p)
if '_0' not in p:
new_row.append(p)
continue
groups = get_atom_indices_from_zmat_parameter(p)
flat = [idx for group in groups for idx in group]
if 0 not in flat:
new_row.append(p)
continue
# Replace 0 with a valid reference
used = set(flat) - {0}
candidate = 1
while candidate in used or candidate >= i:
candidate += 1
if candidate >= i:
p = None
break
else:
updated = [candidate if x == 0 else x for x in flat]
updated[0] = i # ensure proper first index
tag = p.split("_")[0]
new_p = "_".join([tag] + [str(idx) for idx in updated])
try:
xyz_indices = [zmat['map'][j] for j in updated]
except KeyError:
param_value = None
else:
param_value = safe_calc_param(xyz_indices)
if param_value is not None:
new_vars[new_p] = param_value
p = new_p
else:
p = None
new_row.append(p)
new_coords.append(tuple(new_row))
# Add original vars that are still used
for k, v in z0['vars'].items():
if k in all_param_names_used and k not in new_vars:
new_vars[k] = v
return {'symbols': z0['symbols'],
'coords': tuple(new_coords),
'vars': new_vars,
'map': z0['map']}
[docs]
def drop_symbol_and_coords_row_0(zmat: dict) -> dict:
"""
Remove the 0th atom from the Z-matrix:
- Removes the first symbol and coordinate row
- Clears DOFs for the new atoms 0–2 (Z-matrix rules)
- Removes unused variables from `vars`
- Leaves map untouched (to be rebuilt later)
- Does NOT decrement atom indices; that’s handled later
"""
z0 = deepcopy(zmat)
# Drop the first atom (index 0)
new_symbols = tuple(z0['symbols'][1:])
new_coords = list(z0['coords'][1:])
# Reset DOFs for atoms 0–2
if len(new_coords) >= 1:
new_coords[0] = (None, None, None)
if len(new_coords) >= 2:
new_coords[1] = (new_coords[1][0], None, None)
if len(new_coords) >= 3:
new_coords[2] = (new_coords[2][0], new_coords[2][1], None)
# Drop any vars no longer referenced
used_keys = {p for row in new_coords for p in row if isinstance(p, str)}
new_vars = {k: v for k, v in z0['vars'].items() if k in used_keys}
return {'symbols': new_symbols,
'coords': tuple(new_coords),
'vars': new_vars,
'map': z0['map'].copy()} # leave untouched for rebuild_map()
[docs]
def renumber_params(zmat: dict, delta: int = -1) -> dict:
"""
Renumber all atom indices in param names in both `coords` and `vars`.
Returns a self-consistent Z-matrix.
Args:
zmat (dict): The Z-matrix dict with 'coords' and 'vars'
delta (int): The value to shift all indices by (usually -1)
Returns:
dict: The updated Z-matrix with renamed param strings and values.
"""
pattern = re.compile(r'^([RAD]X?)_(\d+)(?:_(\d+))?(?:_(\d+))?(?:_(\d+))?')
# Param key mapping: old_key → new_key
key_map = {}
# 1. Rename all parameter strings in coords
new_coords = []
for row in zmat['coords']:
new_row = []
for p in row:
if not isinstance(p, str):
new_row.append(p)
continue
match = pattern.fullmatch(p)
if not match:
new_row.append(p)
continue
tag, *idxs = match.groups()
shifted = [str(max(int(i) + delta, 0)) for i in idxs if i is not None]
new_key = '_'.join([tag] + shifted)
key_map[p] = new_key
new_row.append(new_key)
new_coords.append(tuple(new_row))
# 2. Rename all keys in vars that were used in coords
new_vars = {}
for old_key, val in zmat['vars'].items():
new_key = key_map.get(old_key)
if new_key:
new_vars[new_key] = val
return {**zmat,
'coords': tuple(new_coords),
'vars': new_vars}
[docs]
def rebuild_map(old_map: Dict[int, Union[int, str]], dropped_idx: int) -> Dict[int, Union[int, str]]:
"""
Rebuild the Z-matrix to XYZ index map after removing atom 0.
- Z-matrix indices (keys) are shifted down by 1 if > 0
- XYZ indices (values) are shifted down by 1 if > dropped_idx
- Dummy atoms ('Xn') are handled appropriately
Args:
old_map (dict): Original map from Z-matrix to XYZ indices (int or 'Xn')
dropped_idx (int): Cartesian index of the removed atom
Returns:
dict: New map with updated indices
"""
new_map = {}
for old_z_i, old_xyz_i in old_map.items():
if old_z_i == 0:
continue # drop atom 0
new_z_i = old_z_i - 1
if isinstance(old_xyz_i, str) and old_xyz_i.startswith('X'):
x_idx = int(old_xyz_i[1:])
new_x_idx = x_idx - 1 if x_idx > dropped_idx else x_idx
new_xyz_i = f'X{new_x_idx}'
else:
new_xyz_i = old_xyz_i - 1 if old_xyz_i > dropped_idx else old_xyz_i
new_map[new_z_i] = new_xyz_i
return new_map
[docs]
def map_index_to_int(index: Union[int, str]) -> int:
"""
Convert a zmat map value, e.g., 1 or 'X15', into an int, e.g., 1 or 15.
Args:
index (Union[int, str]): The map index.
Returns: int
"""
if isinstance(index, int):
return index
if isinstance(index, str) and all(char.isdigit() for char in index[1:]):
return int(index[1:])
raise TypeError(f'Expected either an int or a string on the format "X15", got {index}')