Source code for arc.job.trsh

"""
The ARC troubleshooting ("trsh") module
"""

import math
import os
from typing import List, Optional, Tuple, Union

import numpy as np
import pandas as pd
import re

from arc.common import (check_torsion_change,
                        convert_to_hours,
                        determine_ess,
                        estimate_orca_mem_cpu_requirement,
                        get_logger,
                        get_number_with_ordinal_indicator,
                        is_same_pivot,
                        is_same_sequence_sublist,
                        is_str_float
                        )
from arc.exceptions import InputError, SpeciesError, TrshError
from arc.imports import settings
from arc.level import Level
from arc.job.local import execute_command
from arc.job.ssh import SSHClient
from arc.species import ARCSpecies
from arc.species.conformers import determine_smallest_atom_index_in_scan
from arc.species.converter import (displace_xyz, ics_to_scan_constraints)
from arc.species.species import determine_rotor_symmetry
from arc.species.vectors import calculate_dihedral_angle, calculate_distance
from arc.parser import (parse_1d_scan_coords,
                        parse_normal_mode_displacement,
                        parse_scan_args,
                        parse_scan_conformers,
                        parse_xyz_from_file,
                        )


logger = get_logger()


delete_command = settings['delete_command']
inconsistency_ab = settings['inconsistency_ab']
inconsistency_az = settings['inconsistency_az']
maximum_barrier = settings['maximum_barrier']
preserve_params_in_scan = settings['preserve_params_in_scan']
rotor_scan_resolution = settings['rotor_scan_resolution']
servers = settings['servers']
submit_filenames = settings['submit_filenames']
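
# A hedged sketch of the shape of the settings entries unpacked above (values are
# illustrative assumptions only; the actual values live in ARC's settings module and
# may differ):
#
#     delete_command = {'Slurm': 'scancel', 'OGE': 'qdel', 'PBS': 'qdel'}
#     inconsistency_ab = 0.3    # relative tolerance between consecutive scan points
#     inconsistency_az = 5      # kJ/mol tolerance between initial and final scan points
#     maximum_barrier = 40      # kJ/mol
#     preserve_params_in_scan = {'bond': 0.1, 'angle': 10, 'dihedral': 30}
#     rotor_scan_resolution = 8.0  # degrees
#     servers = {'server1': {'cluster_soft': 'PBS', 'memory': 128, ...}, ...}
#     submit_filenames = {'Slurm': 'submit.sl', 'OGE': 'submit.sh', 'PBS': 'submit.sh'}
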


def determine_ess_status(output_path: str,
                         species_label: str,
                         job_type: str,
                         job_log: Optional[str] = None,
                         software: Optional[str] = None,
                         ) -> Tuple[str, List[str], str, str]:
    """
    Determine the reason that caused an ESS job to crash, assign error keywords for troubleshooting.

    Args:
        output_path (str): The path to the ESS output file.
        species_label (str): The species label.
        job_type (str): The job type (e.g., 'opt', 'freq', 'ts', 'sp').
        job_log (str, optional): The path to the server job log file (not the ESS output file) or its content.
        software (str, optional): The ESS software.

    Returns:
        Tuple[str, List[str], str, str]
            - The status. Either 'done' or 'errored'.
            - The standardized error keywords.
            - A description of the error.
            - The parsed line from the ESS output file indicating the error.
    """
    keywords, error, line = determine_job_log_memory_issues(job_log=job_log)
    if error:
        return 'errored', keywords, error, line
    if software is None:
        software = determine_ess(log_file=output_path)
    keywords, error = list(), ''
    with open(output_path, 'r') as f:
        lines = f.readlines()
    if len(lines) < 5:
        return 'errored', ['NoOutput'], 'Log file could not be read', ''
    forward_lines = tuple(lines)
    reverse_lines = tuple(lines[::-1])

    if software == 'gaussian':
        for line in forward_lines[-1:-20:-1]:
            if 'Normal termination' in line:
                return 'done', list(), '', ''
        for i, line in enumerate(reverse_lines):
            if 'termination' in line:
                if 'l9999.exe' in line or 'link 9999' in line:
                    keywords = ['Unconverged', 'GL9999']  # GL stands for Gaussian Link error
                    error = 'Unconverged'
                elif 'l101.exe' in line:
                    keywords = ['InputError', 'GL101']
                    error = 'The blank line after the coordinate section is missing, ' \
                            'or charge/multiplicity was not specified correctly.'
                elif 'l103.exe' in line:
                    keywords = ['InternalCoordinateError', 'GL103', 'NoSymm']
                    error = 'Internal coordinate error'
                elif 'l108.exe' in line:
                    keywords = ['InputError', 'GL108']
                    error = 'There are two blank lines between the z-matrix and ' \
                            'the variables, expected only one.'
                elif 'l202.exe' in line:
                    keywords = ['OptOrientation', 'GL202']
                    error = 'During the optimization process, either the standard ' \
                            'orientation or the point group of the molecule has changed.'
                elif 'l301.exe' in line:
                    keywords = ['GL301']
                elif 'l401.exe' in line:
                    keywords = ['GL401']
                elif 'l502.exe' in line:
                    keywords = ['SCF', 'GL502', 'NoSymm']
                    error = 'Unconverged SCF'
                elif 'l508.exe' in line:
                    keywords = ['no_xqc', 'GL508']
                    error = 'Unconverged'
                elif 'l716.exe' in line:
                    keywords = ['ZMat', 'GL716']
                    error = 'Angle in z-matrix outside the allowed range 0 < x < 180.'
                elif 'l906.exe' in line:
                    keywords = ['MP2', 'GL906']
                    error = 'The MP2 calculation has failed. It may be related to a pseudopotential: ' \
                            'basis sets (CEP-121G*) that are used with polarization functions, ' \
                            'where no polarization functions actually exist.'
                elif 'l913.exe' in line:
                    keywords = ['MaxOptCycles', 'GL913']
                    error = 'Maximum optimization cycles reached.'
                if any([keyword in ['GL301', 'GL401'] for keyword in keywords]):
                    additional_info = forward_lines[len(forward_lines) - i - 2]
                    if 'No data on chk file' in additional_info \
                            or 'Basis set data is not on the checkpoint file' in additional_info \
                            or 'Error in GetGes' in additional_info:
                        keywords = ['CheckFile']
                        error = additional_info.strip()
                    elif 'GL301' in keywords:
                        if 'Atomic number out of range for' in forward_lines[len(forward_lines) - i - 2]:
                            keywords.append('BasisSet')
                            error = f'The basis set {forward_lines[len(forward_lines) - i - 2].split()[6]} ' \
                                    f'is not appropriate for this chemistry.'
                        else:
                            keywords.append('InputError')
                            error = 'Either charge, multiplicity, or basis set was not ' \
                                    'specified correctly. Alternatively, a specified atom does not match any ' \
                                    'standard atomic symbol.'
                    elif 'GL401' in keywords:
                        keywords.append('BasisSet')
                        error = 'The projection from the old to the new basis set has failed.'
            elif 'Erroneous write' in line or 'Write error in NtrExt1' in line:
                keywords = ['DiskSpace']
                error = 'Ran out of disk space.'
                line = ''
            elif 'NtrErr' in line:
                keywords = ['CheckFile']
                error = 'An operation on the check file was specified, but a .chk was not found or is incomplete.'
                line = ''
            elif 'malloc failed' in line or 'galloc' in line:
                keywords = ['Memory']
                error = 'Memory allocation failed (did you ask for too much?)'
                line = ''
            elif 'PGFIO/stdio: No such file or directory' in line:
                keywords = ['Scratch']
                error = 'Wrongly specified the scratch directory. Correct the "GAUSS_SCRDIR" ' \
                        'variable in the submit script, it should point to an existing directory. ' \
                        'Make sure to add "mkdir -p $GAUSS_SCRDIR" to your submit script.'
                line = ''
            if 'a syntax error was detected' in line.lower() or 'an ambiguous keyword was detected' in line.lower():
                keywords = ['Syntax']
                error = 'There was a syntax error in the Gaussian input file. Check your Gaussian input file ' \
                        'template under arc/job/inputs.py. Alternatively, perhaps the level of theory is not ' \
                        'supported by Gaussian in the specific format it was given.'
                line = ''
            if keywords:
                break
        error = error if error else 'Gaussian job terminated for an unknown reason. ' \
                                    'It is possible there was a server node failure.'
        keywords = keywords if keywords else ['Unknown']
        return 'errored', keywords, error, line

    elif software == 'qchem':
        done = False
        for line in reverse_lines:
            if 'Thank you very much for using Q-Chem' in line:
                done = True
                # If this is an opt job, we must also check that the max num of cycles hasn't been reached,
                # so don't break yet.
                if 'opt' not in job_type and 'conformer' not in job_type and 'ts' not in job_type:
                    break
            elif 'SCF failed' in line:
                keywords = ['SCF']
                error = 'SCF failed'
                break
            elif 'error' in line and 'DIIS' not in line:
                # These are **normal** lines that we should not capture:
                # "SCF converges when DIIS error is below 1.0E-08", or
                # "Cycle       Energy         DIIS Error"
                keywords = ['SCF', 'DIIS']
                error = 'SCF failed'
                break
            elif 'Invalid charge/multiplicity combination' in line:
                raise SpeciesError(f'The multiplicity and charge combination for species '
                                   f'{species_label} are wrong.')
            if 'opt' in job_type or 'conformer' in job_type or 'ts' in job_type:
                if 'MAXIMUM OPTIMIZATION CYCLES REACHED' in line:
                    keywords = ['MaxOptCycles']
                    error = 'Maximum optimization cycles reached.'
                    break
                elif 'OPTIMIZATION CONVERGED' in line and done:  # `done` should already be assigned
                    done = True
                    break
        if done:
            return 'done', keywords, '', ''
        error = error if error else 'QChem job terminated for an unknown reason.'
        keywords = keywords if keywords else ['Unknown']
        return 'errored', keywords, error, line

    elif software == 'orca':
        done = False
        for i, line in enumerate(reverse_lines):
            if 'ORCA TERMINATED NORMALLY' in line:
                # Not done yet, things can still go wrong (e.g., the SCF energy might blow up).
                for j, info in enumerate(forward_lines):
                    if 'Starting incremental Fock matrix formation' in info:
                        while not is_str_float(forward_lines[j + 1].split()[1]):
                            j += 1
                        scf_energy_initial_iteration = float(forward_lines[j + 1].split()[1])
                    if 'TOTAL SCF ENERGY' in info:
                        # this value is very close to the scf energy at the last iteration and is easier to parse
                        scf_energy_last_iteration = float(forward_lines[j + 3].split()[3])
                        break
                # Check whether the final SCF energy makes sense
                scf_energy_ratio = scf_energy_last_iteration / scf_energy_initial_iteration
                scf_energy_ratio_threshold = 2  # it is rare that this ratio > 2
                if scf_energy_ratio > scf_energy_ratio_threshold:
                    keywords = ['SCF']
                    error = f'The SCF energy seems to have diverged during iterations. SCF energy after the initial ' \
                            f'iteration is {scf_energy_initial_iteration}. SCF energy after the final iteration ' \
                            f'is {scf_energy_last_iteration}. The ratio between the final and initial SCF energy ' \
                            f'is {scf_energy_ratio}. This ratio is greater than the default threshold of ' \
                            f'{scf_energy_ratio_threshold}. Please consider using alternative methods or larger ' \
                            f'basis sets.'
                    line = ''
                else:
                    done = True
                break
            elif 'ORCA finished by error termination in SCF' in line:
                keywords = ['SCF']
                for j, info in enumerate(reverse_lines):
                    if 'Please increase MaxCore' in info:
                        try:
                            # e.g., Please increase MaxCore to more than: 289 MB
                            estimated_mem = float(info.split()[-2]) + 500
                        except ValueError:
                            error = 'Insufficient job memory.'
                            keywords.append('Memory')
                            break
                        keywords.append('Memory')
                        # e.g., Error (ORCA_SCF): Not enough memory available!
                        line = reverse_lines[j + 3].rstrip()
                        error = f'Orca suggests to increase the per cpu core memory to {estimated_mem} MB.'
                        break
                else:
                    error = 'SCF error in Orca.'
                break
            elif 'ORCA finished by error termination in MDCI' in line:
                keywords = ['MDCI']
                for j, info in enumerate(reverse_lines):
                    if 'Please increase MaxCore' in info:
                        estimated_mem_list = []
                        for message in reverse_lines:
                            if 'Please increase MaxCore' in message:
                                try:
                                    # e.g., Please increase MaxCore - by at least ( 9717.9 MB)
                                    # This message appears multiple times, and suggests different memory at each
                                    # appearance. Need to store all suggested memory values, and then pick the
                                    # largest one. This error msg appears in Orca version 4.2.x
                                    estimated_mem = math.ceil(float(message.split()[-2]))
                                except ValueError:
                                    # e.g., Please increase MaxCore
                                    # In old Orca versions, there is no indication of the minimum memory requirement
                                    error = 'Insufficient job memory.'
                                    break
                                estimated_mem_list.append(estimated_mem)
                        if estimated_mem_list:
                            estimated_max_mem = np.max(estimated_mem_list) + 500
                            error = f'Orca suggests to increase the per cpu core memory to {estimated_max_mem} MB.'
                        keywords.append('Memory')
                        line = info
                        break
                    elif 'parallel calculation exceeds number of pairs' in info:
                        try:
                            # e.g., Error (ORCA_MDCI): Number of processes (16) in parallel calculation exceeds
                            # number of pairs (10) - error msg in Orca version 4.2.x
                            max_core = int(info.split()[-1].strip('()'))
                            error = f'Orca cannot utilize more cpu cores than there are electron pairs in the ' \
                                    f'molecule. The maximum number of cpu cores that can be used for this job ' \
                                    f'is {max_core}.'
                        except ValueError:
                            # e.g., Error (ORCA_MDCI): Number of processes in parallel calculation exceeds
                            # number of pairs - error msg in Orca version 4.1.x
                            error = 'Orca cannot utilize more cpu cores than there are electron pairs in the ' \
                                    'molecule. ARC will estimate the number of cpu cores needed based on the ' \
                                    'number of heavy atoms in the molecule.'
                        keywords.append('cpu')
                        line = info
                        break
                else:
                    error = 'MDCI error in Orca. Assuming memory allocation error.'
                    keywords.append('Memory')
                break
            elif 'Error : multiplicity' in line:
                keywords = ['Input']
                error = f'The multiplicity and charge combination for species {species_label} are wrong.'
                break
            elif 'UNRECOGNIZED OR DUPLICATED KEYWORD' in line:
                # e.g., UNRECOGNIZED OR DUPLICATED KEYWORD(S) IN SIMPLE INPUT LINE
                keywords = ['Syntax']
                line = reverse_lines[i - 1]  # this line in the log file suggests which keyword might be problematic
                problematic_keyword = line.split()[0]
                error = f'There was a keyword syntax error in the Orca input file. In particular, the keyword ' \
                        f'{problematic_keyword} may be either duplicated or illegal. Please check your Orca ' \
                        f'input file template under arc/job/inputs.py. Alternatively, perhaps the level of ' \
                        f'theory or the job option is not supported by Orca in the format it was given.'
                break
            elif 'There are no CABS' in line:
                # e.g., ** There are no CABS basis functions on atom number 2 (Br) **
                keywords = ['Basis']
                problematic_atom = line.split()[-2].strip('()')
                error = f'There was a basis set error in the Orca input file. In particular, the basis for atom ' \
                        f'type {problematic_atom} is missing. Please check whether the specified basis set ' \
                        f'supports this atom.'
                break
            elif 'This wavefunction IS NOT FULLY CONVERGED!' in line:
                keywords = ['Convergence']
                error = 'The specified wavefunction method did not converge. Please restart the calculation with ' \
                        'more max iterations or with different convergence flags.'
                break
            elif 'ORCA finished by error termination in GTOInt' in line:
                error = 'GTOInt error in Orca. Assuming memory allocation error.'
                keywords.append('GTOInt')
                keywords.append('Memory')
                break
        if done:
            return 'done', keywords, '', ''
        error = error if error else 'Orca job terminated for an unknown reason.'
        keywords = keywords if keywords else ['Unknown']
        return 'errored', keywords, error, line

    elif software == 'molpro':
        for line in reverse_lines:
            if 'molpro calculation terminated' in line.lower() \
                    or 'variable memory released' in line.lower():
                return 'done', list(), '', ''
            elif 'No convergence' in line and '?No convergence in rhfpr' not in line:
                keywords = ['Unconverged']
                error = 'Unconverged'
                break
            elif 'error exit can be avoided using the IGNORE_ERROR option on the ORBITAL directive' in line:
                keywords = ['IGNORE_ERROR in the ORBITAL directive']
                error = 'Unconverged'
                break
            elif 'A further' in line and 'Mwords of memory are needed' in line and 'Increase memory to' in line:
                # e.g.: `A further 246.03 Mwords of memory are needed for the triples to run.
                #        Increase memory to 996.31 Mwords.` (w/o the line break)
                keywords = ['Memory']
                for line0 in reverse_lines:
                    if ' For full I/O' in line0 and 'increase memory by' in line0 and 'Mwords to' in line0:
                        memory_increase = re.findall(r"[\d.]+", line0)[0]
                        error = f'Additional memory required: {memory_increase} MW'
                        break
                error = error if error else f'Additional memory required: {line.split()[2]} MW'
                break
            elif 'insufficient memory available - require' in line:
                # e.g.: `insufficient memory available - require              228765625  have
                #        62928590
                #        the request was for real words`
                keywords = ['Memory']
                error = f'Additional memory required: {float(line.split()[-2]) / 1e6} MW'
                break
            elif 'Insufficient memory to allocate' in line or 'The problem occurs in memory' in line:
                # e.g.: `Insufficient memory to allocate a new array of length 321843600 8-byte words
                #        The problem occurs in memory`
                keywords = ['Memory']
                error = 'Additional memory required'
                break
            elif 'Basis library exhausted' in line:
                # e.g.:
                # ` SETTING BASIS          =    6-311G**
                #
                #  Using spherical harmonics
                #
                #  LIBRARY EXHAUSTED
                #   Searching for I  S 6-311G
                #  Library contains the following bases:
                #  ? Error
                #  ? Basis library exhausted
                #  ? The problem occurs in Binput`
                keywords = ['BasisSet']
                basis_set = None
                for line0 in reverse_lines:
                    if 'SETTING BASIS' in line0:
                        basis_set = line0.split()[-1]
                error = f'Unrecognized basis set {basis_set}'
                break
            elif 'the problem occurs' in line:
                keywords = ['Unknown']
                error = 'Unknown'
                break
        error = error if error else 'Molpro job terminated for an unknown reason.'
        keywords = keywords if keywords else ['Unknown']
        return 'errored', keywords, error, line

    elif software == 'terachem':
        for line in lines[::-1]:
            if 'Job finished:' in line:
                return 'done', list(), '', ''
            elif 'incorrect method' in line.lower():
                keywords = ['IncorrectMethod']
                error = 'incorrect method'
                break
            elif 'error: ' in line.lower():
                # e.g.: "ERROR: Closed shell calculations can't have spin multiplicity 0."
                keywords = ['Unknown']  # Todo
                error = line.split()[1]
                break
            elif 'unable to open file: ' in line.lower() and 'basis' in line.lower():
                # e.g.: "Unable to open file /<..path..>/TeraChem/basis/6-311++g[d,p]"
                keywords = ['MissingBasisSet']
                error = 'Could not find basis set {0} in TeraChem'.format(
                    line.split('/')[-1].replace('[', '(').replace(']', ')'))
        error = error if error else 'TeraChem job terminated for an unknown reason.'
        keywords = keywords if keywords else ['Unknown']
        return 'errored', keywords, error, line

    return '', list(), '', ''
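
# A minimal usage sketch of determine_ess_status() (the path and label below are
# hypothetical, shown for illustration only):
#
#     status, keywords, error, line = determine_ess_status(
#         output_path='calcs/Species/C2H6/opt_a38/output.out',
#         species_label='C2H6',
#         job_type='opt',
#         software='gaussian',
#     )
#     if status == 'errored':
#         # `keywords` (e.g., ['SCF', 'GL502', 'NoSymm']) can be passed on to trsh_ess_job()
#         print(error)
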
def determine_job_log_memory_issues(job_log: Optional[str] = None) -> Tuple[List[str], str, str]:
    """
    Determine whether an ESS job crashed due to memory issues, and assign standardized error
    keywords for troubleshooting.

    Args:
        job_log (str, optional): The path to the server job log file (not the ESS output file) or its content.

    Returns:
        Tuple[List[str], str, str]
            - The standardized error keywords.
            - A description of the error.
            - The parsed line from the ESS output file indicating the error.
    """
    keywords, error, line = list(), '', ''
    if job_log is not None:
        if os.path.isfile(job_log):
            with open(job_log, 'r') as f:
                lines = f.readlines()
        else:
            lines = job_log.splitlines()
        mem_usage = ''
        for line in lines:
            if 'MemoryUsage of job' in line:
                # E.g.: "  3162  -  MemoryUsage of job (MB)"
                mem_usage = f', used only {0.001 * int(line.split()[0])} GB'
            if 'memory exceeded' in line.lower():
                keywords = ['Memory']
                error = 'Insufficient job memory.'
                break
            if 'using less than' in line and 'percent of requested' in line:
                # e.g., "using less than 20 percent of requested"
                keywords = ['Memory']
                error = f'Memory requested is too high{mem_usage}.'
                break
        line = line if error else ''
    return keywords, error, line
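
# For example, given a server log that contains a memory-exceeded message (the file name
# and content are hypothetical), this helper returns the standardized keywords that
# short-circuit determine_ess_status():
#
#     keywords, error, line = determine_job_log_memory_issues(
#         job_log='slurm-12345.out')  # a path, or the log's content as a string
#     # keywords == ['Memory'] and error == 'Insufficient job memory.' if such a message was found
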
def trsh_negative_freq(label: str,
                       log_file: str,
                       neg_freqs_trshed: list = None,
                       job_types: list = None):
    """
    Troubleshooting cases where non-TS species have negative frequencies.
    We take +/-1.1 displacements, generating several new initial geometries.

    Args:
        label (str): The species label.
        log_file (str): The frequency job log file.
        neg_freqs_trshed (list, optional): A list of negative frequencies the species was troubleshooted for.
        job_types (list, optional): The job types used for ARC, e.g., ['opt', 'rotors'].

    Todo:
        * get all torsions of the molecule (if they weren't already generated),
          identify the atom/s with the largest displacements (top 2),
          determine torsions with unique pivots where these atoms are in the "scan" and "top" but not pivotal,
          generate a 360 scan using 30 deg increments and append all 12 results as conformers
          (consider rotor symmetry to append fewer conformers?)

    Returns:
        Tuple[list, list, list, list]
            - The current troubleshooted negative frequencies.
            - The new conformers to try optimizing.
            - Errors to report.
            - Warnings to report.

    Raises:
        TrshError: If a negative frequency could not be determined.
    """
    neg_freqs_trshed = neg_freqs_trshed if neg_freqs_trshed is not None else list()
    job_types = job_types if job_types is not None else ['rotors']
    output_errors, output_warnings, conformers, current_neg_freqs_trshed = list(), list(), list(), list()
    factors = [0.25, 0.50, 0.75, 1.0, 1.5, 2.5]
    factor = factors[0]
    max_times_to_trsh_neg_freq = len(factors) + 1
    freqs, normal_modes_disp = parse_normal_mode_displacement(path=log_file, raise_error=False)
    if not len(normal_modes_disp):
        logger.error(f'Could not troubleshoot negative frequency for species {label}.')
        return [], [], output_errors, []
    if len(neg_freqs_trshed) > max_times_to_trsh_neg_freq:
        logger.error(f'Species {label} was troubleshooted for negative frequencies too many times.')
        if 'rotors' not in job_types:
            logger.error('The rotor scans feature is turned off, '
                         'cannot troubleshoot geometry using dihedral modifications.')
            output_warnings.append('rotors = False; ')
        logger.error('Invalidating species.')
        output_errors.append('Error: Encountered negative frequencies too many times; Invalidating species; ')
    else:
        neg_freqs_idx = list()  # store indices w.r.t. vibfreqs
        largest_neg_freq_idx = 0  # index in vibfreqs
        for i, freq in enumerate(freqs):
            if freq < 0:
                neg_freqs_idx.append(i)
                if freqs[i] < freqs[largest_neg_freq_idx]:
                    largest_neg_freq_idx = i
            else:
                # assuming frequencies are ordered, break after the first positive freq encountered
                break
        if freqs[largest_neg_freq_idx] >= 0 or len(neg_freqs_idx) == 0:
            raise TrshError(f'Could not determine a negative frequency for species {label} '
                            f'while troubleshooting for it.')
        if len(neg_freqs_idx) == 1 and not len(neg_freqs_trshed):
            # Species has one negative frequency and has not been troubleshooted for it before.
            logger.info(f'Species {label} has a negative frequency ({freqs[largest_neg_freq_idx]}). Perturbing its '
                        f'geometry using the respective vibrational normal mode displacement(s), '
                        f'using an amplitude of {factor}.')
            neg_freqs_idx = [largest_neg_freq_idx]  # indices of the negative frequencies to troubleshoot for
        elif len(neg_freqs_idx) == 1 \
                and any([np.allclose(freqs[0], vf, rtol=1e-04, atol=1e-02) for vf in neg_freqs_trshed]) \
                and len(neg_freqs_trshed) < len(factors):
            # Species has one negative frequency and has been troubleshooted for it before.
            factor = factors[len(neg_freqs_trshed)]
            logger.info(f'Species {label} has a negative frequency ({freqs[largest_neg_freq_idx]}) for the '
                        f'{get_number_with_ordinal_indicator(len(neg_freqs_trshed))} time. '
                        f'Perturbing its geometry using the respective vibrational '
                        f'normal mode displacement(s), this time using a larger factor (x {factor})')
            neg_freqs_idx = [largest_neg_freq_idx]  # indices of the negative frequencies to troubleshoot for
        elif len(neg_freqs_idx) > 1 and not any([np.allclose(freqs[0], vf, rtol=1e-04, atol=1e-02)
                                                 for vf in neg_freqs_trshed]):
            # Species has more than one negative frequency and has not been troubleshooted for it before.
            logger.info(f'Species {label} has {len(neg_freqs_idx)} negative frequencies. Perturbing its geometry '
                        f'using the vibrational normal mode displacement(s) of its largest negative frequency, '
                        f'{freqs[largest_neg_freq_idx]}')
            neg_freqs_idx = [largest_neg_freq_idx]  # indices of the negative frequencies to troubleshoot for
        elif len(neg_freqs_idx) > 1 and any([np.allclose(freqs[0], vf, rtol=1e-04, atol=1e-02)
                                             for vf in neg_freqs_trshed]):
            # Species has more than one negative frequency and has been troubleshooted for it before.
            logger.info(f'Species {label} has {len(neg_freqs_idx)} negative frequencies. Perturbing its geometry '
                        f'using the vibrational normal mode displacement(s) of ALL negative frequencies')
        # Convert the numpy array to a list; important for saving the neg_freqs_trshed species attribute in the restart
        freqs_list = freqs.tolist()
        current_neg_freqs_trshed = [round(freqs_list[i], 2) for i in neg_freqs_idx]  # record trshed negative freqs
        xyz = parse_xyz_from_file(log_file)
        for neg_freq_idx in neg_freqs_idx:
            xyz_1, xyz_2 = displace_xyz(xyz=xyz, displacement=normal_modes_disp[neg_freq_idx], amplitude=factor)
            conformers.append(xyz_1)
            conformers.append(xyz_2)
    return current_neg_freqs_trshed, conformers, output_errors, output_warnings
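
# A hedged usage sketch (the file path and label are hypothetical). The returned conformers
# are perturbed geometries to re-optimize, and the troubleshooted frequencies should be
# accumulated between calls via `neg_freqs_trshed`:
#
#     current_trshed, conformers, errors, warnings = trsh_negative_freq(
#         label='C3H7',
#         log_file='calcs/Species/C3H7/freq_a7/output.out',
#         neg_freqs_trshed=[-1350.24],
#         job_types=['opt', 'rotors'])
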
def trsh_scan_job(label: str,
                  scan_res: Union[int, float],
                  scan: list,
                  scan_list: list,
                  methods: dict,
                  log_file: Optional[str] = None,
                  ) -> Tuple[str, int]:
    """
    Troubleshooting rotor scans, using the following methods:

    1. Freeze specific internal coordinates identified by scan_quality_check().
    2. Freeze all torsions other than the rotor to be scanned.
    3. Increase the scan resolution.

    Args:
        label (str): The species label.
        scan_res (int, float): The scan resolution in degrees.
        scan (list): The four atom indices representing the torsion to be troubleshooted.
        scan_list (list): Entries are the four-atom scan lists (1-indexed) of all torsions
                          (without duplicate pivots) in this species.
        methods (dict): The troubleshooting method/s to try. Example::

            {'inc_res': None, 'freeze': 'all' or [[1, 2, 3, 4], ...]}

        log_file (str, optional): The related output file path.

    Raises:
        TrshError:
            1. ``scan`` is not included in ``scan_list``.
            2. The freeze method includes an invalid internal coordinate.
            3. The freeze method does not provide any solution.
        InputError: Invalid ``methods`` input.

    Returns:
        Tuple[str, int]
            - The scan troubleshooting keywords to be appended to the Gaussian input file.
            - The new scan resolution in degrees.
    """
    if scan not in scan_list:
        raise TrshError(f'Could not find the scan to troubleshoot in the scan list of species {label}')
    if methods is None:
        raise InputError('Expected to get a dict of methods, got None.')

    # Methods 1 and 2
    if 'freeze' in methods:
        # 1. Freeze specific internal coordinates identified by scan_quality_check()
        if methods['freeze'] != 'all':
            try:
                scan_args = parse_scan_args(log_file)
            except Exception as e:
                # Cannot read the scan arguments, fall back to freezing all torsions
                logger.debug(f'Could not use the freeze method for troubleshooting the scan '
                             f'job for {label} with the current ESS.\nGot: {e}')
                methods['freeze'] = 'all'
            else:
                # methods = {'freeze': [[1, 2, 3, 4], ...]}
                if not methods['freeze']:
                    raise InputError(f'Could not use the freeze method for troubleshooting the scan job '
                                     f'for {label} as no information is given.')
                problematic_ic = methods['freeze']
                # Read the internal coordinates already frozen in the previous job
                already_frozen = scan_args['freeze']
                to_freeze = []

                # 1.1 Extract to-be-frozen bonds and angles, no need to prune:
                for ic in problematic_ic:
                    if len(ic) == 2 or len(ic) == 3:
                        to_freeze.append(ic)
                    elif len(ic) != 4:
                        raise TrshError(f'Could not use the freeze method for troubleshooting the scan job for '
                                        f'{label} as an invalid internal coordinate {ic} was given.')
                # Remove bonds and angles from problematic_ic
                problematic_ic = [ic for ic in problematic_ic if len(ic) == 4]

                # 1.2 First treat torsions that share the same pivots as the scanned rotor.
                # They cannot be frozen, so they need special treatment.
                to_freeze_tmp = trsh_special_rotor(special_rotor=scan,
                                                   problematic_ic=problematic_ic,
                                                   special_type='scan')
                to_freeze += to_freeze_tmp

                # 1.3 Treat torsions which were frozen in the previous run. For some torsions, although
                # one of the dihedrals is frozen, other dihedrals could still be problematic.
                for frozen_ic in already_frozen:
                    if len(frozen_ic) == 4:
                        to_freeze_tmp = trsh_special_rotor(special_rotor=frozen_ic,
                                                           problematic_ic=problematic_ic,
                                                           special_type='frozen')
                        to_freeze += to_freeze_tmp

                # 1.4 For other dihedrals, if encountering torsions with the same pivots,
                # just freeze the one seen first.
                pruning = []
                for torsion in problematic_ic:
                    if torsion in pruning \
                            or torsion in to_freeze \
                            or torsion[::-1] in to_freeze:
                        continue
                    to_freeze.append(torsion)
                    pruning += [ic for ic in problematic_ic if is_same_pivot(torsion, ic)]

        # 2. Freeze all torsions other than the rotor to be scanned
        if methods['freeze'] == 'all':
            scan_list.pop(scan_list.index(scan))
            to_freeze = scan_list
            already_frozen = []

        # Check the solution quality; the solution should not only include already frozen ICs
        if not len(to_freeze):
            # Started with problematic ICs, but could not come up with a good solution.
            raise TrshError(f'The freeze method does not yield a solution for the rotor '
                            f'scan on {scan} of {label}.')

        # Convert to_freeze into an input block string
        to_freeze += already_frozen
        software = determine_ess(log_file)
        scan_trsh = ics_to_scan_constraints(ics=to_freeze, software=software)
    else:
        # 'freeze' is not included in the methods
        scan_trsh = ''

    # 3. Increase the scan resolution
    if 'inc_res' in methods:
        scan_res = min(4, int(scan_res / 2))
        # make sure mod(360, scan_res) is 0:
        if scan_res not in [4, 2, 1]:
            scan_res = min([4, 2, 1], key=lambda x: abs(x - scan_res))

    return scan_trsh, scan_res
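
# A hedged usage sketch (the 1-indexed atom indices and the log path are hypothetical):
# freezing an internal coordinate flagged by scan_quality_check() while also halving the
# scan resolution:
#
#     scan_trsh, scan_res = trsh_scan_job(label='CH3OH',
#                                         scan_res=8.0,
#                                         scan=[3, 1, 2, 6],
#                                         scan_list=[[3, 1, 2, 6], [4, 2, 1, 5]],
#                                         methods={'freeze': [[4, 2, 1, 5]], 'inc_res': None},
#                                         log_file='calcs/Species/CH3OH/scan_a19/output.out')
#     # `scan_trsh` is a text block of constraints to append to the Gaussian input file.
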
def trsh_special_rotor(special_rotor: list,
                       problematic_ic: list,
                       special_type: str = 'scan',
                       ) -> list:
    """
    Troubleshoot special rotor cases given all problematic torsional internal coordinates.
    Special rotors include the rotor to be scanned and rotors that are already frozen.
    For example: if scan = [1, 2, 3, 4], scan_quality_check() might find [1, 2, 3, 5] problematic.
    However, we cannot simply freeze [1, 2, 3, 5], which would definitely make the scan fail.
    trsh_special_rotor() helps figure out the internal coordinates we actually need
    to freeze to troubleshoot the scan.

    Args:
        special_rotor (list): A list of four atoms indicating the special torsion.
        problematic_ic (list): A list of torsions identified as problematic by check_scan_quality.
                               This list will be pruned.
        special_type (str): The type of the special rotor. Either ``scan`` for the rotor to be
                            scanned, or ``frozen`` for a rotor that was frozen in the previous job.

    Returns:
        list: A list of internal coordinates to be frozen.
    """
    pruning = []
    same_pivots_torsions = []
    to_freeze = []
    # Find a torsion sharing three common atoms with special_rotor
    for torsion in problematic_ic:
        if torsion[1:] == special_rotor[1:] or torsion[:-1] == special_rotor[:-1]:
            if torsion[1:] == special_rotor[1:]:
                # special_rotor = [1, 2, 3, 4], torsion = [5, 2, 3, 4]; freeze [5, 1, 2, 3]
                # (top alignment) to avoid a flip
                to_freeze.append([torsion[0]] + special_rotor[:-1])
                # remove all [5, 2, 1, *] or [*, 1, 2, 5] or [1, 2, 5, *] or [*, 5, 2, 1]
                sublist = [torsion[0], special_rotor[1], special_rotor[0]]
            else:
                # special_rotor = [1, 2, 3, 4], ic = [1, 2, 3, 5]; freeze [2, 3, 4, 5] to avoid a flip
                to_freeze.append(special_rotor[1:] + [torsion[-1]])
                # remove all [*, 4, 3, 5] or [4, 3, 5, *] or [*, 5, 3, 4] or [5, 3, 4, *]
                sublist = [torsion[-1], special_rotor[-2], special_rotor[-1]]
            pruning += [ic for ic in problematic_ic
                        if (is_same_pivot(ic, special_rotor)
                            or is_same_sequence_sublist(sublist, ic)
                            or is_same_sequence_sublist(sublist[::-1], ic))]
            break
        elif is_same_pivot(torsion, special_rotor):
            same_pivots_torsions.append(torsion)
    else:
        # The following block is executed only when the ``break`` above
        # was not triggered and same_pivots_torsions is not empty.
        for torsion in same_pivots_torsions:
            pruning.append(torsion)
            if special_type == 'scan':
                # A rare case which might be due to a double flip of adjacent sp2 atoms.
                # Freeze the alignments of both tops.
                to_freeze.append([torsion[0]] + special_rotor[:-1])
                to_freeze.append(special_rotor[1:] + [torsion[-1]])
            elif special_type == 'frozen':
                # If there is no better solution, just freeze all the torsions with the same pivots.
                to_freeze.append(torsion)
    # Remove pruned internal coordinates from the problematic_ic list.
    for torsion in pruning:
        if torsion in problematic_ic:
            problematic_ic.pop(problematic_ic.index(torsion))
    return to_freeze
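
# For example (hypothetical atom indices): if the scanned torsion is [1, 2, 3, 4] and
# scan_quality_check() flagged [5, 2, 3, 4], the function freezes the top alignment
# instead of the un-freezable flagged torsion:
#
#     problematic = [[5, 2, 3, 4]]
#     to_freeze = trsh_special_rotor(special_rotor=[1, 2, 3, 4],
#                                    problematic_ic=problematic,
#                                    special_type='scan')
#     # to_freeze == [[5, 1, 2, 3]], and `problematic` has been pruned in place
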
def trsh_ess_job(label: str,
                 level_of_theory: Union[Level, dict, str],
                 server: str,
                 job_status: dict,
                 job_type: str,
                 software: str,
                 fine: bool,
                 memory_gb: float,
                 num_heavy_atoms: int,
                 cpu_cores: int,
                 ess_trsh_methods: list,
                 is_h: bool = False,
                 ) -> tuple:
    """
    Troubleshoot issues related to the electronic structure software, such as convergence.

    Args:
        label (str): The species label.
        level_of_theory (Union[Level, dict, str]): The original level of theory of the problematic job.
        server (str): The server used for this job.
        job_status (dict): The ESS job status dictionary with standardized error keywords
                           as generated using the determine_ess_status() function.
        job_type (str): The original job type.
        software (str): The ESS software.
        fine (bool): Whether the job used an ultrafine grid, ``True`` if it did.
        memory_gb (float): The memory in GB used for the job.
        num_heavy_atoms (int): The number of heavy atoms in the molecule.
        cpu_cores (int): The total number of cpu cores requested for the job.
        ess_trsh_methods (list): The troubleshooting methods tried for this job.
        is_h (bool): Whether the species is a hydrogen atom (or one of its isotopes), e.g., H, D, T.

    Todo:
        - Change the server to one that has the same ESS if running out of disk space.

    Returns:
        tuple
            - output_errors (list): Errors to report.
            - ess_trsh_methods (list): The updated troubleshooting methods tried for this job.
            - remove_checkfile (bool): Whether to remove the checkfile from the job, ``True`` to remove.
            - level_of_theory (Level): The new level of theory to use.
            - software (str, optional): The new ESS software to use.
            - job_type (str): The new job type to use.
            - fine (bool): Whether the new job should use a fine grid, ``True`` if it should.
            - trsh_keyword (str): The troubleshooting keyword to use.
            - memory (float): The new memory in GB to use for the job.
            - shift (str): The shift to use (only in Molpro).
            - cpus (int): The total number of cpu cores requested for the job.
            - couldnt_trsh (bool): Whether a troubleshooting solution was found, ``True`` if it was not found.
    """
    level_of_theory = Level(repr=level_of_theory)
    output_errors = list()
    remove_checkfile, couldnt_trsh = False, False
    trsh_keyword, shift = '', ''
    memory = memory_gb
    if server is not None and 'memory' not in servers[server]:
        servers[server]['memory'] = 64
        logger.warning(f'A "memory" key (relating to the *maximum* physical node memory) was not defined '
                       f'for server {server}. Setting it to 64 GB (as a guess). This will affect job troubleshooting '
                       f'methods which attempt to increase the job memory. This value should be specified in the '
                       f'servers dictionary in settings.py')
    if 'DiskSpace' in job_status['keywords']:
        output_errors.append(f'Error: Could not troubleshoot {job_type} for {label}! '
                             f'The job ran out of disk space on {server}; ')
        logger.error(f'Could not troubleshoot {job_type} for {label}! '
                     f'The job ran out of disk space on {server}')
        couldnt_trsh = True
    elif 'BasisSet' in job_status['keywords'] \
            and ('Unrecognized basis set' in job_status['error']
                 or 'is not appropriate for this chemistry' in job_status['error']):
        output_errors.append(f'Error: Could not recognize basis set '
                             f'{job_status["error"].split()[-1]} in {software}; ')
        couldnt_trsh = True
        logger.info(f'Troubleshooting {job_type} job in {software} for {label} that failed with '
                    '"Basis set data is not on the checkpoint file" by removing the checkfile.')

    elif software == 'gaussian':
        trsh_keyword = []  # initialize as a list
        logger_phrase = f'Troubleshooting {job_type} job in {software} for {label}'
        logger_info = []
        couldnt_trsh = True
        fine = False
        attempted_ess_trsh_methods = ess_trsh_methods.copy() if ess_trsh_methods else None
        # Check whether checkfile removal is among the keywords. Removal occurs upon:
        # - a basis set mismatch
        # - corrupt or incomplete data
        # - changing computational parameters
        remove_checkfile, ess_trsh_methods, couldnt_trsh = trsh_keyword_checkfile(job_status, ess_trsh_methods,
                                                                                  couldnt_trsh)
        if remove_checkfile:
            logger_info.append('that failed with "Basis set data is not on the checkpoint file" '
                               'by removing the checkfile.')
        # Check whether InternalCoordinateError is among the keywords, or use opt=(cartesian)
        ess_trsh_methods, trsh_keyword, couldnt_trsh = trsh_keyword_cartesian(job_status, ess_trsh_methods,
                                                                              job_type, trsh_keyword, couldnt_trsh)
        if 'cartesian' in ess_trsh_methods:
            logger_info.append('using opt=cartesian')
        ess_trsh_methods, trsh_keyword, couldnt_trsh = trsh_keyword_intaccuracy(ess_trsh_methods, trsh_keyword,
                                                                                couldnt_trsh)
        if 'int=(Acc2E=14)' in ess_trsh_methods:
            logger_info.append('using int=(Acc2E=14)')
        # Check whether SCF is among the keywords
        ess_trsh_methods, trsh_keyword, couldnt_trsh = trsh_keyword_scf(job_status, ess_trsh_methods,
                                                                        trsh_keyword, couldnt_trsh)
        if 'scf=(NDamp=30)' in ess_trsh_methods:
            logger_info.append('using scf=(NDamp=30)')
        if 'scf=(qc)' in ess_trsh_methods:
            logger_info.append('using scf=(qc)')
        if 'scf=(NoDIIS)' in ess_trsh_methods:
            logger_info.append('using scf=(NoDIIS)')
        # Check NoSymm
        ess_trsh_methods, trsh_keyword, couldnt_trsh = trsh_keyword_nosymm(job_status, ess_trsh_methods,
                                                                           trsh_keyword, couldnt_trsh)
        if 'NoSymm' in ess_trsh_methods:
            logger_info.append('using nosymm')
        # Check whether Unconverged is among the keywords
        ess_trsh_methods, trsh_keyword, fine, couldnt_trsh = trsh_keyword_unconverged(job_status, ess_trsh_methods,
                                                                                      trsh_keyword, couldnt_trsh,
                                                                                      fine)
        if fine:
            logger_info.append('using a fine grid')
        # Check whether Memory is among the keywords
        if 'Memory' in job_status['keywords'] and 'too high' not in job_status['error'] and server is not None:
            # Increase the memory allocation
            couldnt_trsh = False
            max_mem = servers[server].get('memory', 128)  # node memory in GB, defaults to 128 if not specified
            memory = min(memory_gb * 2, max_mem * 0.95)
            if memory > memory_gb:
                logger.info(f'Troubleshooting {job_type} job in {software} for {label} using more memory: '
                            f'{memory} GB instead of {memory_gb} GB')
                ess_trsh_methods.append('memory')
        if attempted_ess_trsh_methods:
            if attempted_ess_trsh_methods == ess_trsh_methods:
                logger.info(f'{logger_phrase} was not successful. Already attempted all possible '
                            f'troubleshooting methods. Will not troubleshoot again.')
                couldnt_trsh = True
            else:
                if logger_info:
                    logger.info(f'{logger_phrase} {", ".join(logger_info)}')
        elif logger_info:
            logger.info(f'{logger_phrase} {", ".join(logger_info)}')

    elif software == 'qchem':
        if 'MaxOptCycles' in job_status['keywords'] and 'max_cycles' not in ess_trsh_methods:
            # This is a common error; increase the max number of cycles and continue from the last geometry
            logger.info(f'Troubleshooting {job_type} job in {software} for {label} using max_cycles')
            ess_trsh_methods.append('max_cycles')
            trsh_keyword = '\n   GEOM_OPT_MAX_CYCLES 250'  # default is 50
        elif 'SCF' in job_status['keywords'] and 'DIIS_GDM' not in ess_trsh_methods:
            # Change the SCF algorithm and increase the max number of SCF cycles
            logger.info(f'Troubleshooting {job_type} job in {software} for {label} using the DIIS_GDM SCF algorithm')
            ess_trsh_methods.append('DIIS_GDM')
            trsh_keyword = '\n   SCF_ALGORITHM DIIS_GDM\n   MAX_SCF_CYCLES 1000'  # default is 50
        elif 'SYM_IGNORE' not in ess_trsh_methods:
            # Turn symmetry off (see the manual), change the SCF algorithm, and increase the max SCF cycles
            logger.info(f'Troubleshooting {job_type} job in {software} for {label} using SYM_IGNORE as well as the '
                        f'DIIS_GDM SCF algorithm')
            ess_trsh_methods.append('SYM_IGNORE')
            trsh_keyword = '\n   SCF_ALGORITHM DIIS_GDM\n   MAX_SCF_CYCLES 250\n   SYM_IGNORE True'
        else:
            couldnt_trsh = True

    elif 'orca' in software:
        if 'Memory' in job_status['keywords']:
            # Increase the memory allocation.
            # job_status will be, for example,
            # `Error (ORCA_SCF): Not enough memory available! Please increase MaxCore to more than: 289 MB`.
            if 'memory' not in ess_trsh_methods:
                ess_trsh_methods.append('memory')
            try:
                # parse Orca's memory requirement in MB
                estimated_mem_per_core = float(job_status['error'].split()[-2])
            except ValueError:
                estimated_mem_per_core = estimate_orca_mem_cpu_requirement(
                    num_heavy_atoms=num_heavy_atoms, server=server, consider_server_limits=True)[1] / cpu_cores
            # round up to the next hundred
            estimated_mem_per_core = int(np.ceil(estimated_mem_per_core / 100.0)) * 100
            if 'max_total_job_memory' in job_status['keywords']:
                per_cpu_core_memory = np.ceil(memory_gb / cpu_cores * 1024)
                logger.info(f'The crashed Orca job {label} was run with {cpu_cores} cpu cores and '
                            f'{per_cpu_core_memory} MB memory per cpu core. It requires at least '
                            f'{estimated_mem_per_core} MB per cpu core. Since the job had already requested the '
                            f'maximum amount of available total node memory, ARC will attempt to reduce the number '
                            f'of cpu cores to increase the memory per cpu core.')
                if 'cpu' not in ess_trsh_methods:
                    ess_trsh_methods.append('cpu')
                cpu_cores = math.floor(cpu_cores * per_cpu_core_memory / estimated_mem_per_core) - 2  # be conservative
                if cpu_cores > 1:
                    logger.info(f'Troubleshooting job {label} using {cpu_cores} cpu cores.')
                elif cpu_cores == 1:  # last resort
                    logger.info(f'Troubleshooting job {label} using only {cpu_cores} cpu core. Notice that the '
                                f'required job time may be unrealistically long or exceed limits on servers.')
                else:
                    logger.info(f'Not enough computational resources to accomplish job {label}. '
                                f'Please consider cheaper methods or allocate more resources if possible.')
                    couldnt_trsh = True
            if not couldnt_trsh:
                memory = estimated_mem_per_core * cpu_cores  # total memory for all cpu cores, in MB
                memory = np.ceil(memory / 1024 + 5)  # convert MB to GB, add 5 extra GB (be conservative)
                logger.info(f'Troubleshooting {job_type} job in {software} for {label} using {memory} GB total '
                            f'memory and {cpu_cores} cpu cores.')
        elif 'cpu' in job_status['keywords']:
            # Reduce the cpu allocation.
            try:
                # job_status will be, for example (Orca version 4.2.x),
                # `Error (ORCA_MDCI): Number of processes (16) in parallel calculation exceeds number of pairs (10)`
                cpu_cores = int(job_status['error'].split()[-1].strip('.'))  # max cpu cores allowed
            except ValueError:
                cpu_cores = estimate_orca_mem_cpu_requirement(num_heavy_atoms=num_heavy_atoms, server=server,
                                                              consider_server_limits=True)[0]
            logger.info(f'Troubleshooting {job_type} job in {software} for {label} using {cpu_cores} cpu cores.')
            if 'cpu' not in ess_trsh_methods:
                ess_trsh_methods.append('cpu')
        elif 'dlpno' in level_of_theory.method and is_h:
            logger.error('The DLPNO method is not supported for the H atom (or its isotopes D and T) in Orca.')
            couldnt_trsh = True
        else:
            couldnt_trsh = True

    elif 'molpro' in software:
        if 'Memory' in job_status['keywords']:
            # Increase the memory allocation.
            # Molpro gives something like `errored: additional memory (mW) required: 996.31`.
            # job_status standardizes the format to `Additional memory required: {0} MW`.
            # The number is the ADDITIONAL memory required, in MW.
            ess_trsh_methods.append('memory')
            add_mem_str = job_status['error'].split()[-2]  # parse Molpro's requirement in MW
            if all(c.isdigit() or c == '.' for c in add_mem_str):
                add_mem = float(add_mem_str)
                add_mem = int(np.ceil(add_mem / 100.0)) * 100  # round up to the next hundred
                memory = memory_gb + add_mem / 128. + 5  # convert MW to GB, add 5 extra GB (be conservative)
            else:
                # The required memory is not specified; triple the current allocation instead
                memory = memory_gb * 3
            logger.info(f'Troubleshooting {job_type} job in {software} for {label} using memory: {memory:.2f} GB '
                        f'instead of {memory_gb} GB')
        elif 'shift' not in ess_trsh_methods:
            # Try adding a level shift for the alpha- and beta-spin orbitals.
            # Applying large negative level shifts like {rhf; shift,-1.0,-0.5}
            # will often stabilize convergence at the expense of making it somewhat slower.
            logger.info(f'Troubleshooting {job_type} job in {software} for {label} using shift')
            ess_trsh_methods.append('shift')
            shift = 'shift,-1.0,-0.5;'
        elif 'vdz' not in ess_trsh_methods:
            # Degrade the basis set
            logger.info(f'Troubleshooting {job_type} job in {software} for {label} using vdz')
            ess_trsh_methods.append('vdz')
            trsh_keyword = 'vdz'
        elif 'vdz & shift' not in ess_trsh_methods:
            # Degrade the basis set and also add a level shift for the alpha- and beta-spin orbitals
            logger.info(f'Troubleshooting {job_type} job in {software} for {label} using vdz & shift')
            ess_trsh_methods.append('vdz & shift')
            shift = 'shift,-1.0,-0.5;'
            trsh_keyword = 'vdz'
        elif 'memory' not in ess_trsh_methods:
            # Increase the memory allocation, and also run with a shift
            ess_trsh_methods.append('memory')
            memory = servers[server]['memory']  # set memory to the value of an entire node (in GB)
            logger.info(f'Troubleshooting {job_type} job in {software} for {label} using memory: {memory:.2f} GB '
                        f'instead of {memory_gb} GB')
            shift = 'shift,-1.0,-0.5;'
        else:
            couldnt_trsh = True

    elif 'terachem' in software:
        """
        scf diis+a
        maxit 50

        solve in freq:
        Maximum gradient component at reference geometry: 2.19e-02
        Maximum component of gradient is too large
        Optimize the geometry and try again
        """
        couldnt_trsh = True

    else:
        logger.error(f'Troubleshooting methods are not implemented for {software}')
        couldnt_trsh = True

    if couldnt_trsh:
        logger.error(f'Could not troubleshoot geometry optimization for {label}! '
                     f'Tried troubleshooting with the following methods: {ess_trsh_methods}')
        output_errors.append(f'Error: Could not troubleshoot {job_type} for {label}! '
                             f'Tried troubleshooting with the following methods: {ess_trsh_methods}; ')
    return (output_errors,
            ess_trsh_methods,
            remove_checkfile,
            level_of_theory,
            software,
            job_type,
            fine,
            trsh_keyword,
            memory,
            shift,
            cpu_cores,
            couldnt_trsh,
            )
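
# A minimal sketch of how this troubleshooting entry point might be called (all values are
# hypothetical; the job_status dict is the one produced by determine_ess_status()):
#
#     (output_errors, ess_trsh_methods, remove_checkfile, level_of_theory, software,
#      job_type, fine, trsh_keyword, memory, shift, cpu_cores, couldnt_trsh) = trsh_ess_job(
#         label='C2H6', level_of_theory='wb97xd/def2tzvp', server='server1',
#         job_status={'keywords': ['SCF', 'GL502'], 'error': 'Unconverged SCF'},
#         job_type='opt', software='gaussian', fine=False, memory_gb=14.0,
#         num_heavy_atoms=2, cpu_cores=8, ess_trsh_methods=[])
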
def trsh_conformer_isomorphism(software: str,
                               ess_trsh_methods: list = None,
                               ) -> Optional[str]:
    """
    Troubleshoot a conformer optimization for a species that failed the isomorphism test in
    `determine_most_stable_conformer` by specifying a "good" level of theory.

    Args:
        software (str): The ESS used.
        ess_trsh_methods (list, optional): The troubleshooting methods tried for this job.

    Raises:
        TrshError: If the requested ``ess_trsh_methods`` is not supported.

    Returns:
        Optional[str]: The level of theory to troubleshoot at.
    """
    ess_trsh_methods = ess_trsh_methods if ess_trsh_methods is not None else list()
    if software == 'gaussian':
        conformer_trsh_methods = ['wb97xd/def2TZVP', 'apfd/def2TZVP']
    elif software == 'qchem':
        conformer_trsh_methods = ['wb97x-d3/def2-TZVP']
    elif software == 'orca':
        conformer_trsh_methods = ['wB97X-D3/def2-TZVP']
    elif software == 'terachem':
        conformer_trsh_methods = ['wb97xd3/def2-TZVP']
    else:
        logger.warning(f'The troubleshoot_conformer_isomorphism() method is not implemented for {software}.')
        return None
    level_of_theory = None
    for method in conformer_trsh_methods:
        if 'conformer ' + method in ess_trsh_methods:
            continue
        ess_trsh_methods.append('conformer ' + method)
        level_of_theory = method
        break
    return level_of_theory
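
# For example, on the first call for a Gaussian job the sketch below returns the first
# listed method and records it, so a repeated failure falls through to 'apfd/def2TZVP':
#
#     methods_tried = []
#     level = trsh_conformer_isomorphism(software='gaussian', ess_trsh_methods=methods_tried)
#     # level == 'wb97xd/def2TZVP'; methods_tried == ['conformer wb97xd/def2TZVP']
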
def trsh_job_queue(server: str,
                   job_name: str,
                   max_time: int = 24,
                   attempted_queues: list = None,
                   ) -> Tuple[Optional[dict], bool]:
    """
    Troubleshoot job queue issues.
    This function first attempts to determine whether the user has provided a queue that allows
    more walltime than the queue on which the job failed. If not, it attempts to determine
    whether any other queues available on the server allow more walltime than the failed queue.

    Args:
        server (str): The server name.
        job_name (str): The job name.
        max_time (int, optional): The walltime, in hours, provided by the queue on which the job failed.
                                  Defaults to 24.
        attempted_queues (list, optional): Queues on which running this job has already been attempted.
                                           Defaults to None.

    Returns:
        Tuple[Optional[dict], bool]: A dictionary of the available queues,
                                     and whether the function was successful.
    """
    server_queues = servers[server].get('queues', dict())
    cluster_soft = servers[server].get('cluster_soft', 'Undefined')
    excluded_queues = servers[server].get('excluded_queues', list())
    # Check whether there are any available queues in server_queues that haven't been tried yet
    if len(server_queues) > 1:
        # Make sure that the queue is not already in the attempted_queues list
        server_queues = {queue: walltime for queue, walltime in server_queues.items()
                         if (attempted_queues is None or queue not in attempted_queues)
                         and convert_to_hours(walltime) >= max_time}
        if len(server_queues) == 0:
            logger.error(f'Could not troubleshoot {job_name} on {server} as all available queues have been tried. '
                         f'Will attempt to query the server for additional queues.')
        else:
            return server_queues, True

    # If the server runs PBS, query which queues are available
    if cluster_soft.lower() == 'pbs':
        # First determine which groups the current user belongs to
        cmd = 'groups'
        output = execute_command(cmd, shell=True)
        if 'Error' in output:
            logger.error(f'Could not troubleshoot {job_name} on {server} as the groups command failed.')
            return None, False
        else:
            # e.g., (['users zeus-users vkm-users gaussian grinberg-dana_prj docker'], [])
            user_groups = output[0][0].split()
            if len(user_groups) == 0:
                logger.error(f'Could not troubleshoot {job_name} on {server} as the groups command '
                             f'did not return any groups.')
                return None, False
            # Check whether the term '-users' is in a group name; if so, remove it
            elif any('-users' in group for group in user_groups):
                user_groups = [group.replace('-users', '') for group in user_groups]

        # Now query the queues
        cmd = 'qstat -q'
        output_queues = execute_command(cmd, shell=True)
        if 'Error' in output_queues:
            logger.error(f'Could not troubleshoot {job_name} on {server} as the qstat command failed.')
            return None, False
        else:
            # Need to parse the output. Example:
            #
            # Queue            Memory CPU Time Walltime Node   Run   Que   Lm  State
            # ---------------- ------ -------- -------- ---- ----- ----- ---- -----
            # workq              --      --       --     --      0     0   --  D S
            # maytal_q           --      --       --     --      7     0   --  E R
            # vkm_all_q          --      --       --     --      0     0   --  D R
            # zeus_temp          --      --    24:00:00  --      0     0   --  D S
            # dagan_q            --      --       --     --      0     0   --  E R
            # frankel_q          --      --       --     --      0     0   --  E R
            # vkm_gm_q           --      --       --     --      0     0   --  E R
            # zeus_all_scalar    --      --    24:00:00  --      0     0   --  D S
            # yuval_q            --      --       --     --      0     0   --  E R
            # dan_q              --      --       --     --      7     0   --  E R
            # zeus_all_q         --      --    24:00:00  --      4     0   --  E R
            # zeus_long_q        --      --    168:00:0  --      0     0   --  E R
            # zeus_short_q       --      --    03:00:00  --      0     0   --  E R
            # gpu_v100_q         --      --    480:00:0  --      0     0   --  E R
            # brandon_q          --      --       --     --      0     0   --  E R
            # karp_q             --      --       --     --      0     0   --  E R
            # mafat_gm_q         --      --       --     --      0     0   --  E R
            # training_q         --      --    24:00:00  --      0     0   --  E R
            # mafat4_q           --      --       --     --      0     0   --  D R
            # zeus_new_q         --      --    72:00:00  --      0     0   --  E R
            # zeus_combined_q    --      --    24:00:00  --     17     0   --  E R
            # zeus_comb_short    --      --    03:00:00  --      0     0   --  E R
            # mafat_new_q        --      --       --     --     46     0   --  E R
            # dagan_comb_q       --      --       --     --      0     0   --  E R
            # dan_comb_q         --      --       --     --      0     0   --  E R
            # karp_comb_q        --      --       --     --      0     0   --  D R
            # brandon_comb_q     --      --       --     --      0     0   --  E R
            # train_gpu_q        --      --    24:00:00  --      0     0   --  E R
            #                                               ----- -----
            #                                                  81     0
            #
            # 1. Get the queue names in the first column, and check whether the state is 'E' (enabled)
            #    or 'D' (disabled) - select only enabled queues.
            # 2. Once we have a list of queues, make sure we can submit to them, by checking the
            #    acl_groups entry in the output of `qstat -Qf <queue_name>`.
            # 3. Also get the walltime of each queue.
            queues = {}
            for line in output_queues[0]:
                if line.strip() and not line.startswith("Queue") and not line.startswith('----') \
                        and not line.startswith('server'):
                    parts = line.split()
                    if len(parts) >= 9 and parts[8] == 'E':
                        queue_name = parts[0]
                        acl_groups = None
                        cmd = f'qstat -Qf {queue_name}'
                        output_specific_queue = execute_command(cmd, shell=True)
                        # Parse example:
                        #
                        # Queue: mafat_new_q
                        #     queue_type = Execution
                        #     total_jobs = 44
                        #     state_count = Transit:0 Queued:0 Held:0 Waiting:0 Running:44 Exiting:0 Begun:0
                        #     resources_default.walltime = 3600:00:00
                        #     acl_group_enable = True
                        #     acl_groups = grinberg-dana_prj,halupovich_prj,yuvallevy_prj
                        #     default_chunk.qlist = mafat_new_q
                        #     resources_assigned.mem = 376625977kb
                        #     resources_assigned.mpiprocs = 132
                        #     resources_assigned.ncpus = 3254
                        #     resources_assigned.nodect = 47
                        #     max_run_res.ncpus = [o:PBS_ALL=3584]
                        #     enabled = True
                        #     started = True
                        for line_queue in output_specific_queue[0]:
                            # Check the walltime
                            if line_queue.strip().startswith('resources_default.walltime'):
                                walltime = line_queue.split('=')[1].strip()
                                queues[queue_name] = walltime
                            if line_queue.strip().startswith('acl_groups'):
                                acl_groups = True
                                groups = line_queue.split('=')[1].strip().split(',')
                                if any(group in user_groups for group in groups):
                                    break  # the user is in one of the acl_groups, keep the queue
                                else:
                                    queues.pop(queue_name, None)  # the user is not in acl_groups, remove the queue
                                    break
                        if not any(group in queue_name for group in user_groups) and acl_groups is None:
                            # The queue name does not contain any of the user's groups, remove the queue
                            queues.pop(queue_name, None)
            # Check whether any of the found queues are in the excluded queues list
            if excluded_queues:
                for queue in excluded_queues:
                    if queue in queues:
                        queues.pop(queue, None)
            if len(queues) == 0:
                logger.error(f'Could not troubleshoot {job_name} on {server} as no queues were found.')
                return None, False
            else:
                return queues, True
    else:
        logger.error(f'Could not troubleshoot the queue for {job_name} since the cluster software on {server} '
                     f'is {cluster_soft} and not PBS.')
        return None, False
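
# A hedged usage sketch (the server and queue names are hypothetical). The function first
# filters the user-configured queues, then falls back to querying PBS via `qstat`:
#
#     queues, success = trsh_job_queue(server='server1', job_name='opt_a103',
#                                      max_time=24, attempted_queues=['short_q'])
#     # e.g., queues == {'long_q': '168:00:00'} if a longer-walltime queue is available
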
def trsh_job_on_server(server: str,
                       job_name: str,
                       job_id: Union[int, str],
                       job_server_status: str,
                       remote_path: str,
                       server_nodes: list = None):
    """
    Troubleshoot server errors.

    Args:
        server (str): The server name.
        job_name (str): The job's name (e.g., 'opt_a103').
        job_id (int, str): The job's ID on the server.
        job_server_status (str): The job server status (either 'initializing', 'running', 'errored', or 'done').
        remote_path (str): The remote path to the job folder.
        server_nodes (list, optional): The nodes already tried on this server for this job.

    Returns:
        Tuple[str, bool]
            - The new node on the server (or None).
            - Whether to re-run the job, ``True`` to rerun.
    """
    server_nodes = server_nodes if server_nodes is not None else list()
    cluster_soft = servers[server]['cluster_soft']
    if job_server_status != 'done':
        logger.error(f'Job {job_name} has server status "{job_server_status}" on {server}.')

    # delete the current server run
    if server == 'local':
        cmd = delete_command[cluster_soft] + ' ' + str(job_id)
        execute_command(cmd)
        return None, True
    else:
        with SSHClient(server) as ssh:
            ssh.delete_job(job_id)

    # find an available node
    logger.error('Troubleshooting by changing node.')
    ssh = SSHClient(server)
    nodes = ssh.list_available_nodes()
    for node in nodes:
        if node not in server_nodes:
            server_nodes.append(node)
            break
    else:
        logger.error(f'Could not find an available node on the server {server}')
        # TODO: continue troubleshooting; if all else fails, put the job to sleep
        #       and try again searching for a node
        return None, False

    # modify the submit file
    remote_submit_file = os.path.join(remote_path, submit_filenames[cluster_soft])
    with SSHClient(server) as ssh:
        content = ssh.read_remote_file(remote_file_path=remote_submit_file)
    if cluster_soft.lower() == 'oge':
        node_assign = '#$ -l h='
        insert_line_num = 7
    elif cluster_soft.lower() == 'slurm':
        node_assign = '#SBATCH --nodelist='
        insert_line_num = 5
    else:
        # Other cluster software?
        logger.debug(f'Unknown cluster software {cluster_soft} was encountered when '
                     f'troubleshooting by changing node.')
        return None, False
    for i, line in enumerate(content):
        if node_assign in line:
            content[i] = node_assign + node
            break
    else:
        content.insert(insert_line_num, node_assign + node)
    content = ''.join(content)  # convert the list into a single string, not to upset paramiko

    # resubmit
    with SSHClient(server) as ssh:
        ssh.upload_file(remote_file_path=os.path.join(remote_path, submit_filenames[cluster_soft]),
                        file_string=content)
    return node, True
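
# A hedged usage sketch (the job data below are hypothetical): a failed job is deleted,
# an untried node is selected, the submit script is patched with the node assignment,
# and re-submission is requested:
#
#     node, rerun = trsh_job_on_server(server='server1', job_name='opt_a103', job_id=12345,
#                                      job_server_status='errored',
#                                      remote_path='/home/user/runs/ARC/project/opt_a103',
#                                      server_nodes=['node01'])
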
[docs]def scan_quality_check(label: str,
                       pivots: list,
                       energies: list,
                       scan_res: float = rotor_scan_resolution,
                       used_methods: Optional[list] = None,
                       log_file: Optional[str] = None,
                       species: Optional[ARCSpecies] = None,
                       preserve_params: Optional[list] = None,
                       trajectory: Optional[list] = None,
                       original_xyz: Optional[dict] = None,
                       ) -> Tuple[bool, str, str, dict]:
    """
    Checks the scan's quality:

    1. Based on intermediate conformers if available:
       - whether the initial and final points are consistent
       - whether it is relatively "smooth"
    2. Based on the PES curve (if intermediate conformers are unavailable):
       - whether the initial and final points are consistent
       - whether it is relatively "smooth"
    3. Common:
       - whether the optimized geometry indeed represents the minimum energy conformer (for a non-TS species)
       - whether the barrier height is reasonable
    4. Based on requested parameters to preserve:
       - whether the specified atom distances to preserve aren't violated

    Args:
        label (str): The species label.
        pivots (list): The rotor pivots.
        energies (list): The scan energies in kJ/mol.
        scan_res (float, optional): The scan resolution in degrees.
        used_methods (list, optional): Troubleshooting methods already tried out.
        log_file (str, optional): The path to the output file.
        species (ARCSpecies, optional): The ARCSpecies this scan is related to.
        preserve_params (list, optional): Entries are length 2 lists of atom indices (1-indexed) between which
                                          the distance, as well as a torsion dihedral angle with these atoms as
                                          its pivots, must be preserved throughout the scan to a tolerance.
        trajectory (list, optional): Entries are Cartesian coordinates along the scan trajectory.
        original_xyz (dict, optional): The optimized coordinates of the species.

    Returns:
        Tuple[bool, str, str, dict]
            - Whether to invalidate this rotor, ``True`` to invalidate.
            - Reason for invalidating this rotor.
            - Error or warning message.
            - Troubleshooting methods to apply, including conformational changes.

    Todo:
        - adjust to ND
    """
    message, invalidation_reason = '', ''
    invalidate = False
    actions = dict()
    used_methods = used_methods or list()
    energies = np.array(energies, np.float64)
    scan_conformers = None

    # Check whether the conformer-based method is applicable.
    if log_file:
        try:
            scan_conformers = parse_scan_conformers(log_file)
        except NotImplementedError:
            message = f'Rotor scan quality check using conformer internal coordinates ' \
                      f'has not been implemented for current ESS. Using PES curve based ' \
                      f'check for rotor scan of {label} between pivots {pivots}.'
            logger.warning(message)

    # 1. Check based on intermediate conformers.
    if scan_conformers is not None and (species is None or not species.is_ts):
        bonds = scan_conformers[scan_conformers['type'] == 'R']
        angles = scan_conformers[scan_conformers['type'] == 'A']
        non_scan_rotor = scan_conformers[(scan_conformers['type'] == 'D')
                                         & (scan_conformers['scan'] == False)]
        scan_rotor = scan_conformers[scan_conformers['scan'] == True]

        # 1.1 Find significant changes of internal coordinates.
        expected_step_num = int(360 / scan_res)
        # The 5 subtracted below refers to the type, atoms, scan, redundant, and initial guess columns.
        actual_step_num = scan_conformers.shape[1] - 5
        step_num = min(expected_step_num, actual_step_num)
        changed_ic_dict = {}
        for index_1 in range(step_num + 1):
            if index_1 != 0:
                # Compare 'adjacent' conformers.
                index_2 = index_1 - 1
                delta = scan_res  # scan[index_1] - scan[index_2] = scan_res
            elif step_num == expected_step_num:
                # Compare the first and the last conformer.
                index_2 = step_num
                delta = 0
            else:
                # The scan did not finish as desired.
                continue
            # Identify changes by type.
            bond_change = (2 * (bonds[index_1] - bonds[index_2]) /
                           (bonds[index_1] + bonds[index_2])).abs() > preserve_params_in_scan['bond']
            angle_change = (angles[index_1] - angles[index_2]).abs() > preserve_params_in_scan['angle']
            non_scan_rotor_change = check_torsion_change(torsions=non_scan_rotor,
                                                         index_1=index_1,
                                                         index_2=index_2,
                                                         threshold=preserve_params_in_scan['dihedral'])
            scan_rotor_change = check_torsion_change(torsions=scan_rotor,
                                                     index_1=index_1,
                                                     index_2=index_2,
                                                     threshold=preserve_params_in_scan['dihedral'],
                                                     delta=delta)
            # Summarize the changes.
            change_sum = pd.concat([bond_change, angle_change, non_scan_rotor_change, scan_rotor_change])
            changed_ics = change_sum[change_sum == True].index.to_list()
            # Save the changes in the format of {conformer index: problematic ics}.
            if changed_ics:
                invalidate = True
                changed_ic_dict.update({index_1: changed_ics})

        # 1.2 Check for broken bonds and for a lower-energy conformation.
        # Exclude conformers with broken bonds (these are different species).
        # It is better to just freeze the broken bond where the bond change first happens.
        for conf_index, ics in changed_ic_dict.items():
            # R(X,Y) entries in ics refer to bonds.
            broken_bonds = [ic for ic in ics if 'R' in ic]
            if broken_bonds and conf_index != 0:
                # Find the bond that changes the most, to avoid accompanying changes, e.g., a C-O bond
                # transforming into C=O, which we don't want to freeze. If other bonds need to be frozen
                # as well, it can be done in the subsequent troubleshooting.
                bonds = scan_conformers.loc[broken_bonds, :]
                bond_change = (2 * (bonds[conf_index] - bonds[conf_index - 1]) /
                               (bonds[conf_index] + bonds[conf_index - 1])).abs()
                broken_bond_label = bond_change.sort_values().index[-1]  # the largest change
                # Freeze this bond only; do not freeze other ics, to prevent over-constraining.
                broken_bonds = [scan_conformers['atoms'][broken_bond_label]]
                invalidate = True
                invalidation_reason = f'Bond ({broken_bonds}) broke during the scan.'
                message = f'Rotor scan of {label} between pivots {pivots} has broken bonds: ' \
                          f'{broken_bonds}. ARC will attempt to troubleshoot this rotor scan.'
                logger.error(message)
                actions = {'freeze': broken_bonds}
                return invalidate, invalidation_reason, message, actions

        # If no bond broke, ideally all conformers should be isomorphic.
        # Switch to the lowest-energy conformer if one was found.
        energy_diff = energies[0] - np.min(energies)
        # Use a tighter threshold here to find a lower conformer.
        if (energy_diff >= 0.5 or energy_diff > 0.5 * (max(energies) - min(energies))) \
                and (species is None or not species.is_ts):
            invalidate = True
            invalidation_reason = f'Another conformer for {label} exists which is ' \
                                  f'{energy_diff:.2f} kJ/mol lower.'
            message = f'Species {label} is not oriented correctly around pivots {pivots}, ' \
                      f'searching for a better conformation...'
            logger.info(message)
            # Find the coordinates of the lowest conformer.
            min_index = np.argmin(energies)
            conf_xyzs = parse_1d_scan_coords(log_file)
            actions = {'change conformer': conf_xyzs[min_index]}
            return invalidate, invalidation_reason, message, actions

        # 1.3 Check consistency.
        if 0 in changed_ic_dict.keys() and len(changed_ic_dict) == 1:
            # A smooth scan with different initial and final conformers.
            invalidate = True
            invalidation_reason = 'Inconsistent initial and final conformers'
            message = f'Rotor scan of {label} between pivots {pivots} has inconsistent initial ' \
                      f'and final conformers.\nInternal coordinates {changed_ic_dict[0]} are different. ' \
                      f'ARC will attempt to troubleshoot this rotor scan.'
            logger.error(message)
            actions = {'freeze': [scan_conformers['atoms'][ic_label]
                                  for ic_label in changed_ic_dict[0]]}
            return invalidate, invalidation_reason, message, actions
        elif len(changed_ic_dict) > 0:
            # Not a smooth scan.
            invalidate = True
            invalidation_reason = 'Significant difference observed between consecutive conformers'
            message = f'Rotor scan of {label} between pivots {pivots} is inconsistent between ' \
                      f'two consecutive conformers.\nInconsistent consecutive conformers and problematic ' \
                      f'internal coordinates:'
            changed_ic_label = []
            for index, ics in changed_ic_dict.items():
                if index > 0:
                    # Do not include the initial/final differences, which may include more ics.
                    message += f'\nconformer #{index:>3d} / #{index+1:>3d} '
                    message += ', '.join(ics)
                    changed_ic_label += ics
            message += '\nARC will attempt to troubleshoot this rotor scan.'
            # list(set()) is used to remove duplicate labels.
            changed_ic_label = list(set(changed_ic_label))
            logger.error(message)
            actions = {'freeze': [scan_conformers['atoms'][ic_label]
                                  for ic_label in changed_ic_label]}
            return invalidate, invalidation_reason, message, actions
    else:
        # 2. Check the rotor scan quality according to the PES curve.
        # 2.1 Check consistency between the initial and final points.
        if abs(energies[-1] - energies[0]) > inconsistency_az:
            # The initial and final points differ by more than `inconsistency_az` kJ/mol.
            # It seems like this rotor broke the conformer. Invalidate.
            invalidate = True
            invalidation_reason = f'initial and final points are inconsistent by more than {inconsistency_az:.2f} kJ/mol'
            message = f'Rotor scan of {label} between pivots {pivots} is inconsistent by more ' \
                      f'than {inconsistency_az:.2f} kJ/mol between initial and final positions. ' \
                      f'Initial energy = {energies[0]:.2f}, final energy = {energies[-1]:.2f}. ARC will ' \
                      f'attempt to troubleshoot this rotor scan.'
            logger.error(message)
            actions = {'inc_res': None, 'freeze': 'all'}
            return invalidate, invalidation_reason, message, actions

        # 2.2 Check consistency between consecutive points.
        for j in range(len(energies) - 1):
            if abs(energies[j] - energies[j + 1]) > inconsistency_ab * np.max(energies):
                # Two consecutive points on the scan differ by more than `inconsistency_ab`
                # times the maximal scan energy. This is a serious inconsistency. Invalidate.
                invalidate = True
                invalidation_reason = f'Two consecutive points are inconsistent by more than ' \
                                      f'{inconsistency_ab * max(energies):.2f} kJ/mol'
                message = f'Rotor scan of {label} between pivots {pivots} is inconsistent by ' \
                          f'more than {inconsistency_ab * max(energies):.2f} kJ/mol between ' \
                          f'two consecutive points. ARC will attempt to troubleshoot this rotor scan.'
                logger.error(message)
                # Propose a troubleshooting method: try increasing the resolution first, and try
                # increasing the resolution and freezing all torsions jointly afterwards.
                # TODO: If we figure out that solely increasing the resolution is not effective,
                #       we can simplify the process to actions = {'inc_res': None, 'freeze': 'all'}.
                if any(['scan_res' in used_method for used_method in used_methods]):
                    # Increasing the scan resolution has been applied before.
                    if not any([used_method['scan_trsh'] != '' for used_method in used_methods]):
                        # Freezing torsions has not been applied yet.
                        actions = {'inc_res': None, 'freeze': 'all'}
                    else:
                        # All torsions are already frozen; there is not much left to do except
                        # increasing the scan resolution, which is not that effective either,
                        # so stop and do nothing.
                        pass
                else:
                    # Neither increasing the scan resolution nor freezing torsions has been applied.
                    actions = {'inc_res': None}
                return invalidate, invalidation_reason, message, actions

        # 2.3 Check the energy, and change the conformation if needed.
        energy_diff = energies[0] - np.min(energies)
        if (energy_diff >= 2 or energy_diff > 0.5 * (max(energies) - min(energies))) \
                and (species is None or not species.is_ts):
            invalidate = True
            invalidation_reason = f'Another conformer for {label} exists which is {energy_diff:.2f} kJ/mol lower.'
            message = f'Species {label} is not oriented correctly around pivots {pivots}. ' \
                      f'Another conformer exists which is {energy_diff:.2f} kJ/mol lower. ' \
                      f'Searching for a better conformation...'
            logger.info(message)
            # Find the lowest conformer, and use it for further jobs.
            # Since the scan has passed the previous checks at this point, the possibility
            # of switching to a non-isomorphic conformer is low.
            min_index = np.argmin(energies)
            conf_xyzs = parse_1d_scan_coords(log_file)
            actions = {'change conformer': conf_xyzs[min_index]}
            return invalidate, invalidation_reason, message, actions

    # 3. Check the barrier height.
    if (np.max(energies) - np.min(energies)) > maximum_barrier:
        # The barrier for the internal rotation is higher than `maximum_barrier`.
        num_wells = determine_rotor_symmetry(label=label,
                                             pivots=pivots,
                                             rotor_path='',
                                             energies=energies,
                                             return_num_wells=True,
                                             log=False,
                                             )[-1]
        if num_wells == 1:
            invalidate = True
            invalidation_reason = f'The rotor scan has a barrier of {np.max(energies) - np.min(energies):.2f} ' \
                                  f'kJ/mol, which is higher than the maximal barrier for rotation ' \
                                  f'({maximum_barrier:.2f} kJ/mol)'
            message = f'Rotor scan of {label} between pivots {pivots} has a barrier ' \
                      f'larger than {maximum_barrier:.2f} kJ/mol. Invalidating rotor.'
            logger.warning(message)
            return invalidate, invalidation_reason, message, actions
        else:
            logger.warning(f'The maximal barrier for rotor {pivots} of {label} is '
                           f'{(np.max(energies) - np.min(energies)):.2f} kJ/mol, which is higher than the set threshold '
                           f'of {maximum_barrier} kJ/mol. Since this mode has {num_wells} wells when treated as a torsion, '
                           f'it is not invalidated: treating it as a vibrational mode would be less accurate than '
                           f'the hindered rotor treatment, since the entropy contribution from the population of '
                           f'this species at the higher wells would not be taken into account. NOT invalidating this '
                           f'torsional mode.')

    # 4. Check that the requested atom constraints are preserved (particularly useful for TSs).
    if preserve_params is not None:
        success = True
        pivots = list()
        for atoms in preserve_params:
            for i, xyz in enumerate(trajectory):
                if i != 0:
                    # Check that the distance between this atom pair is preserved relative to the previous
                    # entry in the trajectory, as well as relative to the original value (final_xyz).
                    current_distance = calculate_distance(coords=xyz, atoms=atoms, index=1)
                    previous_distance = calculate_distance(coords=trajectory[i - 1], atoms=atoms, index=1)
                    original_distance = calculate_distance(coords=original_xyz, atoms=atoms, index=1)
                    if not (previous_distance * (1.0 - preserve_params_in_scan['bond'])
                            < current_distance <
                            previous_distance * (1.0 + preserve_params_in_scan['bond'])
                            or original_distance * (1.0 - preserve_params_in_scan['bond'])
                            < current_distance <
                            original_distance * (1.0 + preserve_params_in_scan['bond'])):
                        success = False
                        pivots.append(atoms)
                        message = f"The rotor breaks the TS around pivots {pivots}: In trajectory {i}, the distance " \
                                  f"between pivots is {current_distance} Angstroms, which is " \
                                  f"{current_distance / previous_distance:.2f} of the previous frame, and " \
                                  f"{current_distance / original_distance:.2f} of the original geometry."
                        break
                    if species.mol is not None:
                        scan = [determine_smallest_atom_index_in_scan(atom1=species.mol.atoms.index(atoms[0]),
                                                                      atom2=species.mol.atoms.index(atoms[1]),
                                                                      mol=species.mol)]
                        scan.extend(atoms)
                        scan.append(determine_smallest_atom_index_in_scan(atom1=species.mol.atoms.index(atoms[1]),
                                                                          atom2=species.mol.atoms.index(atoms[0]),
                                                                          mol=species.mol))
                        # Check that a dihedral angle with this atom pair as its pivots is preserved relative
                        # to the previous entry in the trajectory, as well as relative to the original value
                        # (final_xyz).
                        current_dihedral = calculate_dihedral_angle(coords=xyz, torsion=scan, index=1)
                        previous_dihedral = calculate_dihedral_angle(coords=trajectory[i - 1], torsion=scan, index=1)
                        original_dihedral = calculate_dihedral_angle(coords=original_xyz, torsion=scan, index=1)
                        if not (abs(current_dihedral - previous_dihedral) < preserve_params_in_scan['dihedral']
                                or abs(current_dihedral - original_dihedral) < preserve_params_in_scan['dihedral']):
                            success = False
                            pivots.append(atoms)
                            message = f"The rotor breaks the TS around pivots {pivots}: In trajectory {i}, the " \
                                      f"dihedral angle is {current_dihedral} degrees, a " \
                                      f"{abs(current_dihedral - previous_dihedral)} change relative to the previous " \
                                      f"frame, and a {abs(current_dihedral - original_dihedral)} change relative to " \
                                      f"the original geometry."
                            break
        if species.mol is None:
            logger.warning(f'Cannot check that the dihedral angle of {species.label} is consistent throughout rotor '
                           f'scans without a .mol attribute')
        if not success:
            invalidate = True
            invalidation_reason = message
            logger.info(message)
            actions = dict()
            return invalidate, invalidation_reason, message, actions

    return invalidate, invalidation_reason, message, actions
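# A minimal usage sketch of scan_quality_check on a synthetic PES (illustrative only; the
# energies are made up). With a large gap between the first and last points, the PES-based
# check (2.1) should flag the rotor, assuming the gap exceeds the `inconsistency_az` setting.
#
#     energies = [0.0, 1.2, 4.8, 10.5, 4.9, 1.3, 50.0]  # kJ/mol; inconsistent endpoints
#     invalidate, reason, msg, actions = scan_quality_check(label='C2H6_demo',
#                                                           pivots=[1, 2],
#                                                           energies=energies)
#     # Expected under the above assumption: invalidate is True and
#     # actions == {'inc_res': None, 'freeze': 'all'}.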
[docs]def trsh_keyword_checkfile(job_status, ess_trsh_methods, couldnt_trsh) -> Tuple[bool, List, bool]:
    """
    Check if the job requires removal of the checkfile.
    """
    if 'CheckFile' in job_status.get('keywords', '') and 'checkfile=None' not in ess_trsh_methods:
        ess_trsh_methods.append('checkfile=None')
        couldnt_trsh = False
        return True, ess_trsh_methods, couldnt_trsh
    elif 'checkfile=None' in ess_trsh_methods:
        couldnt_trsh = False
        return True, ess_trsh_methods, couldnt_trsh
    return False, ess_trsh_methods, couldnt_trsh
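# Illustrative sketch (hypothetical inputs): the first call records 'checkfile=None' and
# signals that the checkfile should be ignored; later calls keep returning True since the
# method was already recorded.
#
#     status = {'keywords': ['CheckFile']}
#     remove_chk, methods, couldnt_trsh = trsh_keyword_checkfile(status, [], True)
#     # remove_chk is True, methods == ['checkfile=None'], couldnt_trsh is False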
[docs]def trsh_keyword_intaccuracy(ess_trsh_methods, trsh_keyword, couldnt_trsh) -> Tuple[List, List, bool]:
    """
    Check if the job requires tightening the 2-electron integral accuracy.
    """
    if 'int=(Acc2E=14)' not in ess_trsh_methods:
        ess_trsh_methods.append('int=(Acc2E=14)')
        trsh_keyword.append('int=(Acc2E=14)')
        couldnt_trsh = False
    elif 'int=(Acc2E=14)' not in trsh_keyword and 'int=(Acc2E=14)' in ess_trsh_methods:
        trsh_keyword.append('int=(Acc2E=14)')
        couldnt_trsh = False
    return ess_trsh_methods, trsh_keyword, couldnt_trsh
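# Illustrative sketch of the two-stage behavior (hypothetical inputs): the first call
# records and applies 'int=(Acc2E=14)'; subsequent calls only re-apply it to the keywords.
#
#     methods, keywords, couldnt_trsh = trsh_keyword_intaccuracy([], [], True)
#     # methods == keywords == ['int=(Acc2E=14)'], couldnt_trsh is False
#     methods, keywords, couldnt_trsh = trsh_keyword_intaccuracy(methods, [], True)
#     # methods is unchanged, and keywords == ['int=(Acc2E=14)'] again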
[docs]def trsh_keyword_cartesian(job_status, ess_trsh_methods, job_type, trsh_keyword: list,
                           couldnt_trsh: bool) -> Tuple[List, List, bool]:
    """
    Check if the job requires switching to Cartesian coordinates.
    """
    if 'InternalCoordinateError' in job_status['keywords'] \
            and 'cartesian' not in ess_trsh_methods and job_type == 'opt':
        ess_trsh_methods.append('cartesian')
        trsh_keyword.append('opt=(cartesian)')
        couldnt_trsh = False
    elif 'cartesian' in ess_trsh_methods and job_type == 'opt' \
            and 'cartesian' not in trsh_keyword:
        trsh_keyword.append('opt=(cartesian)')
        couldnt_trsh = False
    return ess_trsh_methods, trsh_keyword, couldnt_trsh
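# Illustrative sketch (hypothetical inputs): an internal coordinate error in an 'opt'
# job triggers a switch to Cartesian optimization coordinates.
#
#     status = {'keywords': ['InternalCoordinateError', 'GL103']}
#     methods, keywords, couldnt_trsh = trsh_keyword_cartesian(status, [], 'opt', [], True)
#     # methods == ['cartesian'], keywords == ['opt=(cartesian)'], couldnt_trsh is False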
[docs]def trsh_keyword_scf(job_status, ess_trsh_methods, trsh_keyword, couldnt_trsh) -> Tuple[List, List, bool]:
    """
    Check if the job requires changing the SCF keywords.
    """
    scf_pattern = r"scf=\((.*?)\)"  # e.g., scf=(xqc,MaxCycle=1000) will match 'xqc,MaxCycle=1000'
    if 'SCF' in job_status['keywords'] and 'scf=(qc)' not in ess_trsh_methods:
        # Try the quadratically-convergent SCF algorithm.
        ess_trsh_methods.append('scf=(qc)')
        couldnt_trsh = False
    elif 'SCF' in job_status['keywords'] and 'scf=(NDamp=30)' not in ess_trsh_methods:
        # Permit damping for up to 30 SCF iterations.
        ess_trsh_methods.append('scf=(NDamp=30)')
        couldnt_trsh = False
    elif 'SCF' in job_status['keywords'] and 'scf=(NoDIIS)' not in ess_trsh_methods:
        # Switch off Pulay's Direct Inversion in the Iterative Subspace (DIIS).
        ess_trsh_methods.append('scf=(NoDIIS)')
        couldnt_trsh = False
    elif 'SCF' in job_status['keywords'] and 'guess=INDO' not in ess_trsh_methods:
        # Use an INDO initial guess for the SCF.
        ess_trsh_methods.append('guess=INDO')
        couldnt_trsh = False
        trsh_keyword.append('guess=INDO')
    if any('scf' in keyword for keyword in ess_trsh_methods):
        # Accumulate all previously applied scf=(...) options into a single keyword.
        scf_list = [match for element in ess_trsh_methods for match in re.findall(scf_pattern, element)] \
            if any(re.search(scf_pattern, element) for element in ess_trsh_methods) else []
        trsh_keyword.append('scf=(' + ','.join(scf_list) + ')')
    return ess_trsh_methods, trsh_keyword, couldnt_trsh
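# Illustrative sketch (hypothetical inputs) of how the scf options accumulate across calls:
#
#     status = {'keywords': ['SCF']}
#     methods, keywords, couldnt_trsh = trsh_keyword_scf(status, [], [], True)
#     # methods == ['scf=(qc)'], keywords == ['scf=(qc)']
#     methods, keywords, couldnt_trsh = trsh_keyword_scf(status, methods, [], True)
#     # methods == ['scf=(qc)', 'scf=(NDamp=30)'], keywords == ['scf=(qc,NDamp=30)']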
[docs]def trsh_keyword_unconverged(job_status, ess_trsh_methods, trsh_keyword, couldnt_trsh,
                             fine) -> Tuple[List, List, bool, bool]:
    """
    Check if an unconverged job requires a fine grid.
    """
    if 'Unconverged' in job_status['keywords'] and 'fine' not in ess_trsh_methods and not fine:
        # Try a fine grid for the SCF and the integrals.
        ess_trsh_methods.append('fine')
        fine = True
        couldnt_trsh = False
    else:
        fine = False
    return ess_trsh_methods, trsh_keyword, fine, couldnt_trsh
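# Illustrative sketch (hypothetical inputs): an unconverged job that has not yet used a
# fine grid gets `fine = True`; once 'fine' is already recorded, the flag is reset.
#
#     status = {'keywords': ['Unconverged']}
#     methods, keywords, fine, couldnt_trsh = trsh_keyword_unconverged(status, [], [], True, False)
#     # methods == ['fine'], fine is True, couldnt_trsh is False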
[docs]def trsh_keyword_nosymm(job_status, ess_trsh_methods, trsh_keyword, couldnt_trsh) -> Tuple[List, List, bool]:
    """
    Check if the job requires turning symmetry off (nosymm).
    """
    if 'NoSymm' in job_status['keywords'] and 'NoSymm' not in ess_trsh_methods:
        ess_trsh_methods.append('NoSymm')
        trsh_keyword.append('nosymm')
        couldnt_trsh = False
    elif 'nosymm' not in trsh_keyword and any('NoSymm' in keyword for keyword in ess_trsh_methods):
        trsh_keyword.append('nosymm')
        couldnt_trsh = False
    return ess_trsh_methods, trsh_keyword, couldnt_trsh
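# A minimal sketch of how these trsh_keyword_* helpers might be chained when assembling a
# Gaussian troubleshooting keyword list (illustrative; the actual orchestration in ARC is
# not reproduced here, and the inputs below are hypothetical).
#
#     status = {'keywords': ['SCF', 'NoSymm']}
#     ess_trsh_methods, trsh_keyword, couldnt_trsh = [], [], True
#     ess_trsh_methods, trsh_keyword, couldnt_trsh = trsh_keyword_scf(
#         status, ess_trsh_methods, trsh_keyword, couldnt_trsh)
#     ess_trsh_methods, trsh_keyword, couldnt_trsh = trsh_keyword_nosymm(
#         status, ess_trsh_methods, trsh_keyword, couldnt_trsh)
#     # trsh_keyword == ['scf=(qc)', 'nosymm'], ready to be joined into the input file.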