"""Calcfunctions used throughout workchains"""
# pylint: disable=c-extension-no-member
# pylint:disable=no-member
import io
from string import digits
import ase.io
from aiida.engine import calcfunction
from aiida.orm import Dict, SinglefileData, Str, StructureData
from rdkit import Chem
from rdkit.Chem import AllChem, rdmolops, rdqueries
from rdkit.Chem.MolKey.MolKey import BadMoleculeException
from subproptools.sub_reor import rotate_substituent_aiida
@calcfunction
[docs]def generate_rotated_structure_aiida(FolderData, atom_dict, cc_dict):
"""Rotates the fragment to the defined coordinate system
Args:
FolderData (aiida.orm.FolderData): aim calculation folder
atom_dict: AIM atom dict
cc_dict: AIM cc_dict
Returns:
Dict with keys 'atom_symbols' and 'geom' containing atomic symbols and the
the rotated geometry.
"""
return Dict(rotate_substituent_aiida(FolderData, atom_dict, cc_dict))
@calcfunction
[docs]def dict_to_structure(fragment_dict):
"""Generate a StructureData for Gaussian inputs
Args:
fragment_dict (aiida.orm.Dict): AiiDA orm.Dict with keys 'atom_symbols' and 'geom'
Returns:
aiida.orm.StructureData for the molecule
Note:
input can be generated, for example, by
:func:`aiida_aimall.workchains.calcfunctions.generate_rotated_structure_aiida`
"""
inp_dict = fragment_dict.get_dict()
symbols = inp_dict["atom_symbols"]
symbols = remove_numcharss_from_strlist(symbols)
coords = inp_dict["geom"]
# outstr is xyz file contents
outstr = ""
# numatoms then blank line, then coordinates and symbols for each atom
outstr += f"{len(symbols)}\n\n"
for i, symbol in enumerate(symbols):
if i != len(symbols) - 1:
outstr = (
outstr
+ symbol
+ " "
+ str(coords[i][0])
+ " "
+ str(coords[i][1])
+ " "
+ str(coords[i][2])
+ "\n"
)
else:
outstr = (
outstr
+ symbol
+ " "
+ str(coords[i][0])
+ " "
+ str(coords[i][1])
+ " "
+ str(coords[i][2])
)
# create StructureData from .xyz file string
f = io.StringIO(outstr)
struct_data = StructureData(ase=ase.io.read(f, format="xyz"))
f.close()
return struct_data
[docs]def calc_multiplicity(mol):
"""Calculate the multiplicity of a molecule as 2S +1
Loops over the atoms in the molecule and gets number of radical electrons,
then converts that number to the multiplicity.
Args:
mol: rdkit.Chem molecule object
Returns:
integer number of multiplicity
"""
num_radicals = 0
for atom in mol.GetAtoms():
num_radicals += atom.GetNumRadicalElectrons()
multiplicity = num_radicals + 1
return multiplicity
[docs]def find_attachment_atoms(mol):
"""Given molecule object, find the atoms corresponding to a * and the atom to which that is bound
Args:
mol: rdkit molecule object
Returns:
molecule with added hydrogens, the * atom object, and the atom object to which that is attached
Note:
Assumes that only one * is present in the molecule
"""
# * has atomic number 0
query = rdqueries.AtomNumEqualsQueryAtom(0)
# add hydrogens now
h_mol_rw = Chem.RWMol(mol) # Change type of molecule object
h_mol_rw = Chem.AddHs(h_mol_rw)
query_ats = h_mol_rw.GetAtomsMatchingQuery(query)
if len(query_ats) != 1:
raise ValueError(
f"Molecule should have one placeholder atom with atomic number 0, found {len(query_ats)}"
)
zero_at = query_ats[0]
# this will be bonded to one atom - whichever atom in the bond is not *, is the one we are looking for
bond = zero_at.GetBonds()[0]
begin_atom = bond.GetBeginAtom()
if begin_atom.GetSymbol() != "*":
attached_atom = begin_atom
else:
attached_atom = bond.GetEndAtom()
return h_mol_rw, zero_at, attached_atom
[docs]def reorder_molecule(h_mol_rw, zero_at, attached_atom):
"""Reindexes the atoms in a molecule, setting attached_atom to index 0, and zero_at to index 1
Args:
h_mol_rw: RWMol rdkit object with explicit hydrogens
zero_at: the placeholder * atom
attached_atom: the atom bonded to *
Returns:
molecule with reordered indices
"""
zero_at_idx = zero_at.GetIdx()
zero_at.SetAtomicNum(1)
attached_atom_idx = attached_atom.GetIdx()
# Initialize the new index so that our desired atoms are at the indices we want
first_two_atoms = [attached_atom_idx, zero_at_idx]
# Add the rest of the indices in original order
remaining_idx = [
atom.GetIdx()
for atom in h_mol_rw.GetAtoms()
if atom.GetIdx() not in first_two_atoms
]
out_atom_order = first_two_atoms + remaining_idx
reorder_mol = rdmolops.RenumberAtoms(h_mol_rw, out_atom_order)
return reorder_mol
[docs]def get_xyz(reorder_mol):
"""MMFF optimize the molecule to generate xyz coordiantes
Args:
reorder_mol: rdkit.Chem molecule output, output of :func:`aiida_aimall.workchains.calcfunctions.reorder_molecule`
Returns:
string of the geometry block of an .xyz file
"""
AllChem.EmbedMolecule(reorder_mol)
# not_optimized will be 0 if done, 1 if more steps needed
max_iters = 200
for i in range(0, 6):
not_optimized = AllChem.MMFFOptimizeMolecule(
reorder_mol, maxIters=max_iters
) # Optimize with MMFF94
# -1 is returned for molecules where there are no heavy atom-heavy atom bonds
# for these, hopefully the embed geometry is good enough
# 0 is returned on successful opt
if not_optimized in [0, -1]:
break
if i == 5:
return "Could not determine xyz coordinates"
max_iters = max_iters + 200
xyz_block = AllChem.rdmolfiles.MolToXYZBlock(
reorder_mol
) # pylint:disable=no-member # Store xyz coordinates
split_xyz_block = xyz_block.split("\n")
# first two lines are: number of atoms and blank. Last line is blank
xyz_lines = split_xyz_block[2 : len(split_xyz_block) - 1]
xyz_string = "\n".join([str(item) for item in xyz_lines])
return xyz_string
@calcfunction
@calcfunction
[docs]def generate_structure_data(smiles_dict):
"""Take an input xyz string and convert it to StructureData
Args:
smiles_dict: output of :func:`aiida_aimall.workchains.calcfunctions.get_substituent_input`
Returns:
StructureData of the molecule
"""
structure_Str = smiles_dict["xyz"]
structure_str = structure_Str
# Use the geometry string and create the xyz string for a full .xyz file
num_atoms = len(structure_str.split("\n"))
xyz_string = f"{num_atoms}\n\n" + structure_str
# Convert string to StructureData by encoding it as a file
f = io.StringIO(xyz_string)
struct_data = StructureData(ase=ase.io.read(f, format="xyz"))
f.close()
return struct_data
@calcfunction
[docs]def parameters_with_cm(parameters, smiles_dict):
"""Add charge and multiplicity keys to Gaussian Input
Args:
parameters: dictionary to be provided to GaussianCalculation
smiles_dict: `aiida_aimall.workchains.calcfunctions.get_substituent_input`
Returns:
Dict of Gaussian parameters updated with charge and multiplicity
"""
parameters_dict = parameters.get_dict()
smiles_dict_dict = smiles_dict.get_dict()
parameters_dict["charge"] = smiles_dict_dict["charge"]
parameters_dict["multiplicity"] = smiles_dict_dict["multiplicity"]
return Dict(parameters_dict)
@calcfunction
@calcfunction
[docs]def create_wfx_from_retrieved(wfxname, retrieved_folder):
"""Create wavefunction SinglefileData from retrieved folder
Args:
wfxname: Str of the name of a .wfx file to get from the retrieved folder
retrieved_folder: FolderData of a completed GaussianCalculation
Returns:
SinglefileData of the .wfx file to find in the FolderData
"""
wfx_file_string = retrieved_folder.get_object_content(wfxname.value.strip())
return SinglefileData(io.BytesIO(wfx_file_string.encode()))
[docs]def validate_shell_code(node, _):
"""Validate the shell code, ensuring that it is ShellCode or Str
Args:
node: input node to check the type for ShellCode or Str
Returns:
None if the type is ShellCode or Str, or error string if node is not
"""
if node.node_type not in [
"data.core.code.installed.shell.ShellCode.",
"data.core.str.Str.",
]:
return "the `shell_code` input must be either ShellCode or Str of the command."
return None
[docs]def validate_parser(node, _):
"""Validate the parser, ensuring that the provided value is one of the accepted values.
Args:
node: input node to check the type for ShellCode or Str
Returns:
None if the value is aimall.base or aimall.group, or an error string if it is not
"""
if node.value not in ["aimall.base", "aimall.group"]:
return "the `aim_parser` input must be either aimall.base or aimall.group"
return None
[docs]def validate_file_ext(node, _):
"""Validates that the file extension provided for AIM is wfx, wfn or fchk
Args:
node: node to check the value of to ensure it is in a supported format
Returns:
None if the type is ShellCode or Str, or error string if node is not
"""
if node.value not in ["wfx", "wfn", "fchk"]:
return "the `aim_file_ext` input must be a valid file format for AIMQB: wfx, wfn, or fchk"
return None
@calcfunction
[docs]def get_molecule_str_from_smiles(smiles):
"""For a given smiles, determine xyz structure, charge, and multiplicity
Args:
smiles: SMILEs of substituent to run
Returns:
Dict with keys xyz, charge, multiplicity
"""
mol = Chem.MolFromSmiles(smiles.value)
if not mol:
raise ValueError(
f"Molecule could not be constructed for substituent input SMILES {smiles.value}"
)
h_mol_rw = Chem.RWMol(mol) # Change type of molecule object
h_mol_rw = Chem.AddHs(h_mol_rw)
xyz_string = get_xyz(h_mol_rw)
if xyz_string == "Could not determine xyz coordinates":
raise BadMoleculeException(
"Maximum iterations exceeded, could not determine xyz coordinates for f{smiles.value}"
)
h_mol_rw.UpdatePropertyCache()
charge = Chem.GetFormalCharge(h_mol_rw)
multiplicity = calc_multiplicity(h_mol_rw)
out_dict = Dict({"xyz": xyz_string, "charge": charge, "multiplicity": multiplicity})
return out_dict
@calcfunction
[docs]def xyzfile_to_StructureData(xyz_SFD):
"""Convert the xyz file provided as SinglefileData to StructureData"""
with xyz_SFD.as_path() as filepath:
return StructureData(ase=ase.io.read(filepath, format="xyz"))