# Source code for pytoda.proteins.utils

from rdkit import Chem

# One-letter codes that aas_to_smiles refuses to convert; a sequence
# containing any of them is rejected with a ValueError.
FORBIDDEN = {'B', 'O', 'U', 'X', 'Z'}


def aas_to_smiles(aas, sanitize=True):
    """Convert an amino acid sequence (IUPAC) into a SMILES string.

    Args:
        aas (str): The amino acid sequence to be converted. Following IUPAC
            notation.
        sanitize (bool, optional): Forwarded unchanged as the ``sanitize``
            argument of RDKit's ``Chem.MolFromFASTA``. Defaults to True.

    Raises:
        TypeError: If aas is not a string.
        ValueError: If aas contains any character listed in FORBIDDEN, or if
            the string cannot be converted to a mol.

    Returns:
        str: SMILES string of the AA sequence.
    """
    if not isinstance(aas, str):
        raise TypeError(f'Provide string not {type(aas)}.')

    # Reject sequences containing any FORBIDDEN letter before handing them to
    # RDKit; isdisjoint stops at the first offending character.
    if not FORBIDDEN.isdisjoint(aas):
        raise ValueError(
            f'Characters from: {FORBIDDEN} cant be parsed. Found one in: {aas}'
        )

    mol = Chem.MolFromFASTA(aas, sanitize=sanitize)
    # A None result means no molecule could be built from the sequence.
    if mol is None:
        raise ValueError(f'Sequence could not be converted to SMILES: {aas}')

    return Chem.MolToSmiles(mol)