Source code for pytoda.preprocessing.tests.test_crawlers

"""Testing Crawlers."""

import unittest

from pytoda.preprocessing.crawlers import (
    get_smiles_from_pubchem,
    query_pubchem,
    remove_pubchem_smiles,
)


class TestCrawlers(unittest.TestCase):
    """Testing Crawlers.

    Exercises ``get_smiles_from_pubchem``, ``query_pubchem`` and
    ``remove_pubchem_smiles`` against hard-coded expected values.

    NOTE(review): these functions presumably query the live PubChem service,
    so the tests likely need network access -- confirm against the crawlers
    module.
    """

    def test_get_smiles_from_pubchem(self) -> None:
        """Test get_smiles_from_pubchem.

        Covers lookups by name and by CID, with and without isomeric
        information, kekulized and non-kekulized output, for both values of
        ``sanitize``, plus one drug whose landing page has several entries.
        """
        # The same compound addressed once by name and once by its CID.
        queries = (('name', 'isoliquiritigenin'), ('cid', 638278))

        for sanitize in [True, False]:
            # Test text mode
            ground_truth = 'C1=CC(=CC=C1/C=C/C(=O)C2=C(C=C(C=C2)O)O)O'
            for query, drug in queries:
                smiles = get_smiles_from_pubchem(
                    drug,
                    use_isomeric=True,
                    kekulize=True,
                    sanitize=sanitize,
                    query_type=query,
                )
                self.assertEqual(smiles, ground_truth)

            ground_truth = 'C1=CC(=CC=C1C=CC(=O)C2=C(C=C(C=C2)O)O)O'
            for query, drug in queries:
                smiles = get_smiles_from_pubchem(
                    drug,
                    use_isomeric=False,
                    kekulize=True,
                    sanitize=sanitize,
                    query_type=query,
                )
                # An empty string is tolerated as well: original note reads
                # "mac-os irreproducible stochastic failure on ubuntu".
                self.assertIn(smiles, [ground_truth, ''])

            drug = 'isoliquiritigenin'
            if not sanitize:
                # kekulize=False together with sanitize=False must be rejected.
                # Each call gets its own context manager: inside a shared
                # ``assertRaises`` block the first raise leaves the block, so
                # a second call would never be executed nor verified.
                for use_isomeric in (True, False):
                    with self.assertRaises(ValueError):
                        get_smiles_from_pubchem(
                            drug,
                            use_isomeric=use_isomeric,
                            kekulize=False,
                            sanitize=sanitize,
                        )
            else:
                ground_truth = 'O=C(/C=C/c1ccc(O)cc1)c1ccc(O)cc1O'
                smiles = get_smiles_from_pubchem(
                    drug, use_isomeric=True, kekulize=False, sanitize=sanitize
                )
                self.assertEqual(smiles, ground_truth)

                ground_truth = 'O=C(C=Cc1ccc(O)cc1)c1ccc(O)cc1O'
                smiles = get_smiles_from_pubchem(
                    drug, use_isomeric=False, kekulize=False, sanitize=sanitize
                )
                self.assertEqual(smiles, ground_truth)

        # Test molecule where landing page has several entries
        gt_smiles = (
            'CC12C(C(CC(O1)N3C4=CC=CC=C4C5=C6C(=C7C8=CC=CC=C8N2C7=C53)CNC6=O)NC)OC'
        )
        drug = 'Staurosporine'
        smiles = get_smiles_from_pubchem(drug, use_isomeric=False, kekulize=True)
        self.assertEqual(smiles, gt_smiles)

    def test_query_pubchem(self) -> None:
        """Test query_pubchem.

        Each SMILES is expected to yield a ``(bool, int)`` tuple.
        NOTE(review): presumably ``(found, cid)`` with negative integers as
        error/not-found codes -- confirm against ``query_pubchem``.
        """
        smiles_list = [
            'O1C=CC=NC(=O)C1=O',
            'CC(N)S(O)(=O)C(C)CC(C(C)C)c1cc(F)cc(F)c1',
            'Clc1ccccc2ccnc12',
        ]
        ground_truths = [(True, 67945516), (False, -2), (False, -1)]
        for gt, smiles in zip(ground_truths, smiles_list):
            self.assertTupleEqual(query_pubchem(smiles), gt)

    def test_remove_pubchem_smiles(self) -> None:
        """Test remove_pubchem_smiles.

        Of the three input SMILES only the first is expected to be dropped;
        the remaining two must be returned in their original order.
        """
        smiles_list = [
            'O1C=CC=NC(=O)C1=O',
            'CC(N)S(O)(=O)C(C)CC(C(C)C)c1cc(F)cc(F)c1',
            'Clc1ccccc2ccnc12',
        ]
        ground_truth = [
            'CC(N)S(O)(=O)C(C)CC(C(C)C)c1cc(F)cc(F)c1',
            'Clc1ccccc2ccnc12',
        ]
        self.assertListEqual(remove_pubchem_smiles(smiles_list), ground_truth)
# Run the test suite when this module is executed as a script.
if __name__ == '__main__':
    unittest.main()