"""Testing Crawlers."""
import unittest
from pytoda.preprocessing.crawlers import (
get_smiles_from_pubchem,
query_pubchem,
remove_pubchem_smiles,
)
class TestCrawlers(unittest.TestCase):
    """Testing Crawlers.

    Exercises ``get_smiles_from_pubchem``, ``query_pubchem`` and
    ``remove_pubchem_smiles`` against hard-coded expected values.

    NOTE(review): the helpers presumably query the live PubChem service, so
    these tests likely need network access - confirm before running offline.
    """

    def test_get_smiles_from_pubchem(self) -> None:
        """Test get_smiles_from_pubchem."""
        drug = 'isoliquiritigenin'
        # The same compound, addressed once by name and once by its CID.
        queries = list(zip(['name', 'cid'], ['isoliquiritigenin', 638278]))

        for sanitize in [True, False]:
            # Kekulized output, isomeric SMILES: must match exactly.
            ground_truth = 'C1=CC(=CC=C1/C=C/C(=O)C2=C(C=C(C=C2)O)O)O'
            for query, identifier in queries:
                with self.subTest(
                    sanitize=sanitize, query_type=query, use_isomeric=True
                ):
                    smiles = get_smiles_from_pubchem(
                        identifier,
                        use_isomeric=True,
                        kekulize=True,
                        sanitize=sanitize,
                        query_type=query,
                    )
                    self.assertEqual(smiles, ground_truth)

            # Kekulized output, non-isomeric SMILES.
            ground_truth = 'C1=CC(=CC=C1C=CC(=O)C2=C(C=C(C=C2)O)O)O'
            for query, identifier in queries:
                with self.subTest(
                    sanitize=sanitize, query_type=query, use_isomeric=False
                ):
                    smiles = get_smiles_from_pubchem(
                        identifier,
                        use_isomeric=False,
                        kekulize=True,
                        sanitize=sanitize,
                        query_type=query,
                    )
                    # An empty string is tolerated here. NOTE(review): the
                    # original comment reads "mac-os irreproducible stochastic
                    # failure on ubuntu", i.e. apparently a sporadic failure
                    # on Ubuntu that could not be reproduced on macOS.
                    self.assertIn(smiles, [ground_truth, ''])

            if not sanitize:
                # kekulize=False together with sanitize=False is expected to
                # be rejected. Each call gets its own assertRaises context:
                # with both calls in one context the second one would never
                # run, because the first already raises.
                for use_isomeric in (True, False):
                    with self.subTest(
                        sanitize=sanitize, use_isomeric=use_isomeric
                    ):
                        with self.assertRaises(ValueError):
                            get_smiles_from_pubchem(
                                drug,
                                use_isomeric=use_isomeric,
                                kekulize=False,
                                sanitize=sanitize,
                            )
            else:
                # Non-kekulized (aromatic) output, with and without
                # stereo information.
                expected_by_isomeric = [
                    (True, 'O=C(/C=C/c1ccc(O)cc1)c1ccc(O)cc1O'),
                    (False, 'O=C(C=Cc1ccc(O)cc1)c1ccc(O)cc1O'),
                ]
                for use_isomeric, ground_truth in expected_by_isomeric:
                    with self.subTest(
                        sanitize=sanitize, use_isomeric=use_isomeric
                    ):
                        smiles = get_smiles_from_pubchem(
                            drug,
                            use_isomeric=use_isomeric,
                            kekulize=False,
                            sanitize=sanitize,
                        )
                        self.assertEqual(smiles, ground_truth)

        # Test molecule where landing page has several entries.
        # Independent of ``sanitize`` (the default is used), so it runs once.
        gt_smiles = (
            'CC12C(C(CC(O1)N3C4=CC=CC=C4C5=C6C(=C7C8=CC=CC=C8N2C7=C53)CNC6=O)NC)OC'
        )
        drug = 'Staurosporine'
        smiles = get_smiles_from_pubchem(drug, use_isomeric=False, kekulize=True)
        self.assertEqual(smiles, gt_smiles)

    def test_query_pubchem(self) -> None:
        """Test query_pubchem."""
        smiles_list = [
            'O1C=CC=NC(=O)C1=O',
            'CC(N)S(O)(=O)C(C)CC(C(C)C)c1cc(F)cc(F)c1',
            'Clc1ccccc2ccnc12',
        ]
        # One expected (bool, int) tuple per SMILES above, in the same order.
        ground_truths = [(True, 67945516), (False, -2), (False, -1)]
        for gt, smiles in zip(ground_truths, smiles_list):
            with self.subTest(smiles=smiles):
                self.assertTupleEqual(query_pubchem(smiles), gt)

    def test_remove_pubchem_smiles(self) -> None:
        """Test remove_pubchem_smiles."""
        smiles_list = [
            'O1C=CC=NC(=O)C1=O',
            'CC(N)S(O)(=O)C(C)CC(C(C)C)c1cc(F)cc(F)c1',
            'Clc1ccccc2ccnc12',
        ]
        # Only the first SMILES is expected to be dropped; it is the one
        # test_query_pubchem expects ``query_pubchem`` to flag with True.
        ground_truth = [
            'CC(N)S(O)(=O)C(C)CC(C(C)C)c1cc(F)cc(F)c1',
            'Clc1ccccc2ccnc12',
        ]
        self.assertListEqual(remove_pubchem_smiles(smiles_list), ground_truth)
# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()