From e28b55b10b14e8d974db85d1866abe1ba2e3cee2 Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 29 Apr 2025 10:26:27 -0400 Subject: [PATCH 01/14] changes to pre-commmit to include mypy, and adding __version__ import in setup.py. Also including py.typed into the setup data --- .pre-commit-config.yaml | 4 ++++ setup.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3416cbc..d79bc06 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,3 +15,7 @@ repos: rev: "0.6.0" hooks: - id: nbstripout + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.15.0 # (or whichever version you want) + hooks: + - id: mypy diff --git a/setup.py b/setup.py index cd326aa..1f9d802 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ import os from glob import glob from setuptools import setup - +from exmol import __version__ exec(open("exmol/version.py").read()) with open("README.md", "r", encoding="utf-8") as fh: @@ -16,7 +16,7 @@ url="https://ur-whitelab.github.io/exmol/", license="MIT", packages=["exmol", "exmol.stoned"], - package_data={"exmol": ["lime_data/*.txt", "lime_data/*.pb"]}, + package_data={"exmol": ["lime_data/*.txt", "lime_data/*.pb", "exmol/py.typed"]}, install_requires=[ "selfies >= 2.0.0", "numpy", From e29244b1e97e02722fc6587e2f1b4a356d8c9c87 Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 29 Apr 2025 10:27:33 -0400 Subject: [PATCH 02/14] fixing mypy issue in docs/source/conf.py--> adding list type to empty arg --- docs/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index e612ea6..00e1cd8 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -46,7 +46,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = [] +exclude_patterns:list = [] # -- Options for HTML output ------------------------------------------------- From b2dc4af471457fe8f4fd5e9879b713f36bb0daa8 Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 29 Apr 2025 10:27:46 -0400 Subject: [PATCH 03/14] fixing mypy issue in docs/source/conf.py--> adding list type to empty arg --- docs/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 00e1cd8..133755d 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -46,7 +46,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns:list = [] +exclude_patterns: list = [] # -- Options for HTML output ------------------------------------------------- From 21fd8cac737f6929debfc7b62214ba8dae632876 Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 29 Apr 2025 10:29:50 -0400 Subject: [PATCH 04/14] fixing mypy issues in plot_utils.py --- exmol/plot_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/exmol/plot_utils.py b/exmol/plot_utils.py index d005cac..39b7747 100644 --- a/exmol/plot_utils.py +++ b/exmol/plot_utils.py @@ -290,6 +290,7 @@ def similarity_map_using_tstats( if return_svg: return text _imgtext2mpl(text) + return None def plot_space_by_fit( @@ -300,7 +301,7 @@ def plot_space_by_fit( mol_fontsize: int = 8, offset: int = 0, ax: Any = None, - figure_kwargs: Dict = None, + figure_kwargs: Optional[Dict] = None, cartoon: bool = False, rasterized: bool = False, ): From 557affc0d217810ceeef4625a3af9bf6edb77382 Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 29 Apr 2025 10:36:10 -0400 Subject: [PATCH 05/14] fixing mypy issues in exmol file, mainly adding optional[] or type hints where necessary --- exmol/exmol.py | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/exmol/exmol.py b/exmol/exmol.py index a905323..94ab548 100644 --- a/exmol/exmol.py +++ b/exmol/exmol.py @@ -205,7 +205,7 @@ def _load_smarts(path, rank_cutoff=500): return smarts -def name_morgan_bit(m: Any, bitInfo: Dict[Any, Any], key: int) -> str: +def name_morgan_bit(m: Any, bitInfo: Dict[Any, Any], key: int) -> Optional[str]: """Get the name of a Morgan bit using a SMARTS dictionary :param m: RDKit molecule @@ -273,9 +273,9 @@ def get_functional_groups( if isinstance(mol, str): mol = smi2mol(mol) if mol is None: - return [] + return set([]) - matched_atoms = set() + matched_atoms: set = set() result = set() sorted_smarts = sorted(_SMARTS.items(), key=lambda x: x[1][1]) @@ -310,7 +310,7 @@ def clear_descriptors( def add_descriptors( examples: List[Example], descriptor_type: str = "MACCS", - mols: List[Any] = None, + mols: Optional[List[Any]] = None, ) -> List[Example]: """Add descriptors to passed examples @@ -415,7 +415,7 @@ def get_basic_alphabet() -> Set[str]: # is always a plain uncharged element. -def _alphabet_to_elements(alphabet: List[str]) -> Set[str]: +def _alphabet_to_elements(alphabet: Union[List[str], Set[str]]) -> Set[str]: """Converts SELFIES alphabet to element symbols""" symbols = [] for s in alphabet: @@ -426,8 +426,8 @@ def _alphabet_to_elements(alphabet: List[str]) -> Set[str]: def _check_alphabet_consistency( - smiles: str, alphabet_symbols: Set[str], check=False -) -> True: + smiles: str, alphabet_symbols: Union[Set[str], List[str]], check=False +) -> bool: """Checks if SMILES only contains tokens from alphabet""" alphabet_symbols = _alphabet_to_elements(set(alphabet_symbols)) @@ -448,7 +448,7 @@ def run_stoned( num_samples: int = 2000, max_mutations: int = 2, min_mutations: int = 1, - alphabet: Union[List[str], Set[str]] = None, + alphabet: Optional[Union[List[str], Set[str]]] = None, return_selfies: bool = False, _pbar: Any = None, ) -> Union[Tuple[List[str], List[float]], Tuple[List[str], List[str], List[float]]]: @@ -491,11 +491,11 @@ def run_stoned( # Mutate the SELFIES: if _pbar: _pbar.set_description(f"🥌STONED🥌 Mutations: {num_mutations}") - selfies_mut = stoned.get_mutated_SELFIES( + selfies_mut: list | tuple = stoned.get_mutated_SELFIES( selfies_ls.copy(), num_mutations=num_mutations, alphabet=alphabet ) # Convert back to SMILES: - smiles_back = [sf.decoder(x) for x in selfies_mut] + smiles_back: list | tuple = [sf.decoder(x) for x in selfies_mut] # check if smiles are consistent with alphabet and downslect selfies_mut, smiles_back = zip( *[ @@ -664,10 +664,10 @@ def sample_space( ], batched: bool = True, preset: str = "medium", - data: List[Union[str, rdchem.Mol]] = None, - method_kwargs: Dict = None, - num_samples: int = None, - stoned_kwargs: Dict = None, + data: Optional[List[Union[str, rdchem.Mol]]] = None, + method_kwargs: Optional[Dict] = None, + num_samples: Optional[int] = None, + stoned_kwargs: Optional[Dict] = None, quiet: bool = False, use_selfies: bool = False, sanitize_smiles: bool = True, @@ -1017,7 +1017,7 @@ def is_low(e): def plot_space( examples: List[Example], exps: List[Example], - figure_kwargs: Dict = None, + figure_kwargs: Optional[Dict] = None, mol_size: Tuple[int, int] = (200, 200), highlight_clusters: bool = False, mol_fontsize: int = 8, @@ -1111,11 +1111,11 @@ def normalizer(x): def plot_cf( exps: List[Example], fig: Any = None, - figure_kwargs: Dict = None, + figure_kwargs: Optional[Dict] = None, mol_size: Tuple[int, int] = (200, 200), mol_fontsize: int = 10, - nrows: int = None, - ncols: int = None, + nrows: Optional[int] = None, + ncols: Optional[int] = None, ): """Draw the given set of Examples in a grid @@ -1159,10 +1159,10 @@ def plot_cf( def plot_descriptors( examples: List[Example], - output_file: str = None, + output_file: Optional[str] = None, fig: Any = None, - figure_kwargs: Dict = None, - title: str = None, + figure_kwargs: Optional[Dict] = None, + title: Optional[str] = None, return_svg: bool = False, ): """Plot descriptor attributions from given set of Examples. From 0b4968105974e433839dbfdf32dd7f9081904420 Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 29 Apr 2025 10:37:28 -0400 Subject: [PATCH 06/14] adding py.typed --- exmol/py.typed | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 exmol/py.typed diff --git a/exmol/py.typed b/exmol/py.typed new file mode 100644 index 0000000..e69de29 From 9643cea2069131b4bf1f04fafd3a3a4f51ae8ae5 Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 29 Apr 2025 10:47:11 -0400 Subject: [PATCH 07/14] fixing import issue arised from importing __version__, now is importing it directly from the file --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 1f9d802..61869be 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,8 @@ import os from glob import glob from setuptools import setup -from exmol import __version__ +from exmol.version import __version__ + exec(open("exmol/version.py").read()) with open("README.md", "r", encoding="utf-8") as fh: From 3ab670aa131dfe0bf5e88f6b737e6a584d5d6eaf Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 29 Apr 2025 10:54:10 -0400 Subject: [PATCH 08/14] removing unnecesary import --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 61869be..3f821d6 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,6 @@ import os from glob import glob from setuptools import setup -from exmol.version import __version__ exec(open("exmol/version.py").read()) From c65605b4edf4bffb2ceac6dffc2d0b511413a841 Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 29 Apr 2025 10:54:22 -0400 Subject: [PATCH 09/14] removing unnecesary import --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3f821d6..9ff3081 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ setup( name="exmol", - version=__version__, + version=__version__, # type: ignore description="Counterfactual generation with STONED SELFIES", author="Aditi Seshadri, Geemi Wellawatte, Andrew White", author_email="andrew.white@rochester.edu", From 110214353618dc3f9d0c08b33e03422bb9b3c6d1 Mon Sep 17 00:00:00 2001 From: Jorge <97254349+Jgmedina95@users.noreply.github.com> Date: Tue, 29 Apr 2025 11:47:15 -0400 Subject: [PATCH 10/14] Update RF.ipynb kernel specs --- paper1_CFs/RF.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paper1_CFs/RF.ipynb b/paper1_CFs/RF.ipynb index 1455cd2..d70a172 100644 --- a/paper1_CFs/RF.ipynb +++ b/paper1_CFs/RF.ipynb @@ -385,9 +385,9 @@ "metadata": { "celltoolbar": "Tags", "kernelspec": { - "display_name": "exmol312", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "exmol312" + "name": "python3" }, "language_info": { "codemirror_mode": { From a22c55877f025636b81eae172c879158ea146914 Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 29 Apr 2025 20:19:04 -0400 Subject: [PATCH 11/14] fixing jax deprecation caught in paper3, GNN --- paper3_Scents/GNNModelTrainingAndEvaluation.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paper3_Scents/GNNModelTrainingAndEvaluation.ipynb b/paper3_Scents/GNNModelTrainingAndEvaluation.ipynb index 5079371..c7eb9f0 100644 --- a/paper3_Scents/GNNModelTrainingAndEvaluation.ipynb +++ b/paper3_Scents/GNNModelTrainingAndEvaluation.ipynb @@ -511,7 +511,7 @@ "# Code to compute L2 regularization based on that in the \"MLP on MNIST\" Example on the Haiku Github repository (https://github.com/deepmind/dm-haiku/blob/main/examples/mnist.py)\n", "def loss_fn_logits_reg(params, x, y):\n", " l2_lossTerm = regularizationStrength * sum(\n", - " jnp.sum(jnp.square(p)) for p in jax.tree_leaves(params)\n", + " jnp.sum(jnp.square(p)) for p in jax.tree_util.tree_leaves(params)\n", " )\n", " logits = loss_fn_logits(params, x, y)\n", " return logits + l2_lossTerm" From 004f5c6cde2ad1462a85e2ade13d1e95e2e60b21 Mon Sep 17 00:00:00 2001 From: Jorge Date: Tue, 29 Apr 2025 20:21:53 -0400 Subject: [PATCH 12/14] improving docstring on name_morgan_bit function --- exmol/exmol.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/exmol/exmol.py b/exmol/exmol.py index 94ab548..13c8b0d 100644 --- a/exmol/exmol.py +++ b/exmol/exmol.py @@ -211,6 +211,8 @@ def name_morgan_bit(m: Any, bitInfo: Dict[Any, Any], key: int) -> Optional[str]: :param m: RDKit molecule :param bitInfo: bitInfo dictionary from rdkit.Chem.AllChem.GetMorganFingerprint :param key: bit key corresponding to the fingerprint you want to have named + + :return: Name of the bit, or None if no match is found """ global _SMARTS if _SMARTS is None: From 8fe78f300ec7fafd787fd8a043c38c0ef5b1ba96 Mon Sep 17 00:00:00 2001 From: Jorge Date: Mon, 5 May 2025 09:44:52 -0400 Subject: [PATCH 13/14] add case when mol is None in docstring of get_functional_groups for clarity --- exmol/exmol.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exmol/exmol.py b/exmol/exmol.py index 13c8b0d..ba613cb 100644 --- a/exmol/exmol.py +++ b/exmol/exmol.py @@ -262,7 +262,7 @@ def get_functional_groups( :param mol: RDKit molecule :param return_all: If True, will return all functional groups found in the molecule :param cutoff: Maximum rank of functional groups to consider based on popularity (increase to include groups like methyl, ethyl, etc.) - :return: set of unique functional group names present in the molecule + :return: set of unique functional group names present in the molecule. If mol is None, returns an empty set. """ global _SMARTS if _SMARTS is None: From 7a9ee24a8ab71952d201061c139d4621f14b81fe Mon Sep 17 00:00:00 2001 From: Jorge Date: Mon, 5 May 2025 13:13:49 -0400 Subject: [PATCH 14/14] updating to version 3.2.3 --- exmol/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exmol/version.py b/exmol/version.py index 1b99243..66c9e68 100644 --- a/exmol/version.py +++ b/exmol/version.py @@ -1 +1 @@ -__version__ = "3.2.2" +__version__ = "3.2.3"