graphistry · dcolinmorgan · Nov 27, 2023 · Nov 27, 2023 · Nov 27, 2023 · Nov 27, 2023
diff --git a/graphistry/dgl_utils.py b/graphistry/dgl_utils.py
@@ -17,47 +17,26 @@
 )
 
 from .util import setup_logger
-
+from .utils.dep_manager import deps
 
 if TYPE_CHECKING:
     import scipy
     MIXIN_BASE = FeatureMixin
     try:
-        import torch
+        torch = deps.torch
     except:
         pass
     try:
-        import dgl
+        dgl = deps.dgl
     except:
         pass
 else:
     MIXIN_BASE = object
 
 
-def lazy_dgl_import_has_dependency():
-    try:
-        import warnings
-        warnings.filterwarnings('ignore')
-        import dgl  # noqa: F811
-        return True, 'ok', dgl
-    except ModuleNotFoundError as e:
-        return False, e, None
-
-
-def lazy_torch_import_has_dependency():
-    try:
-        import warnings
-        warnings.filterwarnings('ignore')
-        import torch  # noqa: F811
-        return True, 'ok', torch
-    except ModuleNotFoundError as e:
-        return False, e, None
-
-
 logger = setup_logger(name=__name__)
 
 
-
 # #########################################################################################
 #
 #  Torch helpers
@@ -73,7 +52,7 @@ def convert_to_torch(X_enc: pd.DataFrame, y_enc: Optional[pd.DataFrame]):  # typ
     :param y_enc: DataFrame Matrix of Values for Target
     :return: Dictionary of torch encoded arrays
     """
-    _, _, torch = lazy_torch_import_has_dependency()  # noqa: F811
+    torch = deps.torch  # noqa: F811
 
     if not y_enc.empty:  # type: ignore
         data = {
@@ -98,7 +77,7 @@ def get_available_devices():
         device (torch.device): Main device (GPU 0 or CPU).
         gpu_ids (list): List of IDs of all GPUs that are available.
     """
-    _, _, torch = lazy_torch_import_has_dependency()  # noqa: F811
+    torch = deps.torch  # noqa: F811
 
     gpu_ids = []
     if torch.cuda.is_available():
@@ -181,7 +160,8 @@ def pandas_to_dgl_graph(
         sp_mat: sparse scipy matrix
         ordered_nodes_dict: dict ordered from most common src and dst nodes
     """
-    _, _, dgl = lazy_dgl_import_has_dependency()  # noqa: F811
+    dgl = deps.dgl  # noqa: F811
+
     sp_mat, ordered_nodes_dict = pandas_to_sparse_adjacency(df, src, dst, weight_col)
     g = dgl.from_scipy(sp_mat, device=device)  # there are other ways too
     logger.info(f"Graph Type: {type(g)}") 
@@ -196,7 +176,7 @@ def get_torch_train_test_mask(n: int, ratio: float = 0.8):
     :param ratio: mimics train/test split. `ratio` sets number of True vs False mask entries.
     :return: train and test torch tensor masks
     """
-    _, _, torch = lazy_torch_import_has_dependency()  # noqa: F811
+    torch = deps.torch  # noqa: F811
 
     train_mask = torch.zeros(n, dtype=torch.bool).bernoulli(ratio)
     test_mask = ~train_mask
@@ -225,8 +205,8 @@ def dgl_lazy_init(self, train_split: float = 0.8, device: str = "cpu"):
         """
 
         if not self.dgl_initialized:
-            lazy_dgl_import_has_dependency()
-            lazy_torch_import_has_dependency()
+            deps.dgl
+            deps.torch
             self.train_split = train_split
             self.device = device
             self._removed_edges_previously = False

diff --git a/graphistry/embed_utils.py b/graphistry/embed_utils.py
@@ -2,43 +2,22 @@
 import numpy as np
 import pandas as pd
 from typing import Optional, Union, Callable, List, TYPE_CHECKING, Any, Tuple
-
+from inspect import getmodule
 from .PlotterBase import Plottable
 from .compute.ComputeMixin import ComputeMixin
+from .utils.dep_manager import deps
 
 
-def lazy_embed_import_dep():
-    try:
-        import torch
-        import torch.nn as nn
-        import dgl
-        from dgl.dataloading import GraphDataLoader
-        import torch.nn.functional as F
-        from .networks import HeteroEmbed
-        from tqdm import trange
-        return True, torch, nn, dgl, GraphDataLoader, HeteroEmbed, F, trange
-
-    except:
-        return False, None, None, None, None, None, None, None
-
-def check_cudf():
-    try:
-        import cudf
-        return True, cudf
-    except:
-        return False, object
-
-
 if TYPE_CHECKING:
-    _, torch, _, _, _, _, _, _ = lazy_embed_import_dep()
+    torch = deps.torch
     TT = torch.Tensor
     MIXIN_BASE = ComputeMixin
 else:
     TT = Any
     MIXIN_BASE = object
     torch = Any
 
-has_cudf, cudf = check_cudf()
+cudf = deps.cudf
 
 XSymbolic = Optional[Union[List[str], str, pd.DataFrame]]
 ProtoSymbolic = Optional[Union[str, Callable[[TT, TT, TT], TT]]]  # type: ignore
@@ -99,8 +78,7 @@ def __init__(self):
         self._device = "cpu"
 
     def _preprocess_embedding_data(self, res, train_split:Union[float, int] = 0.8) -> Plottable:
-        #_, torch, _, _, _, _, _, _ = lazy_embed_import_dep()
-        import torch
+        torch = deps.torch
         log('Preprocessing embedding data')
         src, dst = res._source, res._destination
         relation = res._relation
@@ -147,7 +125,7 @@ def _preprocess_embedding_data(self, res, train_split:Union[float, int] = 0.8) -
         return res
 
     def _build_graph(self, res) -> Plottable:
-        _, _, _, dgl, _, _, _, _ = lazy_embed_import_dep()
+        dgl = deps.dgl
         s, r, t = res._triplets.T
 
         if res._train_idx is not None:
@@ -169,7 +147,10 @@ def _build_graph(self, res) -> Plottable:
 
 
     def _init_model(self, res, batch_size:int, sample_size:int, num_steps:int, device):
-        _, _, _, _, GraphDataLoader, HeteroEmbed, _, _ = lazy_embed_import_dep()
+        dgl_ = deps.dgl
+        if dgl_: 
+            from dgl.dataloading import GraphDataLoader
+        from .networks import HeteroEmbed
         g_iter = SubgraphIterator(res._kg_dgl, sample_size, num_steps)
         g_dataloader = GraphDataLoader(
             g_iter, batch_size=batch_size, collate_fn=lambda x: x[0]
@@ -186,9 +167,11 @@ def _init_model(self, res, batch_size:int, sample_size:int, num_steps:int, devic
         )
 
         return model, g_dataloader
-
+        
     def _train_embedding(self, res, epochs:int, batch_size:int, lr:float, sample_size:int, num_steps:int, device) -> Plottable:
-        _, torch, nn, _, _, _, _, trange = lazy_embed_import_dep()
+        torch = deps.torch
+        nn = deps.torch.nn
+        trange = deps.tqdm.trange
         log('Training embedding')
         model, g_dataloader = res._init_model(res, batch_size, sample_size, num_steps, device)
         if hasattr(res, "_embed_model") and not res._build_new_embedding_model:
@@ -232,7 +215,7 @@ def _train_embedding(self, res, epochs:int, batch_size:int, lr:float, sample_siz
 
     @property
     def _gcn_node_embeddings(self):
-        _, torch, _, _, _, _, _, _ = lazy_embed_import_dep()
+        torch = deps.torch
         g_dgl = self._kg_dgl.to(self._device)
         em = self._embed_model(g_dgl).detach()
         torch.cuda.empty_cache()
@@ -301,12 +284,12 @@ def embed(
         """
         # this is temporary, will be fixed in future releases
         try:
-            if isinstance(self._nodes, cudf.DataFrame):
+            if 'cudf' in str(getmodule(self._nodes)):
                 self._nodes = self._nodes.to_pandas()
         except:
             pass
         try:
-            if isinstance(self._edges, cudf.DataFrame):
+            if 'cudf' in str(getmodule(self._edges)):
                 self._edges = self._edges.to_pandas()
         except:
             pass
@@ -436,7 +419,7 @@ def predict_links(
         else:
             # this is temporary, will be removed after gpu feature utils
             try:
-                if isinstance(source, cudf.DataFrame):
+                if 'cudf' in str(getmodule(source)):
                     source = source.to_pandas()  # type: ignore
             except:
                 pass
@@ -448,7 +431,7 @@ def predict_links(
         else:
             # this is temporary, will be removed after gpu feature utils
             try:
-                if isinstance(relation, cudf.DataFrame):
+                if 'cudf' in str(getmodule(relation)):
                     relation = relation.to_pandas()  # type: ignore
             except:
                 pass
@@ -460,7 +443,8 @@ def predict_links(
         else:
             # this is temporary, will be removed after gpu feature utils
             try:
-                if isinstance(destination, cudf.DataFrame):
+                # Look for 'cudf' in the repr of the object's defining module; this replaces the former isinstance(destination, cudf.DataFrame) check.
+                if 'cudf' in str(getmodule(destination)):
                     destination = destination.to_pandas()  # type: ignore
             except:
                 pass
@@ -540,7 +524,7 @@ def fetch_triplets_for_inference(x_r):
 
 
     def _score(self, triplets: Union[np.ndarray, TT]) -> TT:  # type: ignore
-        _, torch, _, _, _, _, _, _ = lazy_embed_import_dep()
+        torch = deps.torch
         emb = self._kg_embeddings.clone().detach()
         if not isinstance(triplets, torch.Tensor):
             triplets = torch.tensor(triplets)
@@ -571,7 +555,13 @@ def __len__(self) -> int:
         return self.num_steps
 
     def __getitem__(self, i:int):
-        _, torch, nn, dgl, GraphDataLoader, _, F, _ = lazy_embed_import_dep()
+        torch = deps.torch
+        from torch import nn
+        from torch.nn import functional as F
+        dgl = deps.dgl
+
+        from dgl.dataloading import GraphDataLoader
+
         eids = torch.from_numpy(np.random.choice(self.eids, self.sample_size))
 
         src, dst = self.g.find_edges(eids)
@@ -593,7 +583,7 @@ def __getitem__(self, i:int):
 
     @staticmethod
     def _sample_neg(triplets:np.ndarray, num_nodes:int) -> Tuple[TT, TT]:  # type: ignore
-        _, torch, _, _, _, _, _, _ = lazy_embed_import_dep()
+        torch = deps.torch
         triplets = torch.tensor(triplets)
         h, r, t = triplets.T
         h_o_t = torch.randint(high=2, size=h.size())