wearepal · tmke8 · Jan 4, 2023 · Jan 3, 2023 · Jan 3, 2023 · Jan 4, 2023
diff --git a/ethicml/models/algorithm_base.py b/ethicml/models/algorithm_base.py
@@ -21,12 +21,12 @@ class SubprocessAlgorithmMixin(ABC):  # pylint: disable=too-few-public-methods
     """Mixin for running algorithms in a subprocess, to be used with :class:`Algorithm`."""
 
     @property
-    def executable(self) -> str:
+    def executable(self) -> list[str]:
         """Path to a (Python) executable.
 
         By default, the Python executable that called this script is used.
         """
-        return sys.executable
+        return [sys.executable]
 
     def call_script(
         self, cmd_args: list[str], env: dict[str, str] | None = None, cwd: Path | None = None
@@ -41,7 +41,7 @@ def call_script(
         two_hours = 60 * 60 * 2  # 60secs * 60mins * 2 hours
         try:
             process = subprocess.run(  # wait for process creation to finish
-                [self.executable] + cmd_args,
+                self.executable + cmd_args,
                 capture_output=True,
                 env=env,
                 cwd=cwd,

diff --git a/ethicml/models/inprocess/installed_model.py b/ethicml/models/inprocess/installed_model.py
@@ -34,7 +34,7 @@ class InstalledModel(SubprocessAlgorithmMixin, InAlgorithm, ABC):
         simply the last part of the repository URL).
     :param url: URL of the repository. (Default: None)
     :param executable: Path to a Python executable. (Default: None.
-    :param use_poetry: If True, will try to use poetry instead of pipenv. (Default: False)
+    :param use_pdm: If ``True``, will try to use pdm instead of pipenv. (Default: False)
     """
 
     def __init__(
@@ -43,8 +43,8 @@ def __init__(
         dir_name: str,
         top_dir: str,
         url: str | None = None,
-        executable: str | None = None,
-        use_poetry: bool = False,
+        executable: list[str] | None = None,
+        use_pdm: bool = False,
     ):
         # QUESTION: do we really need `store_dir`? we could also just clone the code into "."
         self._store_dir: Path = Path(".") / dir_name  # directory where code and venv are stored
@@ -56,10 +56,7 @@ def __init__(
 
         if executable is None:
             # create virtual environment
-            del use_poetry  # see https://github.com/python-poetry/poetry/issues/4055
-            # self._create_venv(use_poetry=use_poetry)
-            self._create_venv(use_poetry=False)
-            self.__executable = str(self._code_path.resolve() / ".venv" / "bin" / "python")
+            self.__executable = self._create_venv(use_pdm=use_pdm)
         else:
             self.__executable = executable
         self.__name = name
@@ -75,7 +72,7 @@ def _code_path(self) -> Path:
         return self._store_dir / self._top_dir
 
     @property
-    def executable(self) -> str:
+    def executable(self) -> list[str]:
         """Python executable from the virtualenv associated with the model."""
         return self.__executable
 
@@ -88,21 +85,25 @@ def _clone_directory(self, url: str) -> None:
             self._store_dir.mkdir()
             git.cmd.Git(self._store_dir).clone(url)
 
-    def _create_venv(self, use_poetry: bool) -> None:
+    def _create_venv(self, use_pdm: bool) -> list[str]:
         """Create a venv based on the Pipfile in the repository.
 
-        :param use_poetry: Whether to use poetry instead of pipenv.
+        :param use_pdm: Whether to use pdm instead of pipenv.
+        :returns: Shell command to execute python scripts.
+        :raises RuntimeError: If the required package manager isn't found.
         """
-        venv_directory = self._code_path / ".venv"
-        if not venv_directory.exists():
-            if use_poetry and shutil.which("poetry") is not None:  # use poetry instead of pipenv
+        if use_pdm:  # use pdm instead of pipenv
+            pypackages_dir = self._code_path / "__pypackages__"
+            if not pypackages_dir.exists():
+                if shutil.which("pdm") is None:
+                    raise RuntimeError("pdm must be installed")
                 environ = os.environ.copy()
-                environ["POETRY_VIRTUALENVS_CREATE"] = "true"
-                environ["POETRY_VIRTUALENVS_IN_PROJECT"] = "true"
-                subprocess.run(
-                    ["poetry", "install", "--no-root"], env=environ, check=True, cwd=self._code_path
-                )
-                return
+                environ["PDM_USE_VENV"] = "0"
+                subprocess.run(["pdm", "install"], env=environ, check=True, cwd=self._code_path)
+            return ["pdm", "run", "python"]
+
+        venv_directory = self._code_path.resolve() / ".venv"
+        if not venv_directory.exists():
 
             environ = os.environ.copy()
             environ["PIPENV_IGNORE_VIRTUALENVS"] = "1"
@@ -111,6 +112,7 @@ def _create_venv(self, use_poetry: bool) -> None:
             environ["PIPENV_PIPFILE"] = str(self._code_path / "Pipfile")
 
             subprocess.run([sys.executable, "-m", "pipenv", "install"], env=environ, check=True)
+        return [str(venv_directory / "bin" / "python")]
 
     def remove(self) -> None:
         """Remove the directory that we created in :meth:`_clone_directory()`."""

diff --git a/ethicml/models/inprocess/kamishima.py b/ethicml/models/inprocess/kamishima.py
@@ -34,7 +34,7 @@ def __init__(self, *, eta: float = 1.0):
         super().__init__(
             name="Kamishima",
             dir_name="kamishima",
-            url="https://github.com/predictive-analytics-lab/kamfadm.git",
+            url="https://github.com/wearepal/kamfadm.git",
             top_dir="kamfadm",
         )
         self.eta = eta

diff --git a/ethicml/models/inprocess/zafar.py b/ethicml/models/inprocess/zafar.py
@@ -17,8 +17,8 @@
 __all__ = ["ZafarAccuracy", "ZafarBaseline", "ZafarEqOdds", "ZafarEqOpp", "ZafarFairness"]
 
 
-SUB_DIR_IMPACT: Final = Path(".") / "disparate_impact" / "run-classifier"
-SUB_DIR_MISTREAT: Final = Path(".") / "disparate_mistreatment" / "run_classifier"
+SUB_DIR_IMPACT: Final = "src.disparate_impact.run_classifier"
+SUB_DIR_MISTREAT: Final = "src.disparate_mistreatment.run_classifier"
 
 
 class _FitParams(NamedTuple):
@@ -27,15 +27,15 @@ class _FitParams(NamedTuple):
 
 
 class _ZafarAlgorithmBase(InstalledModel):
-    def __init__(self, name: str, sub_dir: Path):
+    def __init__(self, name: str, module: str):
         super().__init__(
             name=name,
             dir_name="zafar",
-            url="https://github.com/predictive-analytics-lab/fair-classification.git",
+            url="https://github.com/wearepal/fair-classification.git",
             top_dir="fair-classification",
-            use_poetry=True,
+            use_pdm=True,
         )
-        self._sub_dir = sub_dir
+        self._module = module
         self._fit_params: _FitParams | None = None
 
     @staticmethod
@@ -88,7 +88,7 @@ def _fit(
         self._create_file_in_zafar_format(train, train_path, label_converter)
 
         cmd = self._get_fit_cmd(str(train_path), str(model_path))
-        working_dir = self._code_path.resolve() / self._sub_dir
+        working_dir = self._code_path.resolve()
         self.call_script(cmd, cwd=working_dir)
 
         return _FitParams(model_path, label_converter)
@@ -100,7 +100,7 @@ def _predict(self, test: TestTuple, tmp_path: Path, fit_params: _FitParams) -> P
         cmd = self._get_predict_cmd(
             str(test_path), str(fit_params.model_path), str(predictions_path)
         )
-        working_dir = self._code_path.resolve() / self._sub_dir
+        working_dir = self._code_path.resolve()
         self.call_script(cmd, cwd=working_dir)
         predictions = predictions_path.open().read()
         predictions = json.loads(predictions)
@@ -113,7 +113,7 @@ def _get_fit_cmd(self, train_name: str, model_path: str) -> list[str]:
         pass
 
     def _get_predict_cmd(self, test_name: str, model_path: str, output_file: str) -> list[str]:
-        return ["predict.py", test_name, model_path, output_file]
+        return ["-m", f"{self._module}.predict", test_name, model_path, output_file]
 
 
 class ZafarBaseline(_ZafarAlgorithmBase):
@@ -122,7 +122,7 @@ class ZafarBaseline(_ZafarAlgorithmBase):
     is_fairness_algo: ClassVar[bool] = False
 
     def __init__(self) -> None:
-        super().__init__(name="ZafarBaseline", sub_dir=SUB_DIR_IMPACT)
+        super().__init__(name="ZafarBaseline", module=SUB_DIR_IMPACT)
 
     @property
     @override
@@ -131,14 +131,14 @@ def hyperparameters(self) -> HyperParamType:
 
     @override
     def _get_fit_cmd(self, train_name: str, model_path: str) -> list[str]:
-        return ["fit.py", train_name, model_path, "baseline", "0"]
+        return ["-m", f"{self._module}.fit", train_name, model_path, "baseline", "0"]
 
 
 class ZafarAccuracy(_ZafarAlgorithmBase):
     """Zafar with fairness."""
 
     def __init__(self, *, gamma: float = 0.5):
-        super().__init__(name=f"ZafarAccuracy, γ={gamma}", sub_dir=SUB_DIR_IMPACT)
+        super().__init__(name=f"ZafarAccuracy, γ={gamma}", module=SUB_DIR_IMPACT)
         self.gamma = gamma
 
     @property
@@ -148,14 +148,14 @@ def hyperparameters(self) -> HyperParamType:
 
     @override
     def _get_fit_cmd(self, train_name: str, model_path: str) -> list[str]:
-        return ["fit.py", train_name, model_path, "gamma", str(self.gamma)]
+        return ["-m", f"{self._module}.fit", train_name, model_path, "gamma", str(self.gamma)]
 
 
 class ZafarFairness(_ZafarAlgorithmBase):
     """Zafar with fairness."""
 
     def __init__(self, *, C: float = 0.001):
-        super().__init__(name=f"ZafarFairness, C={C}", sub_dir=SUB_DIR_IMPACT)
+        super().__init__(name=f"ZafarFairness, C={C}", module=SUB_DIR_IMPACT)
         self._c = C
 
     @property
@@ -165,7 +165,7 @@ def hyperparameters(self) -> HyperParamType:
 
     @override
     def _get_fit_cmd(self, train_name: str, model_path: str) -> list[str]:
-        return ["fit.py", train_name, model_path, "c", str(self._c)]
+        return ["-m", f"{self._module}.fit", train_name, model_path, "c", str(self._c)]
 
 
 class ZafarEqOpp(_ZafarAlgorithmBase):
@@ -176,7 +176,7 @@ class ZafarEqOpp(_ZafarAlgorithmBase):
 
     def __init__(self, *, tau: float = 5.0, mu: float = 1.2, eps: float = 0.0001):
         name = f"{self._base_name}, τ={tau}, μ={mu} ε={eps}"
-        super().__init__(name=name, sub_dir=SUB_DIR_MISTREAT)
+        super().__init__(name=name, module=SUB_DIR_MISTREAT)
         self._tau = tau
         self._mu = mu
         self._eps = eps
@@ -189,7 +189,8 @@ def hyperparameters(self) -> HyperParamType:
     @override
     def _get_fit_cmd(self, train_name: str, model_path: str) -> list[str]:
         return [
-            "fit.py",
+            "-m",
+            f"{self._module}.fit",
             train_name,
             model_path,
             self._mode,

diff --git a/tests/models_test/inprocess_test/zafar_test.py b/tests/models_test/inprocess_test/zafar_test.py
@@ -132,7 +132,7 @@ def test_zafar(toy_train_test: TrainTestPair, zafar_teardown: None) -> None:
     assert zafar_eq_odds.name == "ZafarEqOdds, τ=5.0, μ=1.2 ε=0.0001"
 
     predictions = zafar_eq_odds.run(train, test)
-    assert np.count_nonzero(predictions.hard.to_numpy() == 1) == 40
+    assert np.count_nonzero(predictions.hard.to_numpy() == 1) == 42
 
     predictions = zafar_eq_odds.fit(train).predict(test)
-    assert np.count_nonzero(predictions.hard.to_numpy() == 1) == 40
+    assert np.count_nonzero(predictions.hard.to_numpy() == 1) == 42