diff --git a/ethicml/models/algorithm_base.py b/ethicml/models/algorithm_base.py
index 96ebee87..f9dceb2d 100644
--- a/ethicml/models/algorithm_base.py
+++ b/ethicml/models/algorithm_base.py
@@ -21,12 +21,12 @@ class SubprocessAlgorithmMixin(ABC):  # pylint: disable=too-few-public-methods
     """Mixin for running algorithms in a subprocess, to be used with :class:`Algorithm`."""
 
     @property
-    def executable(self) -> str:
+    def executable(self) -> list[str]:
         """Path to a (Python) executable.
 
         By default, the Python executable that called this script is used.
         """
-        return sys.executable
+        return [sys.executable]
 
     def call_script(
         self, cmd_args: list[str], env: dict[str, str] | None = None, cwd: Path | None = None
@@ -41,7 +41,7 @@ def call_script(
         two_hours = 60 * 60 * 2  # 60secs * 60mins * 2 hours
         try:
             process = subprocess.run(  # wait for process creation to finish
-                [self.executable] + cmd_args,
+                self.executable + cmd_args,
                 capture_output=True,
                 env=env,
                 cwd=cwd,
diff --git a/ethicml/models/inprocess/installed_model.py b/ethicml/models/inprocess/installed_model.py
index 73ff170d..74baad11 100644
--- a/ethicml/models/inprocess/installed_model.py
+++ b/ethicml/models/inprocess/installed_model.py
@@ -34,7 +34,7 @@ class InstalledModel(SubprocessAlgorithmMixin, InAlgorithm, ABC):
         simply the last part of the repository URL).
     :param url: URL of the repository. (Default: None)
     :param executable: Path to a Python executable. (Default: None.
-    :param use_poetry: If True, will try to use poetry instead of pipenv. (Default: False)
+    :param use_pdm: If ``True``, will try to use pdm instead of pipenv. (Default: False)
     """
 
     def __init__(
@@ -43,8 +43,8 @@ def __init__(
         dir_name: str,
         top_dir: str,
         url: str | None = None,
-        executable: str | None = None,
-        use_poetry: bool = False,
+        executable: list[str] | None = None,
+        use_pdm: bool = False,
     ):
         # QUESTION: do we really need `store_dir`? we could also just clone the code into "."
         self._store_dir: Path = Path(".") / dir_name  # directory where code and venv are stored
@@ -56,10 +56,7 @@ def __init__(
 
         if executable is None:
             # create virtual environment
-            del use_poetry  # see https://github.com/python-poetry/poetry/issues/4055
-            # self._create_venv(use_poetry=use_poetry)
-            self._create_venv(use_poetry=False)
-            self.__executable = str(self._code_path.resolve() / ".venv" / "bin" / "python")
+            self.__executable = self._create_venv(use_pdm=use_pdm)
         else:
             self.__executable = executable
         self.__name = name
@@ -75,7 +72,7 @@ def _code_path(self) -> Path:
         return self._store_dir / self._top_dir
 
     @property
-    def executable(self) -> str:
+    def executable(self) -> list[str]:
         """Python executable from the virtualenv associated with the model."""
         return self.__executable
 
@@ -88,21 +85,25 @@ def _clone_directory(self, url: str) -> None:
             self._store_dir.mkdir()
         git.cmd.Git(self._store_dir).clone(url)
 
-    def _create_venv(self, use_poetry: bool) -> None:
+    def _create_venv(self, use_pdm: bool) -> list[str]:
         """Create a venv based on the Pipfile in the repository.
 
-        :param use_poetry: Whether to use poetry instead of pipenv.
+        :param use_pdm: Whether to use pdm instead of pipenv.
+        :returns: Shell command to execute python scripts.
+        :raises RuntimeError: If the required package manager isn't found.
""" - venv_directory = self._code_path / ".venv" - if not venv_directory.exists(): - if use_poetry and shutil.which("poetry") is not None: # use poetry instead of pipenv + if use_pdm: # use pdm instead of pipenv + pypackages_dir = self._code_path / "__pypackages__" + if not pypackages_dir.exists(): + if shutil.which("pdm") is None: + raise RuntimeError("pdm must be installed") environ = os.environ.copy() - environ["POETRY_VIRTUALENVS_CREATE"] = "true" - environ["POETRY_VIRTUALENVS_IN_PROJECT"] = "true" - subprocess.run( - ["poetry", "install", "--no-root"], env=environ, check=True, cwd=self._code_path - ) - return + environ["PDM_USE_VENV"] = "0" + subprocess.run(["pdm", "install"], env=environ, check=True, cwd=self._code_path) + return ["pdm", "run", "python"] + + venv_directory = self._code_path.resolve() / ".venv" + if not venv_directory.exists(): environ = os.environ.copy() environ["PIPENV_IGNORE_VIRTUALENVS"] = "1" @@ -111,6 +112,7 @@ def _create_venv(self, use_poetry: bool) -> None: environ["PIPENV_PIPFILE"] = str(self._code_path / "Pipfile") subprocess.run([sys.executable, "-m", "pipenv", "install"], env=environ, check=True) + return [str(venv_directory / "bin" / "python")] def remove(self) -> None: """Remove the directory that we created in :meth:`_clone_directory()`.""" diff --git a/ethicml/models/inprocess/kamishima.py b/ethicml/models/inprocess/kamishima.py index 1ddd2d2c..97dc53bb 100644 --- a/ethicml/models/inprocess/kamishima.py +++ b/ethicml/models/inprocess/kamishima.py @@ -34,7 +34,7 @@ def __init__(self, *, eta: float = 1.0): super().__init__( name="Kamishima", dir_name="kamishima", - url="https://github.com/predictive-analytics-lab/kamfadm.git", + url="https://github.com/wearepal/kamfadm.git", top_dir="kamfadm", ) self.eta = eta diff --git a/ethicml/models/inprocess/zafar.py b/ethicml/models/inprocess/zafar.py index 9e297dee..0823aaaf 100644 --- a/ethicml/models/inprocess/zafar.py +++ b/ethicml/models/inprocess/zafar.py @@ -17,8 +17,8 @@ __all__ = ["ZafarAccuracy", "ZafarBaseline", "ZafarEqOdds", "ZafarEqOpp", "ZafarFairness"] -SUB_DIR_IMPACT: Final = Path(".") / "disparate_impact" / "run-classifier" -SUB_DIR_MISTREAT: Final = Path(".") / "disparate_mistreatment" / "run_classifier" +SUB_DIR_IMPACT: Final = "src.disparate_impact.run_classifier" +SUB_DIR_MISTREAT: Final = "src.disparate_mistreatment.run_classifier" class _FitParams(NamedTuple): @@ -27,15 +27,15 @@ class _FitParams(NamedTuple): class _ZafarAlgorithmBase(InstalledModel): - def __init__(self, name: str, sub_dir: Path): + def __init__(self, name: str, module: str): super().__init__( name=name, dir_name="zafar", - url="https://github.com/predictive-analytics-lab/fair-classification.git", + url="https://github.com/wearepal/fair-classification.git", top_dir="fair-classification", - use_poetry=True, + use_pdm=True, ) - self._sub_dir = sub_dir + self._module = module self._fit_params: _FitParams | None = None @staticmethod @@ -88,7 +88,7 @@ def _fit( self._create_file_in_zafar_format(train, train_path, label_converter) cmd = self._get_fit_cmd(str(train_path), str(model_path)) - working_dir = self._code_path.resolve() / self._sub_dir + working_dir = self._code_path.resolve() self.call_script(cmd, cwd=working_dir) return _FitParams(model_path, label_converter) @@ -100,7 +100,7 @@ def _predict(self, test: TestTuple, tmp_path: Path, fit_params: _FitParams) -> P cmd = self._get_predict_cmd( str(test_path), str(fit_params.model_path), str(predictions_path) ) - working_dir = self._code_path.resolve() / self._sub_dir + 
working_dir = self._code_path.resolve() self.call_script(cmd, cwd=working_dir) predictions = predictions_path.open().read() predictions = json.loads(predictions) @@ -113,7 +113,7 @@ def _get_fit_cmd(self, train_name: str, model_path: str) -> list[str]: pass def _get_predict_cmd(self, test_name: str, model_path: str, output_file: str) -> list[str]: - return ["predict.py", test_name, model_path, output_file] + return ["-m", f"{self._module}.predict", test_name, model_path, output_file] class ZafarBaseline(_ZafarAlgorithmBase): @@ -122,7 +122,7 @@ class ZafarBaseline(_ZafarAlgorithmBase): is_fairness_algo: ClassVar[bool] = False def __init__(self) -> None: - super().__init__(name="ZafarBaseline", sub_dir=SUB_DIR_IMPACT) + super().__init__(name="ZafarBaseline", module=SUB_DIR_IMPACT) @property @override @@ -131,14 +131,14 @@ def hyperparameters(self) -> HyperParamType: @override def _get_fit_cmd(self, train_name: str, model_path: str) -> list[str]: - return ["fit.py", train_name, model_path, "baseline", "0"] + return ["-m", f"{self._module}.fit", train_name, model_path, "baseline", "0"] class ZafarAccuracy(_ZafarAlgorithmBase): """Zafar with fairness.""" def __init__(self, *, gamma: float = 0.5): - super().__init__(name=f"ZafarAccuracy, γ={gamma}", sub_dir=SUB_DIR_IMPACT) + super().__init__(name=f"ZafarAccuracy, γ={gamma}", module=SUB_DIR_IMPACT) self.gamma = gamma @property @@ -148,14 +148,14 @@ def hyperparameters(self) -> HyperParamType: @override def _get_fit_cmd(self, train_name: str, model_path: str) -> list[str]: - return ["fit.py", train_name, model_path, "gamma", str(self.gamma)] + return ["-m", f"{self._module}.fit", train_name, model_path, "gamma", str(self.gamma)] class ZafarFairness(_ZafarAlgorithmBase): """Zafar with fairness.""" def __init__(self, *, C: float = 0.001): - super().__init__(name=f"ZafarFairness, C={C}", sub_dir=SUB_DIR_IMPACT) + super().__init__(name=f"ZafarFairness, C={C}", module=SUB_DIR_IMPACT) self._c = C @property @@ -165,7 +165,7 @@ def hyperparameters(self) -> HyperParamType: @override def _get_fit_cmd(self, train_name: str, model_path: str) -> list[str]: - return ["fit.py", train_name, model_path, "c", str(self._c)] + return ["-m", f"{self._module}.fit", train_name, model_path, "c", str(self._c)] class ZafarEqOpp(_ZafarAlgorithmBase): @@ -176,7 +176,7 @@ class ZafarEqOpp(_ZafarAlgorithmBase): def __init__(self, *, tau: float = 5.0, mu: float = 1.2, eps: float = 0.0001): name = f"{self._base_name}, τ={tau}, μ={mu} ε={eps}" - super().__init__(name=name, sub_dir=SUB_DIR_MISTREAT) + super().__init__(name=name, module=SUB_DIR_MISTREAT) self._tau = tau self._mu = mu self._eps = eps @@ -189,7 +189,8 @@ def hyperparameters(self) -> HyperParamType: @override def _get_fit_cmd(self, train_name: str, model_path: str) -> list[str]: return [ - "fit.py", + "-m", + f"{self._module}.fit", train_name, model_path, self._mode, diff --git a/tests/models_test/inprocess_test/zafar_test.py b/tests/models_test/inprocess_test/zafar_test.py index f47d7a78..890cfbc3 100644 --- a/tests/models_test/inprocess_test/zafar_test.py +++ b/tests/models_test/inprocess_test/zafar_test.py @@ -132,7 +132,7 @@ def test_zafar(toy_train_test: TrainTestPair, zafar_teardown: None) -> None: assert zafar_eq_odds.name == "ZafarEqOdds, τ=5.0, μ=1.2 ε=0.0001" predictions = zafar_eq_odds.run(train, test) - assert np.count_nonzero(predictions.hard.to_numpy() == 1) == 40 + assert np.count_nonzero(predictions.hard.to_numpy() == 1) == 42 predictions = zafar_eq_odds.fit(train).predict(test) - assert 
+    assert np.count_nonzero(predictions.hard.to_numpy() == 1) == 42
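
Reviewer note on how the pieces fit together: making `executable` a command *prefix* (a `list[str]`) rather than a single interpreter path is what lets the pdm branch of `_create_venv()` (which produces a `__pypackages__` directory instead of a `.venv`) return `["pdm", "run", "python"]`, and what lets the Zafar wrappers invoke the vendored code as modules (`python -m src....fit`) from the repository root. Below is a minimal sketch of the new contract; the `PdmScriptRunner` class and the file names in the command are hypothetical, only the `executable` property and `call_script()` come from this diff:

```python
from ethicml.models.algorithm_base import SubprocessAlgorithmMixin


class PdmScriptRunner(SubprocessAlgorithmMixin):
    """Hypothetical runner for a pdm-managed checkout (illustration only)."""

    @property
    def executable(self) -> list[str]:
        # Under pdm's __pypackages__ layout there is no `.venv/bin/python`
        # to point at, so the "executable" must be a multi-token prefix.
        return ["pdm", "run", "python"]


runner = PdmScriptRunner()
# call_script() now concatenates `self.executable + cmd_args`, so this spawns:
#   pdm run python -m src.disparate_impact.run_classifier.fit train.txt model.out baseline 0
runner.call_script(
    ["-m", "src.disparate_impact.run_classifier.fit", "train.txt", "model.out", "baseline", "0"]
)
```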