Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Zafar to Python 3 #829

Merged
merged 4 commits into from
Jan 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions ethicml/models/algorithm_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ class SubprocessAlgorithmMixin(ABC): # pylint: disable=too-few-public-methods
"""Mixin for running algorithms in a subprocess, to be used with :class:`Algorithm`."""

@property
def executable(self) -> str:
def executable(self) -> list[str]:
"""Path to a (Python) executable.

By default, the Python executable that called this script is used.
"""
return sys.executable
return [sys.executable]

def call_script(
self, cmd_args: list[str], env: dict[str, str] | None = None, cwd: Path | None = None
Expand All @@ -41,7 +41,7 @@ def call_script(
two_hours = 60 * 60 * 2 # 60secs * 60mins * 2 hours
try:
process = subprocess.run( # wait for process creation to finish
[self.executable] + cmd_args,
self.executable + cmd_args,
capture_output=True,
env=env,
cwd=cwd,
Expand Down
40 changes: 21 additions & 19 deletions ethicml/models/inprocess/installed_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class InstalledModel(SubprocessAlgorithmMixin, InAlgorithm, ABC):
simply the last part of the repository URL).
:param url: URL of the repository. (Default: None)
:param executable: Path to a Python executable. (Default: None.
:param use_poetry: If True, will try to use poetry instead of pipenv. (Default: False)
:param use_pdm: If ``True``, will try to use pdm instead of pipenv. (Default: False)
"""

def __init__(
Expand All @@ -43,8 +43,8 @@ def __init__(
dir_name: str,
top_dir: str,
url: str | None = None,
executable: str | None = None,
use_poetry: bool = False,
executable: list[str] | None = None,
use_pdm: bool = False,
):
# QUESTION: do we really need `store_dir`? we could also just clone the code into "."
self._store_dir: Path = Path(".") / dir_name # directory where code and venv are stored
Expand All @@ -56,10 +56,7 @@ def __init__(

if executable is None:
# create virtual environment
del use_poetry # see https://github.com/python-poetry/poetry/issues/4055
# self._create_venv(use_poetry=use_poetry)
self._create_venv(use_poetry=False)
self.__executable = str(self._code_path.resolve() / ".venv" / "bin" / "python")
self.__executable = self._create_venv(use_pdm=use_pdm)
else:
self.__executable = executable
self.__name = name
Expand All @@ -75,7 +72,7 @@ def _code_path(self) -> Path:
return self._store_dir / self._top_dir

@property
def executable(self) -> str:
def executable(self) -> list[str]:
"""Python executable from the virtualenv associated with the model."""
return self.__executable

Expand All @@ -88,21 +85,25 @@ def _clone_directory(self, url: str) -> None:
self._store_dir.mkdir()
git.cmd.Git(self._store_dir).clone(url)

def _create_venv(self, use_poetry: bool) -> None:
def _create_venv(self, use_pdm: bool) -> list[str]:
"""Create a venv based on the Pipfile in the repository.

:param use_poetry: Whether to use poetry instead of pipenv.
:param use_pdm: Whether to use pdm instead of pipenv.
:returns: Shell command to execute python scripts.
:raises RuntimeError: If the required package manager isn't found.
"""
venv_directory = self._code_path / ".venv"
if not venv_directory.exists():
if use_poetry and shutil.which("poetry") is not None: # use poetry instead of pipenv
if use_pdm: # use pdm instead of pipenv
pypackages_dir = self._code_path / "__pypackages__"
if not pypackages_dir.exists():
if shutil.which("pdm") is None:
raise RuntimeError("pdm must be installed")
environ = os.environ.copy()
environ["POETRY_VIRTUALENVS_CREATE"] = "true"
environ["POETRY_VIRTUALENVS_IN_PROJECT"] = "true"
subprocess.run(
["poetry", "install", "--no-root"], env=environ, check=True, cwd=self._code_path
)
return
environ["PDM_USE_VENV"] = "0"
subprocess.run(["pdm", "install"], env=environ, check=True, cwd=self._code_path)
return ["pdm", "run", "python"]

venv_directory = self._code_path.resolve() / ".venv"
if not venv_directory.exists():

environ = os.environ.copy()
environ["PIPENV_IGNORE_VIRTUALENVS"] = "1"
Expand All @@ -111,6 +112,7 @@ def _create_venv(self, use_poetry: bool) -> None:
environ["PIPENV_PIPFILE"] = str(self._code_path / "Pipfile")

subprocess.run([sys.executable, "-m", "pipenv", "install"], env=environ, check=True)
return [str(venv_directory / "bin" / "python")]

def remove(self) -> None:
"""Remove the directory that we created in :meth:`_clone_directory()`."""
Expand Down
2 changes: 1 addition & 1 deletion ethicml/models/inprocess/kamishima.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def __init__(self, *, eta: float = 1.0):
super().__init__(
name="Kamishima",
dir_name="kamishima",
url="https://github.com/predictive-analytics-lab/kamfadm.git",
url="https://github.com/wearepal/kamfadm.git",
top_dir="kamfadm",
)
self.eta = eta
Expand Down
35 changes: 18 additions & 17 deletions ethicml/models/inprocess/zafar.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
__all__ = ["ZafarAccuracy", "ZafarBaseline", "ZafarEqOdds", "ZafarEqOpp", "ZafarFairness"]


SUB_DIR_IMPACT: Final = Path(".") / "disparate_impact" / "run-classifier"
SUB_DIR_MISTREAT: Final = Path(".") / "disparate_mistreatment" / "run_classifier"
SUB_DIR_IMPACT: Final = "src.disparate_impact.run_classifier"
SUB_DIR_MISTREAT: Final = "src.disparate_mistreatment.run_classifier"


class _FitParams(NamedTuple):
Expand All @@ -27,15 +27,15 @@ class _FitParams(NamedTuple):


class _ZafarAlgorithmBase(InstalledModel):
def __init__(self, name: str, sub_dir: Path):
def __init__(self, name: str, module: str):
super().__init__(
name=name,
dir_name="zafar",
url="https://github.com/predictive-analytics-lab/fair-classification.git",
url="https://github.com/wearepal/fair-classification.git",
top_dir="fair-classification",
use_poetry=True,
use_pdm=True,
)
self._sub_dir = sub_dir
self._module = module
self._fit_params: _FitParams | None = None

@staticmethod
Expand Down Expand Up @@ -88,7 +88,7 @@ def _fit(
self._create_file_in_zafar_format(train, train_path, label_converter)

cmd = self._get_fit_cmd(str(train_path), str(model_path))
working_dir = self._code_path.resolve() / self._sub_dir
working_dir = self._code_path.resolve()
self.call_script(cmd, cwd=working_dir)

return _FitParams(model_path, label_converter)
Expand All @@ -100,7 +100,7 @@ def _predict(self, test: TestTuple, tmp_path: Path, fit_params: _FitParams) -> P
cmd = self._get_predict_cmd(
str(test_path), str(fit_params.model_path), str(predictions_path)
)
working_dir = self._code_path.resolve() / self._sub_dir
working_dir = self._code_path.resolve()
self.call_script(cmd, cwd=working_dir)
predictions = predictions_path.open().read()
predictions = json.loads(predictions)
Expand All @@ -113,7 +113,7 @@ def _get_fit_cmd(self, train_name: str, model_path: str) -> list[str]:
pass

def _get_predict_cmd(self, test_name: str, model_path: str, output_file: str) -> list[str]:
return ["predict.py", test_name, model_path, output_file]
return ["-m", f"{self._module}.predict", test_name, model_path, output_file]


class ZafarBaseline(_ZafarAlgorithmBase):
Expand All @@ -122,7 +122,7 @@ class ZafarBaseline(_ZafarAlgorithmBase):
is_fairness_algo: ClassVar[bool] = False

def __init__(self) -> None:
super().__init__(name="ZafarBaseline", sub_dir=SUB_DIR_IMPACT)
super().__init__(name="ZafarBaseline", module=SUB_DIR_IMPACT)

@property
@override
Expand All @@ -131,14 +131,14 @@ def hyperparameters(self) -> HyperParamType:

@override
def _get_fit_cmd(self, train_name: str, model_path: str) -> list[str]:
return ["fit.py", train_name, model_path, "baseline", "0"]
return ["-m", f"{self._module}.fit", train_name, model_path, "baseline", "0"]


class ZafarAccuracy(_ZafarAlgorithmBase):
"""Zafar with fairness."""

def __init__(self, *, gamma: float = 0.5):
super().__init__(name=f"ZafarAccuracy, γ={gamma}", sub_dir=SUB_DIR_IMPACT)
super().__init__(name=f"ZafarAccuracy, γ={gamma}", module=SUB_DIR_IMPACT)
self.gamma = gamma

@property
Expand All @@ -148,14 +148,14 @@ def hyperparameters(self) -> HyperParamType:

@override
def _get_fit_cmd(self, train_name: str, model_path: str) -> list[str]:
return ["fit.py", train_name, model_path, "gamma", str(self.gamma)]
return ["-m", f"{self._module}.fit", train_name, model_path, "gamma", str(self.gamma)]


class ZafarFairness(_ZafarAlgorithmBase):
"""Zafar with fairness."""

def __init__(self, *, C: float = 0.001):
super().__init__(name=f"ZafarFairness, C={C}", sub_dir=SUB_DIR_IMPACT)
super().__init__(name=f"ZafarFairness, C={C}", module=SUB_DIR_IMPACT)
self._c = C

@property
Expand All @@ -165,7 +165,7 @@ def hyperparameters(self) -> HyperParamType:

@override
def _get_fit_cmd(self, train_name: str, model_path: str) -> list[str]:
return ["fit.py", train_name, model_path, "c", str(self._c)]
return ["-m", f"{self._module}.fit", train_name, model_path, "c", str(self._c)]


class ZafarEqOpp(_ZafarAlgorithmBase):
Expand All @@ -176,7 +176,7 @@ class ZafarEqOpp(_ZafarAlgorithmBase):

def __init__(self, *, tau: float = 5.0, mu: float = 1.2, eps: float = 0.0001):
name = f"{self._base_name}, τ={tau}, μ={mu} ε={eps}"
super().__init__(name=name, sub_dir=SUB_DIR_MISTREAT)
super().__init__(name=name, module=SUB_DIR_MISTREAT)
self._tau = tau
self._mu = mu
self._eps = eps
Expand All @@ -189,7 +189,8 @@ def hyperparameters(self) -> HyperParamType:
@override
def _get_fit_cmd(self, train_name: str, model_path: str) -> list[str]:
return [
"fit.py",
"-m",
f"{self._module}.fit",
train_name,
model_path,
self._mode,
Expand Down
4 changes: 2 additions & 2 deletions tests/models_test/inprocess_test/zafar_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def test_zafar(toy_train_test: TrainTestPair, zafar_teardown: None) -> None:
assert zafar_eq_odds.name == "ZafarEqOdds, τ=5.0, μ=1.2 ε=0.0001"

predictions = zafar_eq_odds.run(train, test)
assert np.count_nonzero(predictions.hard.to_numpy() == 1) == 40
assert np.count_nonzero(predictions.hard.to_numpy() == 1) == 42

predictions = zafar_eq_odds.fit(train).predict(test)
assert np.count_nonzero(predictions.hard.to_numpy() == 1) == 40
assert np.count_nonzero(predictions.hard.to_numpy() == 1) == 42