add basic stat metric, and model performance report #136

Merged
merged 6 commits on Mar 3, 2023
Changes from 5 commits
213 changes: 201 additions & 12 deletions src/smclarify/bias/metrics/basic_stats.py
@@ -1,10 +1,12 @@
import logging
from typing import List

from typing import List, Optional, Dict
from smclarify.bias.metrics.common import divide, binary_confusion_matrix
from sklearn.metrics import confusion_matrix as sklearn_confusion_matrix
from pandas.api.types import CategoricalDtype
import pandas as pd
from .common import divide
from functional import seq

log = logging.getLogger(__name__)
logger = logging.getLogger(__name__)


def confusion_matrix(
@@ -20,15 +22,14 @@ def confusion_matrix(
:param sensitive_facet_index: boolean column indicating sensitive group
:param positive_label_index: boolean column indicating positive labels
:param positive_predicted_label_index: boolean column indicating positive predicted labels
:return list of fractions of true positives, false positives, false negatives, true negatives
:return fractions of true positives, false positives, false negatives, true negatives for
the sensitive facet only (sensitive_facet = True)
"""
TP_d = len(feature[positive_label_index & positive_predicted_label_index & sensitive_facet_index])
FN_d = len(feature[positive_label_index & (~positive_predicted_label_index) & sensitive_facet_index])

TN_d = len(feature[(~positive_label_index) & (~positive_predicted_label_index) & sensitive_facet_index])
FP_d = len(feature[(~positive_label_index) & positive_predicted_label_index & sensitive_facet_index])
size = len(feature[sensitive_facet_index])
return [divide(TP_d, size), divide(FP_d, size), divide(FN_d, size), divide(TN_d, size)]
return binary_confusion_matrix(
feature[sensitive_facet_index],
positive_label_index[sensitive_facet_index],
positive_predicted_label_index[sensitive_facet_index],
)


def proportion(sensitive_facet_index: pd.Series) -> float:
@@ -39,3 +40,191 @@ def proportion(sensitive_facet_index: pd.Series) -> float:
:return: the fraction of examples in the sensitive facet.
"""
return sum(sensitive_facet_index) / len(sensitive_facet_index)


def observed_label_distribution(
feature: pd.DataFrame, sensitive_facet_index: pd.Series, positive_label_index: pd.Series
) -> List[float]:
r"""
Distribution of observed label outcomes for sensitive facet

:param feature: input feature
:param sensitive_facet_index: boolean column indicating sensitive group
:param positive_label_index: boolean column indicating positive labels
:return: List of proportions of positive and negative label outcomes
"""
pos = len(feature[sensitive_facet_index & positive_label_index])
n = len(feature[sensitive_facet_index])
proportion_pos = divide(pos, n)
return [proportion_pos, 1 - proportion_pos]
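A minimal usage sketch with hypothetical toy data (the column name and values below are made up for illustration):

```python
import pandas as pd

# Hypothetical toy data: 4 of 5 rows are in the sensitive facet,
# and 3 of those 4 carry a positive observed label.
feature = pd.DataFrame({"x": ["a", "b", "c", "d", "e"]})
sensitive_facet_index = pd.Series([True, True, True, True, False])
positive_label_index = pd.Series([True, True, True, False, False])

observed_label_distribution(feature, sensitive_facet_index, positive_label_index)
# [0.75, 0.25]
```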


# Model Performance Metrics
def accuracy(TP: int, FP: int, TN: int, FN: int) -> float:
r"""
Proportion of inputs assigned the correct predicted label by the model.

:param TP: Counts of labels which were correctly predicted positive
:param FP: Counts of labels which were incorrectly predicted positive
:param TN: Counts of labels which were correctly predicted negative
:param FN: Counts of labels which were incorrectly predicted negative
:return: Proportion of inputs assigned the correct predicted label by the model.
"""
return divide(TN + TP, TN + FP + FN + TP)


def PPL(TP: int, FP: int, TN: int, FN: int) -> float:
r"""
Proportion of inputs assigned the positive predicted label.

:param TP: Counts of labels which were correctly predicted positive
:param FP: Counts of labels which were incorrectly predicted positive
:param TN: Counts of labels which were correctly predicted negative
:param FN: Counts of labels which were incorrectly predicted negative
:return: Proportion of inputs assigned the positive predicted label.
"""
return divide(TP + FP, TN + FP + FN + TP)


def PNL(TP: int, FP: int, TN: int, FN: int) -> float:
r"""
Proportion of inputs assigned the negative predicted label.

:param TP: Counts of labels which were correctly predicted positive
:param FP: Counts of labels which were incorrectly predicted positive
:param TN: Counts of labels which were correctly predicted negative
:param FN: Counts of labels which were incorrectly predicted negative
:return: Proportion of inputs assigned the negative predicted label.
"""
return divide(TN + FN, TN + FP + FN + TP)


def recall(TP: int, FN: int) -> float:
r"""
Proportion of inputs with positive observed label correctly assigned the positive predicted label.

:param TP: Counts of labels which were correctly predicted positive
:param FN: Counts of labels which were incorrectly predicted negative
:return: Proportion of inputs with positive observed label correctly assigned the positive predicted label.
"""
return divide(TP, TP + FN)


def specificity(TN: int, FP: int) -> float:
r"""
Proportion of inputs with negative observed label correctly assigned the negative predicted label.

:param FP: Counts of labels which were incorrectly predicted positive
:param TN: Counts of labels which were correctly predicted negative
:return: Proportion of inputs with negative observed label correctly assigned the negative predicted label.
"""
return divide(TN, TN + FP)


def precision(TP: int, FP: int) -> float:
r"""
Proportion of inputs with positive predicted label that actually have a positive observed label.

:param TP: Counts of labels which were correctly predicted positive
:param FP: Counts of labels which were incorrectly predicted positive
:return: Proportion of inputs with positive predicted label that actually have a positive observed label.
"""
return divide(TP, TP + FP)


def rejection_rate(TN: int, FN: int) -> float:
r"""
Proportion of inputs with negative predicted label that actually have a negative observed label.

:param TN: Counts of labels which were correctly predicted negative
:param FN: Counts of labels which were incorrectly predicted negative
:return: Proportion of inputs with negative predicted label that actually have a negative observed label.
"""
return divide(TN, TN + FN)


def conditional_acceptance(TP: int, FP: int, FN: int) -> float:
r"""
Ratio between the positive observed labels and positive predicted labels.

:param TP: Counts of labels which were correctly predicted positive
:param FP: Counts of labels which were incorrectly predicted positive
:param FN: Counts of labels which were incorrectly predicted negative
:return: Ratio between the positive observed labels and positive predicted labels.
"""
return divide(TP + FN, TP + FP)


def conditional_rejection(FP: int, TN: int, FN: int) -> float:
r"""
Ratio between the negative observed labels and negative predicted labels.

:param FP: Counts of labels which were incorrectly predicted positive
:param TN: Counts of labels which were correctly predicted negative
:param FN: Counts of labels which were incorrectly predicted negative
:return: Ratio between the negative observed labels and negative predicted labels.
"""
return divide(TN + FP, TN + FN)


def f1_score(TP: int, FP: int, FN: int) -> float:
r"""
Harmonic mean of precision and recall.

:param TP: Counts of labels which were correctly predicted positive
:param FP: Counts of labels which were incorrectly predicted positive
:param FN: Counts of labels which were incorrectly predicted negative
:return: Harmonic mean of precision and recall.
"""
precision_score = precision(TP, FP)
recall_score = recall(TP, FN)
return 2 * divide(precision_score * recall_score, precision_score + recall_score)
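The metric helpers above all consume raw confusion-matrix counts. A minimal sketch of how they might be combined into a performance report (the counts and the report keys below are hypothetical):

```python
# Hypothetical confusion-matrix counts; any non-negative integers work.
TP, FP, TN, FN = 40, 10, 35, 15

performance_report = {
    "accuracy": accuracy(TP, FP, TN, FN),              # (TP + TN) / total
    "positive_predicted_labels": PPL(TP, FP, TN, FN),  # share predicted positive
    "negative_predicted_labels": PNL(TP, FP, TN, FN),  # share predicted negative
    "recall": recall(TP, FN),
    "specificity": specificity(TN, FP),
    "precision": precision(TP, FP),
    "rejection_rate": rejection_rate(TN, FN),
    "conditional_acceptance": conditional_acceptance(TP, FP, FN),
    "conditional_rejection": conditional_rejection(FP, TN, FN),
    "f1": f1_score(TP, FP, FN),
}
```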


# Multicategory Confusion Matrix
def multicategory_confusion_matrix(
Contributor comment:
would be good to add an example in the docstring for reference, as I had to look at the test to fully understand it.

label_series: pd.Series, predicted_label_series: pd.Series
) -> Optional[Dict[str, Dict]]:
"""
Confusion Matrix for categorical label cases.
:param label_series: Label Data Series
:param predicted_label_series: Predicted Label Data Series
:return: Matrix JSON where rows refer to true labels, and columns refer to predicted labels
Reviewer comment:
What are the values in each column? Are these normalized? Looking at the code, does not look like the values are normalized, but in confusion_matrix, we normalize them. Why the difference?

"""
# Handle differing pd.Series dtypes
unique_label_values = list(label_series.unique())
unique_label_values.sort()
if label_series.dtype.name != predicted_label_series.dtype.name:
try:
predicted_label_series = predicted_label_series.astype(label_series.dtype)
except Exception as e:
logger.warning(
f"Predicted Label Series type {predicted_label_series.dtype.name} could not be cast as Label Series type {label_series.dtype.name}. "
f"Multicategory Confusion Matrix won't be computed due to: {e}"
)
return None
# Handle CategoricalDtype difference (see test/integration/test_bias_metrics)
if label_series.dtype == "category":
try:
pred_label_category = predicted_label_series.dtype.categories.astype(label_series.dtype.categories.dtype)
category_obj = CategoricalDtype(pred_label_category, label_series.dtype.ordered)
predicted_label_series = predicted_label_series.astype(category_obj)
except Exception as e:
logger.warning(
f"Predicted Label Series could not be cast as Label Series type. "
f"Multicategory Confusion Matrix won't be computed due to: {e}"
)
return None
confusion_matrix_array = sklearn_confusion_matrix(label_series, predicted_label_series, labels=unique_label_values)
assert confusion_matrix_array.shape == (
len(unique_label_values),
len(unique_label_values),
)
matrix_json = {}
unique_label_strings = [str(val) for val in unique_label_values]
for index, val in enumerate(unique_label_strings):
confusion_matrix_floats = [float(cfn_val) for cfn_val in confusion_matrix_array[index]]
matrix_json[val] = seq(unique_label_strings).zip(confusion_matrix_floats).dict()

return matrix_json
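Following up on the review comments above, a minimal usage sketch with hypothetical toy labels; the inner values are the raw counts cast to float rather than normalized fractions:

```python
import pandas as pd

# Hypothetical labels and predictions (same dtype, so no cast is needed).
labels = pd.Series(["cat", "dog", "dog", "bird", "cat"])
predictions = pd.Series(["cat", "dog", "cat", "bird", "dog"])

multicategory_confusion_matrix(labels, predictions)
# Outer keys are true labels, inner keys are predicted labels:
# {"bird": {"bird": 1.0, "cat": 0.0, "dog": 0.0},
#  "cat":  {"bird": 0.0, "cat": 1.0, "dog": 1.0},
#  "dog":  {"bird": 0.0, "cat": 1.0, "dog": 1.0}}
```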
20 changes: 20 additions & 0 deletions src/smclarify/bias/metrics/common.py
@@ -49,6 +49,26 @@ def metric_description(metric: Callable[..., float]) -> str:
return metric.__doc__.lstrip().split("\n")[0] # type: ignore


def binary_confusion_matrix(
feature: pd.Series, positive_label_index: pd.Series, positive_predicted_label_index: pd.Series
) -> List[int]:
assert len(feature) == len(positive_label_index) == len(positive_predicted_label_index)
TP, TN, FP, FN = calc_confusion_matrix_quadrants(feature, positive_label_index, positive_predicted_label_index)
n = len(feature)
return [divide(TP, n), divide(FP, n), divide(FN, n), divide(TN, n)]


def calc_confusion_matrix_quadrants(
feature: pd.Series, positive_label_index: pd.Series, positive_predicted_label_index: pd.Series
) -> Tuple[int, int, int, int]:
TP = len(feature[positive_label_index & positive_predicted_label_index])
TN = len(feature[~positive_label_index & (~positive_predicted_label_index)])

FP = len(feature[(~positive_label_index) & positive_predicted_label_index])
FN = len(feature[(positive_label_index) & (~positive_predicted_label_index)])
return TP, TN, FP, FN
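A minimal sketch of the new helper with hypothetical toy inputs; the result is the [TP, FP, FN, TN] quadrants, each divided by the number of rows passed in:

```python
import pandas as pd

# Hypothetical toy inputs: exactly one TP, FN, FP, and TN.
feature = pd.Series(["a", "b", "c", "d"])
positive_label_index = pd.Series([True, True, False, False])
positive_predicted_label_index = pd.Series([True, False, True, False])

binary_confusion_matrix(feature, positive_label_index, positive_predicted_label_index)
# [0.25, 0.25, 0.25, 0.25]
```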


def DPL(feature: pd.Series, sensitive_facet_index: pd.Series, positive_label_index: pd.Series) -> float:
require(sensitive_facet_index.dtype == bool, "sensitive_facet_index must be of type bool")
require(positive_label_index.dtype == bool, "label_index must be of type bool")