"""Using hard-coded dollar amounts x for false positives and y for false negatives, calculate the cost of a model using: `(x * FP + y * FN) / N`"""

import typing
import numpy as np
from h2oaicore.metrics import CustomScorer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix

class CostBinary(CustomScorer):
_description = "Calculates cost per row in binary classification: `(fp_cost*FP + fn_cost*FN) / N`"
_binary = True
_maximize = False
_perfect_score = 0
_display_name = "Cost"
_threshold = 0.5  # Example only, should be adjusted based on domain knowledge and other experiments

# The cost of false positives and negatives will vary by data set, we use the rules from the below as an example
# https://www.kaggle.com/uciml/aps-failure-at-scania-trucks-data-set
_fp_cost = 10
_fn_cost = 500

def score(self,
actual: np.array,
predicted: np.array,
sample_weight: typing.Optional[np.array] = None,
labels: typing.Optional[np.array] = None,
**kwargs) -> float:
# label actuals as 1 or 0
lb = LabelEncoder()
labels = lb.fit_transform(labels)
actual = lb.transform(actual)

# label predictions as 1 or 0
predicted = predicted >= self._threshold

# use sklearn to get fp and fn
cm = confusion_matrix(actual, predicted, sample_weight=sample_weight, labels=labels)
tn, fp, fn, tp = cm.ravel()

# calculate`\$1*FP + \$2*FN`
return ((fp * self.__class__._fp_cost) + (fn * self.__class__._fn_cost)) / (
tn + fp + fn + tp)  # divide by total weighted count to make loss invariant to data size