Source code for causalml.optimize.policylearner

import logging

import numpy as np
from causalml.propensity import compute_propensity_score
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
from sklearn.model_selection import KFold
from sklearn.tree import DecisionTreeClassifier


logger = logging.getLogger("causalml")


[docs]class PolicyLearner: """ A Learner that learns a treatment assignment policy with observational data using doubly robust estimator of causal effect for binary treatment. Details of the policy learner are available at `Athey and Wager (2018) <https://arxiv.org/abs/1702.02896>`_. """ def __init__( self, outcome_learner=GradientBoostingRegressor(), treatment_learner=GradientBoostingClassifier(), policy_learner=DecisionTreeClassifier(), clip_bounds=(1e-3, 1 - 1e-3), n_fold=5, random_state=None, calibration=False, ): """Initialize a treatment assignment policy learner. Args: outcome_learner (optional): a regression model to estimate outcomes policy_learner (optional): a classification model to estimate treatment assignment. It needs to take `sample_weight` as an input argument for `fit()` clip_bounds (tuple, optional): lower and upper bounds for clipping propensity scores to avoid division by zero in PolicyLearner.fit() n_fold (int, optional): the number of cross validation folds for outcome_learner random_state (int or RandomState, optional): a seed (int) or random number generator (RandomState) """ self.model_mu = outcome_learner self.model_w = treatment_learner self.model_pi = policy_learner self.clip_bounds = clip_bounds self.cv = KFold(n_splits=n_fold, shuffle=True, random_state=random_state) self.calibration = calibration self._y_pred, self._tau_pred, self._w_pred, self._dr_score = ( None, None, None, None, ) def __repr__(self): return ( "{}(model_mu={},\n" "\tmodel_w={},\n" "\tmodel_pi={})".format( self.__class__.__name__, self.model_mu.__repr__(), self.model_w.__repr__(), self.model_pi.__repr__(), ) ) def _outcome_estimate(self, X, w, y): self._y_pred = np.zeros(len(y)) self._tau_pred = np.zeros(len(y)) for train_index, test_index in self.cv.split(y): X_train, X_test = X[train_index], X[test_index] w_train, w_test = w[train_index], w[test_index] y_train, _ = y[train_index], y[test_index] self.model_mu.fit( np.concatenate([X_train, w_train.reshape(-1, 1)], axis=1), y_train ) self._y_pred[test_index] = self.model_mu.predict( np.concatenate([X_test, w_test.reshape(-1, 1)], axis=1) ) self._tau_pred[test_index] = self.model_mu.predict( np.concatenate([X_test, np.ones((len(w_test), 1))], axis=1) ) - self.model_mu.predict( np.concatenate([X_test, np.zeros((len(w_test), 1))], axis=1) ) def _treatment_estimate(self, X, w): self._w_pred = np.zeros(len(w)) for train_index, test_index in self.cv.split(w): X_train, X_test = X[train_index], X[test_index] w_train, w_test = w[train_index], w[test_index] self._w_pred[test_index], _ = compute_propensity_score( X=X_train, treatment=w_train, X_pred=X_test, treatment_pred=w_test, calibrate_p=self.calibration, ) self._w_pred = np.clip( self._w_pred, a_min=self.clip_bounds[0], a_max=self.clip_bounds[1] )
[docs] def fit(self, X, treatment, y, p=None, dhat=None): """Fit the treatment assignment policy learner. Args: X (np.matrix): a feature matrix treatment (np.array): a treatment vector (1 if treated, otherwise 0) y (np.array): an outcome vector p (optional, np.array): user provided propensity score vector between 0 and 1 dhat (optinal, np.array): user provided predicted treatment effect vector Returns: self: returns an instance of self. """ logger.info( "generating out-of-fold CV outcome estimates with {}".format(self.model_mu) ) self._outcome_estimate(X, treatment, y) if dhat is not None: self._tau_pred = dhat if p is None: self._treatment_estimate(X, treatment) else: self._w_pred = np.clip(p, self.clip_bounds[0], self.clip_bounds[1]) # Doubly Robust Modification self._dr_score = self._tau_pred + (treatment - self._w_pred) / self._w_pred / ( 1 - self._w_pred ) * (y - self._y_pred) target = self._dr_score.copy() target = np.sign(target) logger.info("training the treatment assignment model, {}".format(self.model_pi)) self.model_pi.fit(X, target, sample_weight=abs(self._dr_score)) return self
[docs] def predict(self, X): """Predict treatment assignment that optimizes the outcome. Args: X (np.matrix): a feature matrix Returns: (numpy.ndarray): predictions of treatment assignment. """ return self.model_pi.predict(X)
[docs] def predict_proba(self, X): """Predict treatment assignment score that optimizes the outcome. Args: X (np.matrix): a feature matrix Returns: (numpy.ndarray): predictions of treatment assignment score. """ pi_hat = self.model_pi.predict_proba(X)[:, 1] return pi_hat