Source code for causalml.inference.meta.tlearner

from copy import deepcopy
import logging
import numpy as np
from packaging import version
from scipy.stats import norm
import sklearn
from sklearn.exceptions import ConvergenceWarning
from sklearn.neural_network import MLPRegressor

if version.parse(sklearn.__version__) >= version.parse("0.22.0"):
    from sklearn.utils._testing import ignore_warnings
else:
    from sklearn.utils.testing import ignore_warnings
from tqdm import tqdm
from xgboost import XGBRegressor

from causalml.inference.meta.base import BaseLearner
from causalml.inference.meta.utils import (
    check_treatment_vector,
    collect_if_lazy,
    filter_mask,
    n_rows,
    to_numpy,
)
from causalml.metrics import regression_metrics, classification_metrics

logger = logging.getLogger("causalml")



[docs]
class BaseTLearner(BaseLearner):
    """A parent class for T-learner classes (regressors and classifiers).

    A T-learner estimates treatment effects with two machine learning models:
    an outcome model fit on the control group and an outcome model fit on each
    treatment group. The treatment effect is the difference of their
    predictions.

    Details of T-learner are available at `Kunzel et al. (2018) <https://arxiv.org/abs/1706.03461>`_.
    """

    def __init__(
        self,
        learner=None,
        control_learner=None,
        treatment_learner=None,
        ate_alpha=0.05,
        control_name=0,
    ):
        """Initialize a T-learner.

        Args:
            learner (model, optional): a model to estimate both control and
                treatment outcomes. Used for whichever of ``control_learner`` /
                ``treatment_learner`` is not given.
            control_learner (model, optional): a model to estimate control outcomes
            treatment_learner (model, optional): a model to estimate treatment outcomes
            ate_alpha (float, optional): the confidence level alpha of the ATE estimate
            control_name (str or int, optional): name of control group

        Note: arguments are stored verbatim (scikit-learn convention) so that
        ``get_params`` / ``clone`` work correctly. Model construction is deferred
        to ``fit()``. Per the scikit-learn convention, ``__init__`` does not
        validate or raise; a missing learner configuration is reported by
        ``fit()`` as a ``ValueError``.
        """
        # Store verbatim — no deepcopy, no logic (scikit-learn convention).
        self.learner = learner
        self.control_learner = control_learner
        self.treatment_learner = treatment_learner
        self.ate_alpha = ate_alpha
        self.control_name = control_name


[docs]
    @ignore_warnings(category=ConvergenceWarning)
    def fit(
        self,
        X,
        treatment,
        y,
        p=None,
        store_bootstraps=False,
        n_bootstraps=200,
        bootstrap_size=10000,
        random_state=None,
        n_jobs=1,
    ):
        """Fit the inference model.

        One control model is fit on the rows whose treatment equals
        ``self.control_name``; one treatment model is fit per remaining
        treatment group.

        Args:
            X (np.matrix, np.array, pd.DataFrame, pl.DataFrame, or pl.LazyFrame): a feature matrix.
                A pl.LazyFrame is collected once at the start of this method; the
                feature matrix is otherwise kept in its native format throughout.
            treatment (np.array, pd.Series, or pl.Series): a treatment vector
            y (np.array, pd.Series, or pl.Series): an outcome vector
            p: unused, kept for API consistency
            store_bootstraps (bool, optional): if True, trains a bootstrap ensemble
                during fit and stores it in self.bootstrap_models_ for post-fit CI
                estimation via predict(return_ci=True). Default: False.
            n_bootstraps (int, optional): number of bootstrap iterations. Default: 200.
            bootstrap_size (int, optional): number of samples per bootstrap. Default: 10000.
            random_state (int, optional): random seed for reproducible bootstrap sampling.
            n_jobs (int, optional): number of parallel jobs for bootstrap fitting.
                -1 uses all available cores. Default: 1.

        Returns:
            self: the fitted learner.

        Raises:
            ValueError: if neither ``learner`` nor both ``control_learner`` and
                ``treatment_learner`` were supplied to the constructor.
        """
        # Validate the constructor arguments before touching the data, so a
        # misconfigured learner fails before a LazyFrame is collected.
        if (self.learner is None) and (
            (self.control_learner is None) or (self.treatment_learner is None)
        ):
            raise ValueError(
                "Either `learner` or both `control_learner` and `treatment_learner` "
                "must be specified."
            )

        X = collect_if_lazy(X)
        check_treatment_vector(treatment, self.control_name)
        treatment_np = to_numpy(treatment)
        y_np = to_numpy(y)

        # np.unique already returns the unique values in sorted order.
        self.t_groups = np.unique(treatment_np[treatment_np != self.control_name])
        self._classes = {group: i for i, group in enumerate(self.t_groups)}

        # Resolve the base models from the stored constructor args. They are
        # only templates: every fitted model below is a deepcopy of one, so
        # the user-supplied estimators are never fitted or mutated.
        control_template = (
            self.control_learner if self.control_learner is not None else self.learner
        )
        treatment_template = (
            self.treatment_learner
            if self.treatment_learner is not None
            else self.learner
        )

        self.models_t = {group: deepcopy(treatment_template) for group in self.t_groups}

        # model_c is trained on the control group, which is identical for every
        # treatment group, so fit it once.
        control_mask = treatment_np == self.control_name
        self.model_c = deepcopy(control_template)
        self.model_c.fit(filter_mask(X, control_mask), y_np[control_mask])
        # Expose as a shared-reference dict to preserve the public models_c API.
        self.models_c = {group: self.model_c for group in self.t_groups}

        for group in self.t_groups:
            treatment_mask = treatment_np == group
            self.models_t[group].fit(
                filter_mask(X, treatment_mask), y_np[treatment_mask]
            )

        if store_bootstraps:
            self.fit_bootstrap_ensemble(
                X=X,
                treatment=treatment_np,
                y=y_np,
                n_bootstraps=n_bootstraps,
                bootstrap_size=bootstrap_size,
                random_state=random_state,
                n_jobs=n_jobs,
            )
        else:
            # Explicitly mark the absence of an ensemble so that
            # predict(return_ci=True) can report it.
            self.bootstrap_models_ = None
        return self


    def _compute_bootstrap_ci(self, X):
        """Compute bootstrap CI using stored ensemble.

        Every learner in ``self.bootstrap_models_`` predicts treatment effects
        for ``X``; the bounds are the ``ate_alpha / 2`` and
        ``1 - ate_alpha / 2`` percentiles across the ensemble.

        Args:
            X (np.matrix, np.array, pd.DataFrame, pl.DataFrame, or pl.LazyFrame): a feature matrix
        Returns:
            (te_lower, te_upper): percentile CI bounds, each of shape [n_samples, n_treatment]
        Raises:
            ValueError: if no bootstrap ensemble is stored (missing, None or empty).
        """
        # getattr: the attribute does not exist at all until fit() has run.
        ensemble = getattr(self, "bootstrap_models_", None)
        if ensemble is None or len(ensemble) == 0:
            raise ValueError(
                "No bootstrap ensemble found. Call fit(..., store_bootstraps=True) first."
            )

        # Collect here as well so a direct call with a LazyFrame behaves like
        # the public methods (which collect before delegating to this helper).
        X = collect_if_lazy(X)

        te_bootstraps = np.zeros(
            (n_rows(X), self.t_groups.shape[0], len(ensemble))
        )
        for b, learner_b in enumerate(ensemble):
            te_bootstraps[:, :, b] = learner_b.predict(X)

        lower_q = (self.ate_alpha / 2) * 100
        upper_q = (1 - self.ate_alpha / 2) * 100
        # A sequence of percentiles adds a leading axis of length 2, which
        # unpacks into the lower and upper bound arrays.
        te_lower, te_upper = np.percentile(te_bootstraps, [lower_q, upper_q], axis=2)
        return te_lower, te_upper


[docs]
    def predict(
        self,
        X,
        treatment=None,
        y=None,
        p=None,
        return_components=False,
        verbose=True,
        return_ci=False,
    ):
        """Predict treatment effects.

        Args:
            X (np.matrix, np.array, pd.DataFrame, pl.DataFrame, or pl.LazyFrame): a feature matrix.
                A pl.LazyFrame is collected once at the start of this method.
            treatment (np.array, pd.Series, or pl.Series, optional): a treatment vector
            y (np.array, pd.Series, or pl.Series, optional): an outcome vector
            p: unused, kept for API consistency
            return_components (bool, optional): whether to return outcome for treatment and control separately
            verbose (bool, optional): whether to output progress logs. Error
                metrics are only reported when both ``treatment`` and ``y``
                are given.
            return_ci (bool, optional): whether to return confidence intervals
                using the stored bootstrap ensemble. Requires fit() to have been
                called with store_bootstraps=True.
        Returns:
            (numpy.ndarray): Predictions of treatment effects, of shape
                [n_samples, n_treatment]. If return_ci=True,
                returns (te, te_lower, te_upper) each of shape [n_samples, n_treatment].
                If return_components=True, returns (te, yhat_cs, yhat_ts) where
                the last two are dicts keyed by treatment group holding the
                predicted control and treatment outcomes.
        Raises:
            ValueError: if both ``return_ci`` and ``return_components`` are True.
        """
        if return_ci and return_components:
            raise ValueError("return_ci and return_components cannot both be True.")

        X = collect_if_lazy(X)

        yhat_c = self.model_c.predict(X)
        # Shared-reference dict — no array duplication
        yhat_cs = {group: yhat_c for group in self.t_groups}
        yhat_ts = {group: self.models_t[group].predict(X) for group in self.t_groups}

        if (y is not None) and (treatment is not None) and verbose:
            # The conversion does not depend on the group, so do it once.
            treatment_np = to_numpy(treatment)
            for group in self.t_groups:
                # Restrict to rows of this treatment group or the control group.
                mask = (treatment_np == group) | (treatment_np == self.control_name)
                treatment_filt_np = treatment_np[mask]
                y_filt = to_numpy(filter_mask(y, mask))
                w = (treatment_filt_np == group).astype(int)

                # Use each row's own-arm prediction: control model for control
                # rows, treatment model for treated rows.
                yhat = np.zeros_like(y_filt, dtype=float)
                yhat[w == 0] = yhat_c[mask][w == 0]
                yhat[w == 1] = yhat_ts[group][mask][w == 1]

                logger.info("Error metrics for group {}".format(group))
                regression_metrics(y_filt, yhat, w)

        te = np.zeros((n_rows(X), self.t_groups.shape[0]))
        for i, group in enumerate(self.t_groups):
            te[:, i] = yhat_ts[group] - yhat_c

        if return_ci:
            te_lower, te_upper = self._compute_bootstrap_ci(X)
            return te, te_lower, te_upper

        if return_components:
            return te, yhat_cs, yhat_ts
        return te



[docs]
    def fit_predict(
        self,
        X,
        treatment,
        y,
        p=None,
        return_ci=False,
        n_bootstraps=1000,
        bootstrap_size=10000,
        return_components=False,
        verbose=True,
    ):
        """Fit the inference model of the T learner and predict treatment effects.

        Args:
            X (np.matrix, np.array, pd.DataFrame, pl.DataFrame, or pl.LazyFrame): a feature matrix
            treatment (np.array, pd.Series, or pl.Series): a treatment vector
            y (np.array, pd.Series, or pl.Series): an outcome vector
            p: unused, kept for API consistency
            return_ci (bool): whether to return confidence intervals
            n_bootstraps (int): number of bootstrap iterations
            bootstrap_size (int): number of samples per bootstrap
            return_components (bool, optional): whether to return outcome for treatment and control separately
            verbose (bool): whether to output progress logs
        Returns:
            (numpy.ndarray): Predictions of treatment effects. Output dim: [n_samples, n_treatment].
                If return_ci, returns CATE [n_samples, n_treatment], LB [n_samples, n_treatment],
                UB [n_samples, n_treatment]. If return_components is also set,
                the first element of that tuple is itself the
                (te, yhat_cs, yhat_ts) tuple returned by ``predict``.
        """
        X = collect_if_lazy(X)
        treatment_np = to_numpy(treatment)
        y_np = to_numpy(y)

        self.fit(X, treatment_np, y_np)
        # Forward ``verbose`` so callers can actually silence the metric logs.
        te = self.predict(
            X,
            treatment_np,
            y_np,
            return_components=return_components,
            verbose=verbose,
        )

        if not return_ci:
            return te

        # Each bootstrap iteration refits this learner in place, so snapshot
        # the models fitted on the full data and put them back afterwards.
        t_groups_global = self.t_groups
        _classes_global = self._classes
        model_c_global = deepcopy(self.model_c)
        models_t_global = deepcopy(self.models_t)
        te_bootstraps = np.zeros(
            shape=(n_rows(X), self.t_groups.shape[0], n_bootstraps)
        )

        logger.info("Bootstrap Confidence Intervals")
        try:
            for i in tqdm(range(n_bootstraps)):
                te_bootstraps[:, :, i] = self.bootstrap(
                    X, treatment_np, y_np, size=bootstrap_size
                )
        finally:
            # Restore the full-data models even if a bootstrap iteration
            # failed; otherwise the learner would be left holding the models
            # of the last bootstrap sample. The snapshots are private copies,
            # so they can be assigned back without copying again.
            self.t_groups = t_groups_global
            self._classes = _classes_global
            self.model_c = model_c_global
            self.models_c = {group: self.model_c for group in self.t_groups}
            self.models_t = models_t_global

        te_lower = np.percentile(te_bootstraps, (self.ate_alpha / 2) * 100, axis=2)
        te_upper = np.percentile(
            te_bootstraps, (1 - self.ate_alpha / 2) * 100, axis=2
        )

        return (te, te_lower, te_upper)



[docs]
    def estimate_ate(
        self,
        X,
        treatment,
        y,
        p=None,
        bootstrap_ci=False,
        n_bootstraps=1000,
        bootstrap_size=10000,
        pretrain=False,
    ):
        """Estimate the Average Treatment Effect (ATE).

        Args:
            X (np.matrix, np.array, pd.DataFrame, pl.DataFrame, or pl.LazyFrame): a feature matrix
            treatment (np.array, pd.Series, or pl.Series): a treatment vector
            y (np.array, pd.Series, or pl.Series): an outcome vector
            p: unused, kept for API consistency
            bootstrap_ci (bool): whether to return bootstrap percentile
                confidence intervals instead of the normal-approximation ones
            n_bootstraps (int): number of bootstrap iterations
            bootstrap_size (int): number of samples per bootstrap
            pretrain (bool): whether a model has been fit, default False.
                If False, the learner is (re)fit on the given data first.
        Returns:
            The mean and confidence interval (LB, UB) of the ATE estimate, as
            three arrays with one entry per treatment group.
        """
        X = collect_if_lazy(X)
        treatment_np = to_numpy(treatment)
        y_np = to_numpy(y)

        if pretrain:
            te, yhat_cs, yhat_ts = self.predict(
                X, treatment_np, y_np, return_components=True
            )
        else:
            te, yhat_cs, yhat_ts = self.fit_predict(
                X, treatment_np, y_np, return_components=True
            )

        n_groups = self.t_groups.shape[0]
        ate = np.zeros(n_groups)
        ate_lb = np.zeros(n_groups)
        ate_ub = np.zeros(n_groups)

        # Two-sided critical value; identical for every treatment group.
        z_crit = norm.ppf(1 - self.ate_alpha / 2)

        for i, group in enumerate(self.t_groups):
            _ate = te[:, i].mean()

            # Restrict to rows of this treatment group or the control group.
            mask = (treatment_np == group) | (treatment_np == self.control_name)
            treatment_filt = treatment_np[mask]
            y_filt = y_np[mask]
            w = (treatment_filt == group).astype(int)
            # Share of treated rows; vectorised sum instead of the builtin.
            prob_treatment = float(w.sum()) / w.shape[0]

            yhat_c = yhat_cs[group][mask]
            yhat_t = yhat_ts[group][mask]

            # Standard error: residual variance of each arm scaled by the
            # inverse of its share, plus the variance of the predicted effect,
            # all divided by the number of rows.
            se = np.sqrt(
                (
                    (y_filt[w == 0] - yhat_c[w == 0]).var() / (1 - prob_treatment)
                    + (y_filt[w == 1] - yhat_t[w == 1]).var() / prob_treatment
                    + (yhat_t - yhat_c).var()
                )
                / y_filt.shape[0]
            )

            ate[i] = _ate
            ate_lb[i] = _ate - se * z_crit
            ate_ub[i] = _ate + se * z_crit

        if not bootstrap_ci:
            return ate, ate_lb, ate_ub

        # Each bootstrap iteration refits this learner in place, so snapshot
        # the current models and put them back afterwards.
        t_groups_global = self.t_groups
        _classes_global = self._classes
        model_c_global = deepcopy(self.model_c)
        models_t_global = deepcopy(self.models_t)

        logger.info("Bootstrap Confidence Intervals for ATE")
        ate_bootstraps = np.zeros(shape=(n_groups, n_bootstraps))

        try:
            for n in tqdm(range(n_bootstraps)):
                ate_b = self.bootstrap(X, treatment_np, y_np, size=bootstrap_size)
                ate_bootstraps[:, n] = ate_b.mean(axis=0)
        finally:
            # Restore even if a bootstrap iteration failed; otherwise the
            # learner would keep the models of the last bootstrap sample. The
            # snapshots are private copies, so no second copy is needed.
            self.t_groups = t_groups_global
            self._classes = _classes_global
            self.model_c = model_c_global
            self.models_c = {group: self.model_c for group in self.t_groups}
            self.models_t = models_t_global

        ate_lower = np.percentile(
            ate_bootstraps, (self.ate_alpha / 2) * 100, axis=1
        )
        ate_upper = np.percentile(
            ate_bootstraps, (1 - self.ate_alpha / 2) * 100, axis=1
        )

        return ate, ate_lower, ate_upper





[docs]
class BaseTRegressor(BaseTLearner):
    """A parent class for T-learner regressor classes.

    Adds no behavior of its own: fitting and prediction are inherited from
    ``BaseTLearner``.
    """

    def __init__(
        self,
        learner=None,
        control_learner=None,
        treatment_learner=None,
        ate_alpha=0.05,
        control_name=0,
    ):
        """Initialize a T-learner regressor.

        Args:
            learner (model, optional): a model to estimate control and treatment outcomes.
            control_learner (model, optional): a model to estimate control outcomes
            treatment_learner (model, optional): a model to estimate treatment outcomes
            ate_alpha (float, optional): the confidence level alpha of the ATE estimate
            control_name (str or int, optional): name of control group
        """
        # Pure pass-through to BaseTLearner.__init__.
        super().__init__(
            learner=learner,
            control_learner=control_learner,
            treatment_learner=treatment_learner,
            ate_alpha=ate_alpha,
            control_name=control_name,
        )




[docs]
class BaseTClassifier(BaseTLearner):
    """A parent class for T-learner classifier classes.

    Prediction uses column 1 of each model's ``predict_proba`` output as the
    outcome estimate, so the supplied learners must implement
    ``predict_proba``.
    """

    def __init__(
        self,
        learner=None,
        control_learner=None,
        treatment_learner=None,
        ate_alpha=0.05,
        control_name=0,
    ):
        """Initialize a T-learner classifier.

        Args:
            learner (model, optional): a model to estimate control and treatment outcomes.
            control_learner (model, optional): a model to estimate control outcomes
            treatment_learner (model, optional): a model to estimate treatment outcomes
            ate_alpha (float, optional): the confidence level alpha of the ATE estimate
            control_name (str or int, optional): name of control group
        """
        # Pure pass-through to BaseTLearner.__init__.
        super().__init__(
            learner=learner,
            control_learner=control_learner,
            treatment_learner=treatment_learner,
            ate_alpha=ate_alpha,
            control_name=control_name,
        )


[docs]
    def predict(
        self,
        X,
        treatment=None,
        y=None,
        p=None,
        return_components=False,
        verbose=True,
        return_ci=False,
    ):
        """Predict treatment effects.

        Outcomes are estimated as column 1 of each model's ``predict_proba``
        output; the treatment effect is the difference between the treatment
        and control estimates.

        Args:
            X (np.matrix, np.array, pd.DataFrame, pl.DataFrame, or pl.LazyFrame): a feature matrix.
                A pl.LazyFrame is collected once at the start of this method.
            treatment (np.array, pd.Series, or pl.Series, optional): a treatment vector
            y (np.array, pd.Series, or pl.Series, optional): an outcome vector
            p: unused, kept for API consistency
            return_components (bool, optional): whether to return outcome for treatment and control separately
            verbose (bool, optional): whether to output progress logs. Error
                metrics are only reported when both ``treatment`` and ``y``
                are given.
            return_ci (bool, optional): whether to return confidence intervals using
                the stored bootstrap ensemble.
        Returns:
            (numpy.ndarray): Predictions of treatment effects, of shape
                [n_samples, n_treatment]. If return_ci=True, returns
                (te, te_lower, te_upper). If return_components=True, returns
                (te, yhat_cs, yhat_ts) where the last two are dicts keyed by
                treatment group.
        Raises:
            ValueError: if both ``return_ci`` and ``return_components`` are True.
        """
        # Fail-fast: validate mutually exclusive flags before doing any work.
        # Consistent with BaseTLearner.predict which checks at the top.
        if return_ci and return_components:
            raise ValueError("return_ci and return_components cannot both be True.")

        X = collect_if_lazy(X)

        yhat_c = self.model_c.predict_proba(X)[:, 1]
        # Shared-reference dict: every group maps to the same control array.
        yhat_cs = {group: yhat_c for group in self.t_groups}
        yhat_ts = {
            group: self.models_t[group].predict_proba(X)[:, 1]
            for group in self.t_groups
        }

        if (y is not None) and (treatment is not None) and verbose:
            # The conversion does not depend on the group, so do it once.
            treatment_np = to_numpy(treatment)
            for group in self.t_groups:
                # Restrict to rows of this treatment group or the control group.
                mask = (treatment_np == group) | (treatment_np == self.control_name)
                treatment_filt_np = treatment_np[mask]
                y_filt = to_numpy(filter_mask(y, mask))
                w = (treatment_filt_np == group).astype(int)

                # Use each row's own-arm prediction: control model for control
                # rows, treatment model for treated rows.
                yhat = np.zeros_like(y_filt, dtype=float)
                yhat[w == 0] = yhat_c[mask][w == 0]
                yhat[w == 1] = yhat_ts[group][mask][w == 1]

                logger.info("Error metrics for group {}".format(group))
                classification_metrics(y_filt, yhat, w)

        te = np.zeros((n_rows(X), self.t_groups.shape[0]))
        for i, group in enumerate(self.t_groups):
            te[:, i] = yhat_ts[group] - yhat_c

        if return_ci:
            te_lower, te_upper = self._compute_bootstrap_ci(X)
            return te, te_lower, te_upper

        if return_components:
            return te, yhat_cs, yhat_ts
        return te





[docs]
class XGBTRegressor(BaseTRegressor):
    """T-learner regressor whose base learner is an ``XGBRegressor``."""

    def __init__(self, ate_alpha=0.05, control_name=0, *args, **kwargs):
        """Initialize a T-learner with two XGBoost models.

        Args:
            ate_alpha (float, optional): the confidence level alpha of the ATE estimate
            control_name (str or int, optional): name of control group
            *args: extra positional arguments forwarded to ``XGBRegressor``
            **kwargs: extra keyword arguments forwarded to ``XGBRegressor``
        """
        # Build the single base model; it is handed over as ``learner``.
        base_model = XGBRegressor(*args, **kwargs)
        super().__init__(
            learner=base_model, ate_alpha=ate_alpha, control_name=control_name
        )




[docs]
class MLPTRegressor(BaseTRegressor):
    """T-learner regressor whose base learner is an ``MLPRegressor``."""

    def __init__(self, ate_alpha=0.05, control_name=0, *args, **kwargs):
        """Initialize a T-learner with two MLP models.

        Args:
            ate_alpha (float, optional): the confidence level alpha of the ATE estimate
            control_name (str or int, optional): name of control group
            *args: extra positional arguments forwarded to ``MLPRegressor``
            **kwargs: extra keyword arguments forwarded to ``MLPRegressor``
        """
        # Build the single base model; it is handed over as ``learner``.
        base_model = MLPRegressor(*args, **kwargs)
        super().__init__(
            learner=base_model, ate_alpha=ate_alpha, control_name=control_name
        )