Source code for causalml.metrics.rate

from typing import Optional

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy import stats
import logging

plt.style.use("fivethirtyeight")
RANDOM_COL = "Random"

logger = logging.getLogger("causalml")


def _compute_rate_from_toc(toc, weighting):
    """Compute the RATE scalar from a TOC DataFrame.

    Args:
        toc (pandas.DataFrame): TOC curve indexed by quantile q
        weighting (str): one of ``"autoc"`` or ``"qini"``

    Returns:
        (pandas.Series): RATE score for each model column
    """
    quantiles = toc.index.values
    q_mid = (quantiles[:-1] + quantiles[1:]) / 2
    toc_mid = (toc.iloc[:-1].values + toc.iloc[1:].values) / 2
    if weighting == "autoc":
        weights = 1.0 / q_mid
    else:
        weights = q_mid
    weights = weights / weights.sum()
    return pd.Series(
        np.average(toc_mid, axis=0, weights=weights),
        index=toc.columns,
    )


[docs] def get_toc( df, outcome_col="y", treatment_col="w", treatment_effect_col="tau", normalize=False, ): """Get the Targeting Operator Characteristic (TOC) of model estimates in population. TOC(q) is the difference between the ATE among the top-q fraction of units ranked by the prioritization score and the overall ATE. A positive TOC at low q indicates the model successfully identifies units with above-average treatment benefit. By definition, TOC(0) = 0 and TOC(1) = 0 (the subset ATE equals the overall ATE when the entire population is selected). If the true treatment effect is provided (e.g. in synthetic data), it's used directly to calculate TOC. Otherwise, it's estimated as the difference between the mean outcomes of the treatment and control groups in each quantile band. Note: when using observed outcomes (i.e. without ``treatment_effect_col``), the subset ATE is estimated via a naive difference-in-means. This is valid for randomized experiments (RCTs) but may be biased for observational data due to confounding within quantile bands. For observational settings, compute doubly-robust (AIPW) pseudo-outcomes externally and pass them as ``treatment_effect_col``. See Yadlowsky et al. (2021), Section 4 for details. If a quantile band contains only treated or only control units, the code falls back to TOC(q) = 0 for that band (i.e., subset ATE is set to the overall ATE). This is a conservative approximation and is logged as a warning. For details, see Yadlowsky et al. (2021), `Evaluating Treatment Prioritization Rules via Rank-Weighted Average Treatment Effects`. https://arxiv.org/abs/2111.07966 For the former, `treatment_effect_col` should be provided. For the latter, both `outcome_col` and `treatment_col` should be provided. Args: df (pandas.DataFrame): a data frame with model estimates and actual data as columns outcome_col (str, optional): the column name for the actual outcome treatment_col (str, optional): the column name for the treatment indicator (0 or 1) treatment_effect_col (str, optional): the column name for the true treatment effect normalize (bool, optional): whether to normalize the TOC curve by its maximum absolute value. Uses max(|TOC|) as the reference to avoid division by zero at q=1 where TOC is always zero by definition. Returns: (pandas.DataFrame): TOC values of model estimates in population, indexed by quantile q """ assert ( (outcome_col in df.columns and df[outcome_col].notnull().all()) and (treatment_col in df.columns and df[treatment_col].notnull().all()) or ( treatment_effect_col in df.columns and df[treatment_effect_col].notnull().all() ) ), "{outcome_col} and {treatment_col}, or {treatment_effect_col} should be present without null.".format( outcome_col=outcome_col, treatment_col=treatment_col, treatment_effect_col=treatment_effect_col, ) df = df.copy() model_names = [ x for x in df.columns if x not in [outcome_col, treatment_col, treatment_effect_col] ] use_oracle = ( treatment_effect_col in df.columns and df[treatment_effect_col].notnull().all() ) if use_oracle: overall_ate = df[treatment_effect_col].mean() else: treated = df[treatment_col] == 1 overall_ate = ( df.loc[treated, outcome_col].mean() - df.loc[~treated, outcome_col].mean() ) n_total = len(df) toc = [] for col in model_names: sorted_df = df.sort_values(col, ascending=False).reset_index(drop=True) if use_oracle: # O(n) via cumulative sum cumsum_tau = sorted_df[treatment_effect_col].cumsum().values counts = np.arange(1, n_total + 1) subset_ates = cumsum_tau / counts else: cumsum_tr = sorted_df[treatment_col].cumsum().values cumsum_ct = np.arange(1, n_total + 1) - cumsum_tr cumsum_y_tr = ( (sorted_df[outcome_col] * sorted_df[treatment_col]).cumsum().values ) cumsum_y_ct = ( (sorted_df[outcome_col] * (1 - sorted_df[treatment_col])) .cumsum() .values ) # Guard against division by zero when a band is all-treated or all-control; # fall back to overall_ate (TOC = 0) for those positions. with np.errstate(invalid="ignore", divide="ignore"): subset_ates = np.where( (cumsum_tr == 0) | (cumsum_ct == 0), overall_ate, cumsum_y_tr / cumsum_tr - cumsum_y_ct / cumsum_ct, ) if np.any((cumsum_tr == 0) | (cumsum_ct == 0)): logger.warning( "Some quantile bands contain only treated or only control units " "for column '%s'. TOC is set to 0 for those positions.", col, ) toc_values = subset_ates - overall_ate toc.append(pd.Series(toc_values, index=np.linspace(0, 1, n_total + 1)[1:])) toc = pd.concat(toc, join="inner", axis=1) toc.loc[0] = np.zeros((toc.shape[1],)) toc = toc.sort_index().interpolate() toc.columns = model_names toc.index.name = "q" if normalize: # Normalize by max absolute value rather than the value at q=1, which is # always zero by definition and would cause division by zero. max_abs = toc.abs().max() max_abs = max_abs.replace(0, 1) # guard for flat TOC curves toc = toc.div(max_abs, axis=1) return toc
[docs] def rate_score( df, outcome_col="y", treatment_col="w", treatment_effect_col="tau", weighting="autoc", normalize=False, return_ci=False, n_bootstrap=200, alpha=0.05, random_state=None, ): """Calculate the Rank-weighted Average Treatment Effect (RATE) score. RATE is the weighted area under the Targeting Operator Characteristic (TOC) curve: RATE = integral_0^1 alpha(q) * TOC(q) dq Two standard weighting schemes are supported (Yadlowsky et al., 2021): - ``"autoc"``: alpha(q) = 1/q. Places more weight on the highest-priority units. Most powerful when treatment effects are concentrated in a small subgroup. - ``"qini"``: alpha(q) = q. Uniform weighting across units; reduces to the Qini coefficient. More powerful when treatment effects are diffuse across the population. A positive RATE indicates the prioritization rule effectively identifies units with above-average treatment benefit. A RATE near zero suggests little heterogeneity or a poor prioritization rule. Note: the integral is approximated via a weighted mean over the discrete quantile grid using midpoint values. Weights are normalized to sum to 1 (i.e. ``weights / weights.sum()``), so the absolute scale matches the TOC values but may differ slightly from the paper's continuous integral definition. Model rankings are preserved. When return_ci=True, standard errors and confidence intervals are estimated via the half-sample bootstrap (m = n // 2 draws without replacement), which gives valid coverage for the RATE functional per the Yadlowsky et al. (2021) functional CLT. The p-value tests H0: RATE = 0 (i.e. the model's prioritization is no better than random) using a two-sided z-test. When using observed outcomes (without ``treatment_effect_col``), the underlying TOC estimates the subset ATE via naive difference-in-means, which is valid for RCTs but biased for observational data. For observational settings, pass AIPW pseudo-outcomes as ``treatment_effect_col``. See the ``get_toc()`` docstring for details. For details, see Yadlowsky et al. (2021), `Evaluating Treatment Prioritization Rules via Rank-Weighted Average Treatment Effects`. https://arxiv.org/abs/2111.07966 For the former, `treatment_effect_col` should be provided. For the latter, both `outcome_col` and `treatment_col` should be provided. Args: df (pandas.DataFrame): a data frame with model estimates and actual data as columns outcome_col (str, optional): the column name for the actual outcome treatment_col (str, optional): the column name for the treatment indicator (0 or 1) treatment_effect_col (str, optional): the column name for the true treatment effect weighting (str, optional): the weighting scheme for the RATE integral. One of ``"autoc"`` (default) or ``"qini"``. normalize (bool, optional): whether to normalize the TOC curve before scoring return_ci (bool, optional): whether to return bootstrap confidence intervals and p-values. Default False. n_bootstrap (int, optional): number of half-sample bootstrap iterations. Only used when return_ci=True. Default 200. alpha (float, optional): significance level for confidence intervals. Only used when return_ci=True. Default 0.05. random_state (int or None, optional): random seed for the bootstrap sampler. Pass an integer for reproducible results. Default None. Returns: If return_ci=False: (pandas.Series): RATE scores of model estimates If return_ci=True: (pandas.DataFrame): RATE score, standard error, CI lower bound, CI upper bound, and p-value for each model estimate column """ assert weighting in ( "autoc", "qini", ), "{} weighting is not implemented. Select one of {}".format( weighting, ("autoc", "qini") ) toc = get_toc( df, outcome_col=outcome_col, treatment_col=treatment_col, treatment_effect_col=treatment_effect_col, normalize=normalize, ) rate = _compute_rate_from_toc(toc, weighting) rate.name = "RATE ({})".format(weighting) if not return_ci: return rate # Half-sample bootstrap for SE and p-value n = len(df) m = n // 2 model_names = toc.columns.tolist() boot_scores = {model: [] for model in model_names} rng = np.random.default_rng(random_state) for _ in range(n_bootstrap): idx = rng.choice(n, size=m, replace=False) df_boot = df.iloc[idx].reset_index(drop=True) toc_boot = get_toc( df_boot, outcome_col=outcome_col, treatment_col=treatment_col, treatment_effect_col=treatment_effect_col, normalize=normalize, ) rate_boot = _compute_rate_from_toc(toc_boot, weighting) for model in model_names: boot_scores[model].append(rate_boot[model]) z_crit = stats.norm.ppf(1 - alpha / 2) results = [] for model in model_names: point = rate[model] boot = np.array(boot_scores[model]) se = np.std(boot, ddof=1) ci_lower = point - z_crit * se ci_upper = point + z_crit * se z_stat = point / se if se > 0 else np.inf p_value = 2 * (1 - stats.norm.cdf(abs(z_stat))) results.append( { "model": model, "rate": point, "se": se, "ci_lower": ci_lower, "ci_upper": ci_upper, "p_value": p_value, } ) return pd.DataFrame(results).set_index("model")
[docs] def plot_toc( df, outcome_col="y", treatment_col="w", treatment_effect_col="tau", normalize=False, n=100, figsize=(8, 8), ax: Optional[plt.Axes] = None, ) -> plt.Axes: """Plot the Targeting Operator Characteristic (TOC) curve of model estimates. The TOC(q) shows the excess ATE when treating only the top-q fraction of units prioritized by a model score, relative to the overall ATE. A positive and steeply decreasing curve indicates the model effectively ranks high-benefit units first. If the true treatment effect is provided (e.g. in synthetic data), it's used directly. Otherwise, it's estimated from observed outcomes and treatment assignments. For details, see Yadlowsky et al. (2021), `Evaluating Treatment Prioritization Rules via Rank-Weighted Average Treatment Effects`. https://arxiv.org/abs/2111.07966 For the former, `treatment_effect_col` should be provided. For the latter, both `outcome_col` and `treatment_col` should be provided. Args: df (pandas.DataFrame): a data frame with model estimates and actual data as columns outcome_col (str, optional): the column name for the actual outcome treatment_col (str, optional): the column name for the treatment indicator (0 or 1) treatment_effect_col (str, optional): the column name for the true treatment effect normalize (bool, optional): whether to normalize the TOC curve by its maximum absolute value before plotting n (int, optional): the number of samples to be used for plotting figsize (tuple, optional): the size of the figure to plot ax (plt.Axes, optional): an existing axes object to draw on Returns: (plt.Axes): the matplotlib Axes with the TOC plot """ toc = get_toc( df, outcome_col=outcome_col, treatment_col=treatment_col, treatment_effect_col=treatment_effect_col, normalize=normalize, ) if (n is not None) and (n < toc.shape[0]): toc = toc.iloc[np.linspace(0, len(toc) - 1, n, endpoint=True).astype(int)] if ax is None: _, ax = plt.subplots(figsize=figsize) ax = toc.plot(ax=ax) # Random baseline (TOC = 0 everywhere) ax.plot( [toc.index[0], toc.index[-1]], [0, 0], label=RANDOM_COL, color="k", linestyle="--", ) ax.legend() ax.set_xlabel("Fraction treated (q)") ax.set_ylabel("TOC(q)") ax.set_title("Targeting Operator Characteristic (TOC)") return ax