Jump to content

Connect SuperML | Leeroopedia MCP: Equip your AI agents with best practices, code verification, and debugging knowledge. Powered by Leeroo — building Organizational Superintelligence. Contact us at founders@leeroo.com.

Implementation:Online ml River Conf RegressionJackknife

From Leeroopedia


Knowledge Sources
Domains Online_Learning, Conformal_Prediction, Regression, Uncertainty_Quantification
Last Updated 2026-02-08 16:00 GMT

Overview

RegressionJackknife implements conformal prediction for regression using the jackknife method to produce prediction intervals.

Description

RegressionJackknife is a conformal prediction wrapper that augments any regression model with prediction intervals. The method tracks the distribution of residuals (errors) and uses quantiles of this distribution to construct prediction intervals around point predictions.

For each sample, the residual is computed from the prediction made before the model learns from that sample, so the intervals are not optimistically biased. The residuals are fed into online quantile estimators that track the lower and upper quantiles corresponding to the desired confidence level. For example, at a 95% confidence level the estimators track the 2.5th and 97.5th percentiles of the residuals.

The method produces marginal prediction intervals, meaning they are not conditioned on the input features x. This is a limitation of the jackknife method; in return, the method is simple, efficient, and robust to outliers. The intervals work naturally in an online setting because the quantile estimators are updated continuously.

An optional window_size parameter allows using rolling quantiles instead of cumulative quantiles, which adapts to non-stationary environments where model performance changes over time.

Usage

Use RegressionJackknife when you need prediction intervals for regression models in online learning scenarios. It's particularly useful for risk-sensitive applications where quantifying uncertainty is crucial, such as forecasting with quality guarantees, anomaly detection with calibrated thresholds, or decision-making under uncertainty.

Code Reference

Source Location

Signature

class RegressionJackknife(base.Wrapper[T], base.Regressor):
    """Conformal wrapper that adds jackknife prediction intervals to a regressor.

    Signature stub only: the implementation body is not shown on this page.
    """

    def __init__(
        self,
        regressor: T,  # regression model to wrap
        confidence_level: float = 0.95,  # confidence level for the intervals (e.g. 0.9, 0.95, 0.99)
        window_size: int | None = None,  # rolling-window size for the quantiles; None means cumulative
    ):
        ...

Import

from river import conf

I/O Contract

Input
Parameter Type Description
regressor base.Regressor Regression model to wrap
confidence_level float Confidence level for intervals (default 0.95)
window_size int or None Size of rolling window (None for cumulative)
x dict Feature dictionary
y float Target value
Output
Method Return Type Description
learn_one(x, y) None Updates model and residual quantiles
predict_one(x, with_interval=False) float Point prediction
predict_one(x, with_interval=True) Interval Prediction interval
Key Methods
Method Parameters Description
learn_one(x, y) x: dict, y: float Updates regressor after computing residual
predict_one(x, with_interval) x: dict, with_interval: bool Returns prediction or interval
Parameters
Name Type Default Description
regressor base.Regressor required Base regression model
confidence_level float 0.95 Confidence level (e.g., 0.9, 0.95, 0.99)
window_size int or None None Window size for rolling quantiles

Usage Examples

from river import conf
from river import datasets
from river import linear_model
from river import metrics
from river import preprocessing
from river import stats

# Example 1: Basic usage
dataset = datasets.TrumpApproval()

# Feature scaling followed by linear regression, wrapped so that
# predictions can be returned as 90% intervals.
model = conf.RegressionJackknife(
    preprocessing.StandardScaler() | linear_model.LinearRegression(intercept_lr=0.1),
    confidence_level=0.9,
)

# Running means: share of targets inside the interval, and interval width.
validity, efficiency = stats.Mean(), stats.Mean()

for x, y in dataset:
    # Predict first, learn afterwards: the interval is scored on a sample
    # the model has not learned from yet.
    interval = model.predict_one(x, with_interval=True)
    is_covered = y in interval
    validity.update(is_covered)
    efficiency.update(interval.width)
    model.learn_one(x, y)

print(f"Validity (coverage): {validity.get():.3f}")  # ~0.939
print(f"Efficiency (avg width): {efficiency.get():.3f}")  # ~4.078

# Example 2: Without intervals (standard prediction)
first_ten = dataset.take(10)
for x, y in first_ten:
    # Shorthand for the same call: model.predict_one(x)
    y_pred = model.predict_one(x, with_interval=False)
    report = f"Prediction: {y_pred:.2f}, True: {y:.2f}"
    print(report)
    model.learn_one(x, y)

# Example 3: Using rolling window for non-stationary data
rolling_pipeline = preprocessing.StandardScaler() | linear_model.LinearRegression()
model_rolling = conf.RegressionJackknife(
    rolling_pipeline,
    window_size=100,  # Only the last 100 samples feed the quantiles
    confidence_level=0.95,
)

# With a rolling window the intervals follow changes in model performance

# Example 4: Comparing confidence levels
dataset = datasets.TrumpApproval()

# One independently trained wrapper per confidence level, keyed by a
# percentage label ('80%', '90%', '95%').
models = {
    f"{confidence_level:.0%}": conf.RegressionJackknife(
        preprocessing.StandardScaler() | linear_model.LinearRegression(),
        confidence_level=confidence_level,
    )
    for confidence_level in (0.80, 0.90, 0.95)
}

# Per-level running means of coverage and interval width.
results = {
    level: {'validity': stats.Mean(), 'width': stats.Mean()}
    for level in models
}

for x, y in dataset:
    for level, model in models.items():
        # Predict before learning so each interval is scored on an unseen sample.
        interval = model.predict_one(x, with_interval=True)
        results[level]['validity'].update(y in interval)
        results[level]['width'].update(interval.width)
        model.learn_one(x, y)

# The loop variable is deliberately NOT called `metrics`: that name is the
# `river.metrics` module imported at the top of this script, and rebinding it
# to a dict would break any later `metrics.<Something>()` call.
for level, level_results in results.items():
    print(f"{level} confidence:")
    print(f"  Coverage: {level_results['validity'].get():.3f}")
    print(f"  Avg width: {level_results['width'].get():.3f}")

# Example 5: Anomaly detection using intervals
from river import anomaly

dataset = datasets.CreditCard()

# A 99% level yields wide intervals, so only extreme targets fall outside
model = conf.RegressionJackknife(
    preprocessing.StandardScaler() | linear_model.LinearRegression(),
    confidence_level=0.99,
)

# Simple heuristic: a sample is flagged when its target lies outside the 99% interval
anomalies = []
warmup = 50  # samples at index <= warmup only train the model; intervals need data first

for i, (x, y) in enumerate(dataset.take(1000)):
    if i > warmup:
        interval = model.predict_one(x, with_interval=True)
        if not (y in interval):
            anomalies.append(i)
            print(f"Anomaly at index {i}: y={y:.2f}, interval=[{interval.lower:.2f}, {interval.upper:.2f}]")

    # Learn from every sample, flagged or not.
    model.learn_one(x, y)

print(f"Detected {len(anomalies)} anomalies")

# Example 6: Progressive validation with intervals
from river import evaluate

dataset = datasets.TrumpApproval()

# Scaled linear regression wrapped to produce 90% prediction intervals.
model = conf.RegressionJackknife(
    preprocessing.StandardScaler() | linear_model.LinearRegression(intercept_lr=0.1),
    confidence_level=0.90,
)

# No standalone MAE metric object is created here: the absolute error is
# accumulated by the IntervalMetrics tracker defined below, and an unused
# `metrics.MAE()` instance would never be updated or read.

# Custom evaluation tracking intervals
class IntervalMetrics:
    """Accumulate coverage, interval width and absolute error per observation.

    Attributes are plain lists with one entry per call to ``update``:
    ``coverage`` holds booleans, ``widths`` holds interval widths and
    ``mae_list`` holds absolute errors of the point prediction.
    """

    def __init__(self):
        self.coverage = []
        self.widths = []
        self.mae_list = []

    def update(self, y_true, y_pred, interval):
        """Record one observation.

        ``interval`` must support ``y_true in interval`` and expose ``width``.
        """
        self.coverage.append(y_true in interval)
        self.widths.append(interval.width)
        self.mae_list.append(abs(y_true - y_pred))

    @staticmethod
    def _mean(values):
        """Return the arithmetic mean of ``values``, or NaN when it is empty."""
        # Without this guard, asking for stats before the first update would
        # raise ZeroDivisionError; NaN marks the statistic as undefined.
        if not values:
            return float("nan")
        return sum(values) / len(values)

    def get_stats(self):
        """Return a dict with the mean ``coverage``, ``avg_width`` and ``mae``.

        Every value is NaN if ``update`` has not been called yet.
        """
        return {
            'coverage': self._mean(self.coverage),
            'avg_width': self._mean(self.widths),
            'mae': self._mean(self.mae_list),
        }

interval_metrics = IntervalMetrics()

for x, y in dataset:
    # Predict before learning so the interval is scored on an unseen sample.
    interval = model.predict_one(x, with_interval=True)
    # The interval midpoint stands in for the point prediction.
    # NOTE(review): this presumably equals model.predict_one(x) only when the
    # interval is symmetric around the prediction — confirm against River.
    y_pred = interval.center

    interval_metrics.update(y, y_pred, interval)

    model.learn_one(x, y)

# Bound to `summary`, not `stats`: `stats` is the `river.stats` module imported
# at the top of this script, and rebinding it would break `stats.Mean()` if any
# earlier example were re-run.
summary = interval_metrics.get_stats()
print(f"Coverage: {summary['coverage']:.3f}")
print(f"Average width: {summary['avg_width']:.3f}")
print(f"MAE: {summary['mae']:.3f}")

# Example 7: Combining with ensemble models
from river import ensemble

# Ten bagged linear regressors behind a shared feature scaler.
bagged_regressor = ensemble.BaggingRegressor(
    model=linear_model.LinearRegression(),
    n_models=10,
)
base_model = preprocessing.StandardScaler() | bagged_regressor

conformal_ensemble = conf.RegressionJackknife(base_model, confidence_level=0.95)

for x, y in dataset.take(100):
    # Report the interval first, then let the wrapped ensemble learn the sample.
    interval = conformal_ensemble.predict_one(x, with_interval=True)
    print(f"Ensemble interval: [{interval.lower:.2f}, {interval.upper:.2f}]")
    conformal_ensemble.learn_one(x, y)

Related Pages

Page Connections

Double-click a node to navigate. Hold to expand connections.
Principle
Implementation
Heuristic
Environment