Implementation:Online ml River Conf RegressionJackknife

Knowledge Sources	Online_ml_River
Domains	Online_Learning, Conformal_Prediction, Regression, Uncertainty_Quantification
Last Updated	2026-02-08 16:00 GMT

Overview

RegressionJackknife implements conformal prediction for regression using the jackknife method to produce prediction intervals.

Description

RegressionJackknife is a conformal prediction wrapper that augments any regression model with prediction intervals. The method tracks the distribution of residuals (errors) and uses quantiles of this distribution to construct prediction intervals around point predictions.

The algorithm works by computing the residual for each prediction before updating the model, ensuring that intervals are not optimistically biased. The residuals are fed into online quantile estimators that track the lower and upper quantiles corresponding to the desired confidence level. For a 95% confidence level, it tracks the 2.5th and 97.5th percentiles.

The method produces marginal prediction intervals, meaning they are not conditioned on the input features x. This is a known limitation of the jackknife method; in exchange, the method is simple, efficient, and robust to outliers. The intervals work naturally in an online setting because the quantile estimators are updated continuously.

An optional window_size parameter allows using rolling quantiles instead of cumulative quantiles, which adapts to non-stationary environments where model performance changes over time.

Usage

Use RegressionJackknife when you need prediction intervals for regression models in online learning scenarios. It's particularly useful for risk-sensitive applications where quantifying uncertainty is crucial, such as forecasting with quality guarantees, anomaly detection with calibrated thresholds, or decision-making under uncertainty.

Code Reference

Source Location

Repository: Online_ml_River
File: river/conf/jackknife.py

Signature

class RegressionJackknife(base.Wrapper[T], base.Regressor):
    """Conformal wrapper that adds jackknife prediction intervals to a regressor."""

    def __init__(
        self,
        regressor: T,
        confidence_level: float = 0.95,
        window_size: int | None = None,
    ):
        # regressor: the regression model to wrap.
        # confidence_level: confidence level of the produced intervals.
        # window_size: size of the rolling window used for the residual
        #   quantiles; None means cumulative (non-windowed) quantiles.
        ...

Import

from river import conf

I/O Contract

Input
Parameter	Type	Description
regressor	base.Regressor	Regression model to wrap
confidence_level	float	Confidence level for intervals (default 0.95)
window_size	int or None	Size of rolling window (None for cumulative)
x	dict	Feature dictionary
y	float	Target value

Output
Method	Return Type	Description
learn_one(x, y)	None	Updates model and residual quantiles
predict_one(x, with_interval=False)	float	Point prediction
predict_one(x, with_interval=True)	Interval	Prediction interval

Key Methods
Method	Parameters	Description
learn_one(x, y)	x: dict, y: float	Updates regressor after computing residual
predict_one(x, with_interval)	x: dict, with_interval: bool	Returns prediction or interval

Parameters
Name	Type	Default	Description
regressor	base.Regressor	required	Base regression model
confidence_level	float	0.95	Confidence level (e.g., 0.9, 0.95, 0.99)
window_size	int or None	None	Window size for rolling quantiles

Usage Examples

from river import conf
from river import datasets
from river import linear_model
from river import metrics
from river import preprocessing
from river import stats

# Example 1: Basic usage -- measure empirical coverage and interval width
dataset = datasets.TrumpApproval()

# Scale the features, then fit a linear model; the wrapper adds the intervals.
pipeline = preprocessing.StandardScaler() | linear_model.LinearRegression(intercept_lr=0.1)
model = conf.RegressionJackknife(pipeline, confidence_level=0.9)

validity, efficiency = stats.Mean(), stats.Mean()

for x, y in dataset:
    # Predict first, learn afterwards: the interval never sees the current target.
    interval = model.predict_one(x, with_interval=True)
    covered = y in interval
    validity.update(covered)
    efficiency.update(interval.width)
    model.learn_one(x, y)

print(f"Validity (coverage): {validity.get():.3f}")  # ~0.939
print(f"Efficiency (avg width): {efficiency.get():.3f}")  # ~4.078

# Example 2: Point predictions only (no interval requested)
for x, y in dataset.take(10):
    point_estimate = model.predict_one(x, with_interval=False)
    # Equivalent shorthand: point_estimate = model.predict_one(x)
    print(f"Prediction: {point_estimate:.2f}, True: {y:.2f}")
    model.learn_one(x, y)

# Example 3: Rolling-window quantiles for non-stationary data
rolling_pipeline = preprocessing.StandardScaler() | linear_model.LinearRegression()

# window_size=100: only the last 100 samples feed the residual quantiles,
# so the intervals adapt when the model's performance changes over time.
model_rolling = conf.RegressionJackknife(
    rolling_pipeline,
    confidence_level=0.95,
    window_size=100,
)

# Example 4: Comparing confidence levels
dataset = datasets.TrumpApproval()

# One independent wrapper (with its own fresh pipeline) per confidence level.
# The keys render as '80%', '90%' and '95%'.
models = {
    f"{confidence_level:.0%}": conf.RegressionJackknife(
        preprocessing.StandardScaler() | linear_model.LinearRegression(),
        confidence_level=confidence_level,
    )
    for confidence_level in (0.80, 0.90, 0.95)
}

# Running means of coverage and interval width, keyed like `models`.
results = {
    level: {'validity': stats.Mean(), 'width': stats.Mean()}
    for level in models
}

for x, y in dataset:
    # `conformal_model` (not `model`) so the global `model` is left untouched.
    for level, conformal_model in models.items():
        # Predict before learning so the interval never sees the current target.
        interval = conformal_model.predict_one(x, with_interval=True)
        results[level]['validity'].update(y in interval)
        results[level]['width'].update(interval.width)
        conformal_model.learn_one(x, y)

# The loop variable is deliberately NOT named `metrics`: that would rebind the
# `river.metrics` module imported at the top of the script and break any later
# `metrics.<Something>()` call.
for level, level_results in results.items():
    print(f"{level} confidence:")
    print(f"  Coverage: {level_results['validity'].get():.3f}")
    print(f"  Avg width: {level_results['width'].get():.3f}")

# Example 5: Anomaly detection using intervals
from river import anomaly

dataset = datasets.CreditCard()

model = conf.RegressionJackknife(
    preprocessing.StandardScaler() | linear_model.LinearRegression(),
    confidence_level=0.99,  # Wide intervals for anomaly detection
)

# Simple heuristic: a sample is anomalous when its target falls outside
# the 99% interval.
anomalies = []

for i, (x, y) in enumerate(dataset.take(1000)):
    # Samples with index 0..50 are only used to train; no interval is checked
    # for them because the wrapper needs some data to build intervals.
    warmed_up = i > 50
    if warmed_up:
        interval = model.predict_one(x, with_interval=True)
        if not (y in interval):
            anomalies.append(i)
            print(
                f"Anomaly at index {i}: y={y:.2f}, "
                f"interval=[{interval.lower:.2f}, {interval.upper:.2f}]"
            )

    # Every sample, anomalous or not, is learned from.
    model.learn_one(x, y)

print(f"Detected {len(anomalies)} anomalies")

# Example 6: Progressive validation with intervals
from river import evaluate

dataset = datasets.TrumpApproval()

# Scaled linear regression wrapped to produce 90% prediction intervals.
scaled_linear = (
    preprocessing.StandardScaler()
    | linear_model.LinearRegression(intercept_lr=0.1)
)
model = conf.RegressionJackknife(scaled_linear, confidence_level=0.90)

metric = metrics.MAE()

# Custom evaluation tracking intervals
class IntervalMetrics:
    """Collect per-sample coverage, interval width and absolute error.

    Call ``update`` once per sample, then ``get_stats`` to obtain the
    averages. The ``interval`` objects passed in only need to support
    ``value in interval`` and expose a ``width`` attribute.
    """

    def __init__(self):
        self.coverage = []  # one bool per sample: was y_true inside the interval?
        self.widths = []    # one interval width per sample
        self.mae_list = []  # one absolute error |y_true - y_pred| per sample

    def update(self, y_true, y_pred, interval):
        """Record one observation.

        Args:
            y_true: Observed target value.
            y_pred: Point prediction compared against ``y_true``.
            interval: Prediction interval for this sample.
        """
        self.coverage.append(y_true in interval)
        self.widths.append(interval.width)
        self.mae_list.append(abs(y_true - y_pred))

    @staticmethod
    def _mean(values):
        """Return the arithmetic mean of ``values``, or NaN if it is empty."""
        # Guarding the empty case avoids a ZeroDivisionError when get_stats()
        # is called before any update().
        return sum(values) / len(values) if values else float("nan")

    def get_stats(self):
        """Return a dict with the mean ``coverage``, ``avg_width`` and ``mae``.

        Every value is NaN when no sample has been recorded yet.
        """
        return {
            'coverage': self._mean(self.coverage),
            'avg_width': self._mean(self.widths),
            'mae': self._mean(self.mae_list),
        }

interval_metrics = IntervalMetrics()

for x, y in dataset:
    # Predict before learning so the interval never sees the current target.
    interval = model.predict_one(x, with_interval=True)
    # The interval midpoint is used as a stand-in for the point prediction.
    # NOTE(review): the interval is built from residual quantiles, so the
    # midpoint presumably matches the regressor's own prediction only when
    # those quantiles are symmetric -- confirm if an exact MAE is required.
    y_pred = interval.center

    interval_metrics.update(y, y_pred, interval)

    model.learn_one(x, y)

# Named `summary` rather than `stats` so the `river.stats` module imported at
# the top of the script is not rebound to a plain dict.
summary = interval_metrics.get_stats()
print(f"Coverage: {summary['coverage']:.3f}")
print(f"Average width: {summary['avg_width']:.3f}")
print(f"MAE: {summary['mae']:.3f}")

# Example 7: Combining with ensemble models
from river import ensemble

# A bag of 10 linear regressors, fed with standardized features.
bagged_linear = ensemble.BaggingRegressor(
    model=linear_model.LinearRegression(),
    n_models=10,
)
base_model = preprocessing.StandardScaler() | bagged_linear

conformal_ensemble = conf.RegressionJackknife(base_model, confidence_level=0.95)

for x, y in dataset.take(100):
    interval = conformal_ensemble.predict_one(x, with_interval=True)
    lower, upper = interval.lower, interval.upper
    print(f"Ensemble interval: [{lower:.2f}, {upper:.2f}]")
    conformal_ensemble.learn_one(x, y)

Related Pages

Page Connections

Double-click a node to navigate. Hold to expand connections.

Principle

Implementation

Heuristic

Environment