Implementation:Online ml River Conf RegressionJackknife
| Knowledge Sources | |
|---|---|
| Domains | Online_Learning, Conformal_Prediction, Regression, Uncertainty_Quantification |
| Last Updated | 2026-02-08 16:00 GMT |
Overview
RegressionJackknife implements conformal prediction for regression using the jackknife method to produce prediction intervals.
Description
RegressionJackknife is a conformal prediction wrapper that augments any regression model with prediction intervals. The method tracks the distribution of residuals (errors) and uses quantiles of this distribution to construct prediction intervals around point predictions.
The algorithm works by computing the residual for each prediction before updating the model, ensuring that intervals are not optimistically biased. The residuals are fed into online quantile estimators that track the lower and upper quantiles corresponding to the desired confidence level. For a 95% confidence level, it tracks the 2.5th and 97.5th percentiles.
The method produces marginal prediction intervals, meaning they are not conditioned on the input features x: the same residual quantiles are applied to every prediction. This is a known limitation of the jackknife method, but in exchange the method is simple, efficient, and robust to outliers. The intervals work naturally in an online setting because the quantile estimators are updated continuously.
The optional window_size parameter switches from cumulative quantiles to rolling quantiles computed over only the most recent window_size residuals. This lets the intervals adapt to non-stationary environments where model performance changes over time.
Usage
Use RegressionJackknife when you need prediction intervals for regression models in online learning scenarios. It's particularly useful for risk-sensitive applications where quantifying uncertainty is crucial, such as forecasting with quality guarantees, anomaly detection with calibrated thresholds, or decision-making under uncertainty.
Code Reference
Source Location
- Repository: Online_ml_River
- File: river/conf/jackknife.py
Signature
class RegressionJackknife(base.Wrapper[T], base.Regressor):
def __init__(
self,
regressor: T,
confidence_level: float = 0.95,
window_size: int | None = None,
):
...
Import
from river import conf
I/O Contract
| Parameter | Type | Description |
|---|---|---|
| regressor | base.Regressor | Regression model to wrap |
| confidence_level | float | Confidence level for intervals (default 0.95) |
| window_size | int or None | Size of rolling window (None for cumulative) |
| x | dict | Feature dictionary |
| y | float | Target value |
| Method | Return Type | Description |
|---|---|---|
| learn_one(x, y) | None | Updates model and residual quantiles |
| predict_one(x, with_interval=False) | float | Point prediction |
| predict_one(x, with_interval=True) | Interval | Prediction interval |
| Method | Parameters | Description |
|---|---|---|
| learn_one(x, y) | x: dict, y: float | Updates regressor after computing residual |
| predict_one(x, with_interval) | x: dict, with_interval: bool | Returns prediction or interval |
| Name | Type | Default | Description |
|---|---|---|---|
| regressor | base.Regressor | required | Base regression model |
| confidence_level | float | 0.95 | Confidence level (e.g., 0.9, 0.95, 0.99) |
| window_size | int or None | None | Window size for rolling quantiles |
Usage Examples
from river import conf
from river import datasets
from river import linear_model
from river import metrics
from river import preprocessing
from river import stats
# Example 1: Basic usage
dataset = datasets.TrumpApproval()

# Wrap a scaled linear regression so that it can emit 90% prediction intervals.
model = conf.RegressionJackknife(
    preprocessing.StandardScaler() | linear_model.LinearRegression(intercept_lr=.1),
    confidence_level=0.9,
)

# Running means: how often the target lands inside the interval, and how wide
# the interval is on average.
coverage_rate = stats.Mean()
mean_width = stats.Mean()

for x, y in dataset:
    # Predict before learning so each interval is scored on an unseen sample.
    interval = model.predict_one(x, with_interval=True)
    is_covered = y in interval
    coverage_rate.update(is_covered)
    mean_width.update(interval.width)
    model.learn_one(x, y)

print(f"Validity (coverage): {coverage_rate.get():.3f}")  # ~0.939
print(f"Efficiency (avg width): {mean_width.get():.3f}")  # ~4.078
# Example 2: Without intervals (standard prediction)
for x, y in dataset.take(10):
    # Passing with_interval=False is optional: model.predict_one(x) gives the
    # same point prediction.
    point_estimate = model.predict_one(x, with_interval=False)
    print(f"Prediction: {point_estimate:.2f}, True: {y:.2f}")
    model.learn_one(x, y)
# Example 3: Using rolling window for non-stationary data
rolling_pipeline = preprocessing.StandardScaler() | linear_model.LinearRegression()
model_rolling = conf.RegressionJackknife(
    rolling_pipeline,
    confidence_level=0.95,
    window_size=100,  # Only the last 100 samples feed the quantiles
)
# Old residuals fall out of the window, so the intervals adapt to changing
# model performance.
# Example 4: Comparing confidence levels
dataset = datasets.TrumpApproval()

# Display label -> confidence level. Each level gets its own independently
# trained wrapper around a fresh pipeline.
confidence_levels = {'80%': 0.80, '90%': 0.90, '95%': 0.95}
models = {
    label: conf.RegressionJackknife(
        preprocessing.StandardScaler() | linear_model.LinearRegression(),
        confidence_level=confidence,
    )
    for label, confidence in confidence_levels.items()
}

# Per-level running means of coverage ('validity') and interval width.
results = {
    label: {'validity': stats.Mean(), 'width': stats.Mean()}
    for label in models
}

for x, y in dataset:
    for level, model in models.items():
        # Score the interval before the model sees the sample.
        interval = model.predict_one(x, with_interval=True)
        results[level]['validity'].update(y in interval)
        results[level]['width'].update(interval.width)
        model.learn_one(x, y)

# The loop variable is deliberately not named `metrics`: that name is the
# river.metrics module imported at the top of these examples, and rebinding it
# here would break any later `metrics.<Something>()` call.
for level, level_results in results.items():
    print(f"{level} confidence:")
    print(f" Coverage: {level_results['validity'].get():.3f}")
    print(f" Avg width: {level_results['width'].get():.3f}")
# Example 5: Anomaly detection using intervals
from river import anomaly

dataset = datasets.CreditCard()
model = conf.RegressionJackknife(
    preprocessing.StandardScaler() | linear_model.LinearRegression(),
    confidence_level=0.99,  # Wide intervals for anomaly detection
)

# Simple heuristic: a sample is an anomaly when its target falls outside the
# 99% interval.
anomalies = []
for i, (x, y) in enumerate(dataset.take(1000)):
    warmed_up = i > 50  # Need some data to build intervals
    if warmed_up:
        interval = model.predict_one(x, with_interval=True)
        if y not in interval:
            anomalies.append(i)
            print(f"Anomaly at index {i}: y={y:.2f}, interval=[{interval.lower:.2f}, {interval.upper:.2f}]")
    # Learn from every sample, including those seen during warm-up.
    model.learn_one(x, y)

print(f"Detected {len(anomalies)} anomalies")
# Example 6: Progressive validation with intervals
from river import evaluate

dataset = datasets.TrumpApproval()

# Build the base pipeline first, then wrap it to obtain 90% intervals.
base_pipeline = preprocessing.StandardScaler() | linear_model.LinearRegression(intercept_lr=0.1)
model = conf.RegressionJackknife(base_pipeline, confidence_level=0.90)

metric = metrics.MAE()
# Custom evaluation tracking intervals
class IntervalMetrics:
    """Accumulate coverage, interval width and absolute error over a stream.

    Every call to ``update`` appends one observation to three parallel lists;
    ``get_stats`` reduces each list to its arithmetic mean.
    """

    def __init__(self):
        self.coverage = []   # bool per sample: was y_true inside the interval?
        self.widths = []     # interval.width per sample
        self.mae_list = []   # |y_true - y_pred| per sample

    def update(self, y_true, y_pred, interval):
        """Record one observation.

        Args:
            y_true: Observed target value.
            y_pred: Point prediction compared against ``y_true``.
            interval: Object supporting ``y_true in interval`` and exposing a
                ``width`` attribute.
        """
        self.coverage.append(y_true in interval)
        self.widths.append(interval.width)
        self.mae_list.append(abs(y_true - y_pred))

    @staticmethod
    def _mean(values):
        """Return the arithmetic mean of ``values``, or NaN when it is empty."""
        if not values:
            return float("nan")
        return sum(values) / len(values)

    def get_stats(self):
        """Return the mean coverage, mean width and mean absolute error.

        Returns:
            dict with keys ``'coverage'``, ``'avg_width'`` and ``'mae'``. Each
            value is NaN if ``update`` has not been called yet, rather than
            raising ``ZeroDivisionError``.
        """
        return {
            'coverage': self._mean(self.coverage),
            'avg_width': self._mean(self.widths),
            'mae': self._mean(self.mae_list),
        }
interval_metrics = IntervalMetrics()

for x, y in dataset:
    # Predict before learning so the interval is evaluated on an unseen sample.
    interval = model.predict_one(x, with_interval=True)
    y_pred = interval.center  # Use center as point prediction
    interval_metrics.update(y, y_pred, interval)
    model.learn_one(x, y)

# Stored under `summary` rather than `stats`: `stats` is the river.stats module
# imported at the top of these examples, and rebinding it would break any
# later `stats.Mean()` call.
summary = interval_metrics.get_stats()
print(f"Coverage: {summary['coverage']:.3f}")
print(f"Average width: {summary['avg_width']:.3f}")
print(f"MAE: {summary['mae']:.3f}")
# Example 7: Combining with ensemble models
from river import ensemble

# Scale the features, then feed them to a bag of 10 linear regressors.
scaler = preprocessing.StandardScaler()
bagged_regressor = ensemble.BaggingRegressor(
    model=linear_model.LinearRegression(),
    n_models=10,
)
base_model = scaler | bagged_regressor

# Wrap the whole pipeline to obtain 95% prediction intervals.
conformal_ensemble = conf.RegressionJackknife(base_model, confidence_level=0.95)

for x, y in dataset.take(100):
    interval = conformal_ensemble.predict_one(x, with_interval=True)
    print(f"Ensemble interval: [{interval.lower:.2f}, {interval.upper:.2f}]")
    conformal_ensemble.learn_one(x, y)