Jump to content

Connect SuperML | Leeroopedia MCP: Equip your AI agents with best practices, code verification, and debugging knowledge. Powered by Leeroo — building Organizational Superintelligence. Contact us at founders@leeroo.com.

Implementation:Online ml River Base Ensemble

From Leeroopedia


Knowledge Sources
Domains Online_Learning, Ensemble_Learning, Base_Classes
Last Updated 2026-02-08 16:00 GMT

Overview

The Ensemble classes provide base functionality for ensemble models in River, supporting both heterogeneous collections of different models and homogeneous collections of cloned models.

Description

River provides two ensemble base classes. Ensemble inherits from Python's UserList and represents a collection of estimators, enforcing a minimum of two models and providing list-like access through the models property. WrapperEnsemble extends both Ensemble and Wrapper to create homogeneous ensembles by cloning a single base model multiple times, taking a model template, the number of copies to create, and an optional random seed for reproducibility. Both classes are generic over the estimator type T, ensuring type safety when working with specific estimator types.

Usage

Use Ensemble as the parent class when implementing ensemble methods that combine multiple heterogeneous models. Use WrapperEnsemble when your ensemble consists of multiple copies of the same base model (like bagging or boosting). WrapperEnsemble automatically handles model cloning and provides access to the original model template through the _wrapped_model property.

Code Reference

Source Location

Signature

# Signature summary only: method bodies are omitted in this listing.
class Ensemble(UserList[T]):
    """An ensemble is a model which is composed of a list of models."""

    # models: iterator of estimator instances to include in the ensemble.
    def __init__(self, models: Iterator[T]) -> None

    # Minimum number of models required (documented default: 2).
    @property
    def _min_number_of_models(self) -> int

    # List of the estimator instances held by the ensemble.
    @property
    def models(self) -> list[T]


# Signature summary only: method bodies are omitted in this listing.
class WrapperEnsemble(Ensemble[T], Wrapper[T]):
    """A wrapper ensemble is an ensemble composed of multiple copies of the same model."""

    # model: the model template to clone.
    # n_models: number of copies to create.
    # seed: optional random seed for reproducibility.
    def __init__(self, model: T, n_models: int, seed: int | None) -> None

    # The original model template that the copies were cloned from.
    @property
    def _wrapped_model(self) -> T


# Type variable
T = TypeVar("T", bound=Estimator)

Import

from river.base import Ensemble, WrapperEnsemble

I/O Contract

Ensemble

Parameter Type Description
models Iterator[T] Iterator of estimator instances to include in the ensemble
Property Type Description
models list[T] List of estimator instances in the ensemble
_min_number_of_models int Minimum number of models required (default: 2)

WrapperEnsemble

Parameter Type Description
model T The model template to clone
n_models int Number of copies to create
seed int | None Random seed for reproducibility (None leaves the generator unseeded)
Property Type Description
_wrapped_model T The original model template
models list[T] List of cloned model instances
_rng Random Random number generator instance

Usage Examples

from river import tree
from river import linear_model
from river.base import Ensemble
from river import datasets

# Build a heterogeneous ensemble by hand from two different kinds of learner
model1, model2 = tree.HoeffdingTreeClassifier(), linear_model.LogisticRegression()

# The Ensemble base class is instantiated directly with the learners
ensemble = Ensemble([model1, model2])
print(f"Number of models: {len(ensemble.models)}")

# The members are exposed, in insertion order, through the `models` property
for i, model in enumerate(ensemble.models, start=0):
    print(f"Model {i}: {model}")

# Creating a homogeneous ensemble with WrapperEnsemble
from river.base import WrapperEnsemble

# A single template model is handed over together with the number of copies
# wanted and a seed.
base_model = tree.HoeffdingTreeClassifier()
ensemble = WrapperEnsemble(model=base_model, n_models=5, seed=42)

# The ensemble holds the copies; the template stays reachable separately
print(f"Number of cloned models: {len(ensemble.models)}")
print(f"Original model: {ensemble._wrapped_model}")

# Example: Implementing a simple voting ensemble
from river.base import Ensemble, Classifier
from collections import Counter

class VotingClassifier(Ensemble, Classifier):
    """Majority-vote classifier over a fixed collection of models.

    Every member is trained on each observation. The predicted label is the
    one returned by the largest number of members.

    Parameters
    ----------
    models
        The estimators to combine; forwarded unchanged to ``Ensemble``.

    """

    def __init__(self, models):
        super().__init__(models)

    def learn_one(self, x, y):
        """Update every member model with the observation ``(x, y)``."""
        for model in self.models:
            model.learn_one(x, y)

    def predict_one(self, x):
        """Return the label predicted by most members.

        Members that answer ``None`` are treated as abstaining and do not
        vote. ``None`` is returned only when every member abstains. Ties are
        resolved in favour of the label that was voted for first.
        """
        # Only `None` means "no prediction": labels such as `False` or `0`
        # are falsy but perfectly valid, so a truthiness test must not be
        # used to decide whether anybody voted.
        votes = Counter(
            label
            for label in (model.predict_one(x) for model in self.models)
            if label is not None
        )
        if not votes:
            return None
        return votes.most_common(1)[0][0]

# Combine three learners under the voting classifier
models = [
    tree.HoeffdingTreeClassifier(),
    linear_model.LogisticRegression(),
    tree.HoeffdingTreeClassifier(grace_period=50),
]
ensemble = VotingClassifier(models)

# Walk over the first 100 observations: predict first, then learn from the
# true label.
stream = datasets.Phishing().take(100)
for x, y in stream:
    y_pred = ensemble.predict_one(x)
    ensemble.learn_one(x, y)

# Example: Implementing a bagging-style ensemble
from river.base import WrapperEnsemble, Classifier

class SimpleBagging(WrapperEnsemble, Classifier):
    """Bagging-style ensemble built from copies of a single model.

    Parameters
    ----------
    model
        The model template handed to ``WrapperEnsemble``.
    n_models
        Number of copies to create.
    seed
        Optional random seed for reproducibility.

    """

    def __init__(self, model, n_models=10, seed=None):
        super().__init__(model, n_models, seed)

    def learn_one(self, x, y):
        """Train a random subset of the members on the observation ``(x, y)``.

        A fresh draw from the ensemble's random number generator is made for
        every member, and the member is updated only when the draw exceeds
        0.5. This is a simplified stand-in for bootstrap resampling.
        """
        for model in self.models:
            if self._rng.random() > 0.5:
                model.learn_one(x, y)

    def predict_proba_one(self, x):
        """Return the members' combined class probabilities for ``x``.

        The per-label probabilities of all members are summed and then
        rescaled so that they add up to one. An empty mapping is returned
        when no member reports any label.
        """
        all_probas = {}
        for model in self.models:
            for label, proba in model.predict_proba_one(x).items():
                all_probas[label] = all_probas.get(label, 0) + proba

        # Rescale by the accumulated mass rather than by the member count:
        # if a member returns an empty or partial distribution, dividing by
        # the member count would give values that no longer sum to one.
        total = sum(all_probas.values())
        if not total:
            return all_probas
        return {label: proba / total for label, proba in all_probas.items()}

# Ten copies of a Hoeffding tree, with a fixed seed
bagging = SimpleBagging(model=tree.HoeffdingTreeClassifier(), n_models=10, seed=42)

Related Pages

Page Connections

Double-click a node to navigate. Hold to expand connections.
Principle
Implementation
Heuristic
Environment