Overview
The Ensemble classes provide base functionality for ensemble models in River, supporting both heterogeneous collections of different models and homogeneous collections of cloned models.
Description
River provides two ensemble base classes. Ensemble inherits from Python's UserList and represents a collection of estimators, enforcing a minimum of two models and providing list-like access through the models property. WrapperEnsemble extends both Ensemble and Wrapper to create homogeneous ensembles by cloning a single base model multiple times, taking a model template, the number of copies to create, and an optional random seed for reproducibility. Both classes are generic over the estimator type T, ensuring type safety when working with specific estimator types.
Usage
Use Ensemble as the parent class when implementing ensemble methods that combine multiple heterogeneous models. Use WrapperEnsemble when your ensemble consists of multiple copies of the same base model (like bagging or boosting). WrapperEnsemble automatically handles model cloning and provides access to the original model template through the _wrapped_model property.
Code Reference
Source Location
Signature
class Ensemble(UserList[T]):
"""An ensemble is a model which is composed of a list of models."""
def __init__(self, models: Iterator[T]) -> None
@property
def _min_number_of_models(self) -> int
@property
def models(self) -> list[T]
class WrapperEnsemble(Ensemble[T], Wrapper[T]):
"""A wrapper ensemble is an ensemble composed of multiple copies of the same model."""
def __init__(self, model: T, n_models: int, seed: int | None) -> None
@property
def _wrapped_model(self) -> T
# Type variable
T = TypeVar("T", bound=Estimator)
Import
from river.base import Ensemble, WrapperEnsemble
I/O Contract
Ensemble
| Parameter | Type | Description |
|---|---|---|
| models | Iterator[T] | Iterator of estimator instances to include in the ensemble |

| Property | Type | Description |
|---|---|---|
| models | list[T] | List of estimator instances in the ensemble |
| _min_number_of_models | int | Minimum number of models required (default: 2) |
WrapperEnsemble
| Parameter | Type | Description |
|---|---|---|
| model | T | The model template to clone |
| n_models | int | Number of copies to create |
| seed | `int \| None` | Random seed for reproducibility |

| Property | Type | Description |
|---|---|---|
| _wrapped_model | T | The original model template |
| models | list[T] | List of cloned model instances |
| _rng | Random | Random number generator instance |
Usage Examples
from river import tree
from river import linear_model
from river.base import Ensemble
from river import datasets
# Build a heterogeneous ensemble by hand from two different estimator types
model1, model2 = tree.HoeffdingTreeClassifier(), linear_model.LogisticRegression()

# Instantiate the Ensemble base class directly with the list of members
ensemble = Ensemble([model1, model2])
print(f"Number of models: {len(ensemble.models)}")

# Visit each member together with its position in the ensemble
for i, model in enumerate(ensemble.models):
    print(f"Model {i}: {model}")
# Creating a homogeneous ensemble with WrapperEnsemble
from river.base import WrapperEnsemble
# The template model; the ensemble is built from copies of it
base_model = tree.HoeffdingTreeClassifier()
ensemble = WrapperEnsemble(model=base_model, n_models=5, seed=42)

# Report how many members were created and show the template they came from
print(f"Number of cloned models: {len(ensemble.models)}")
print(f"Original model: {ensemble._wrapped_model}")
# Example: Implementing a simple voting ensemble
from river.base import Ensemble, Classifier
from collections import Counter
class VotingClassifier(Ensemble, Classifier):
    """Majority-vote classifier over a list of models.

    Parameters
    ----------
    models
        The estimators whose predictions are combined by voting.

    """

    def __init__(self, models):
        super().__init__(models)

    def learn_one(self, x, y):
        """Update every member of the ensemble with the sample ``(x, y)``."""
        for model in self.models:
            model.learn_one(x, y)

    def predict_one(self, x):
        """Return the label predicted by the most members.

        Members that return ``None`` are treated as abstaining and do not
        take part in the vote. ``None`` is returned only when every member
        abstains. Ties are resolved in favour of the label that was
        encountered first when iterating over ``self.models``.
        """
        # Only None means "no prediction". Falsy labels such as False or 0
        # are legitimate classes (the Phishing labels used below are
        # booleans), so a truthiness test like any() must not be used here.
        votes = Counter(
            y_pred
            for y_pred in (model.predict_one(x) for model in self.models)
            if y_pred is not None
        )
        if not votes:
            return None
        return votes.most_common(1)[0][0]
# Run the voting classifier over the first 100 samples of a stream
models = [
    tree.HoeffdingTreeClassifier(),
    linear_model.LogisticRegression(),
    tree.HoeffdingTreeClassifier(grace_period=50),
]
ensemble = VotingClassifier(models)

stream = datasets.Phishing().take(100)
for x, y in stream:
    # Predict before learning, so the label of the current sample has not
    # been seen yet when the prediction is made
    y_pred = ensemble.predict_one(x)
    ensemble.learn_one(x, y)
# Example: Implementing a bagging-style ensemble
from river.base import WrapperEnsemble, Classifier
class SimpleBagging(WrapperEnsemble, Classifier):
    """Bagging-style classifier built from copies of one base model.

    Parameters
    ----------
    model
        Template classifier the ensemble members are copied from.
    n_models
        Number of copies to create.
    seed
        Random seed, forwarded to the parent class for reproducibility.

    """

    def __init__(self, model, n_models=10, seed=None):
        super().__init__(model, n_models, seed)

    def learn_one(self, x, y):
        """Show the sample ``(x, y)`` to a random subset of the members."""
        for member in self.models:
            # One draw per member: the member skips this sample unless the
            # draw is strictly greater than 0.5.
            if self._rng.random() <= 0.5:
                continue
            member.learn_one(x, y)

    def predict_proba_one(self, x):
        """Return per-label probabilities averaged over all members."""
        summed = {}
        for member in self.models:
            for label, proba in member.predict_proba_one(x).items():
                summed[label] = summed.get(label, 0) + proba

        # Divide by the ensemble size rather than by the number of members
        # that reported a label, so a missing label contributes zero.
        n_members = len(self.models)
        return {label: total / n_members for label, total in summed.items()}
# Ten copies of a Hoeffding tree, with a fixed seed for reproducibility
bagging = SimpleBagging(tree.HoeffdingTreeClassifier(), n_models=10, seed=42)
Related Pages