Implementation:Spotify Luigi Parameter Types
Overview
Concrete tool for making pipelines configurable via typed, validated parameters, provided by Luigi.
Description
Luigi's parameter system is built on the Parameter base class and a rich hierarchy of typed subclasses. Parameters are declared as class-level attributes on Task subclasses. The framework handles parsing from strings (command line or config), serialization, validation, normalization, and contribution to task identity.
Key parameter types include:
- Parameter -- Base class; value is a str.
- IntParameter -- Parses and stores an int.
- FloatParameter -- Parses and stores a float.
- BoolParameter -- Parses a boolean; defaults to False. Supports implicit (flag-style) and explicit parsing modes.
- DateParameter -- Parses a datetime.date from YYYY-MM-DD format.
- DateIntervalParameter -- Parses a DateInterval (year, month, week, date, or custom range).
- ListParameter -- Parses a JSON array into a tuple.
- DictParameter -- Parses a JSON object into a FrozenOrderedDict.
- PathParameter -- Parses a filesystem path into a pathlib.Path.
Each type also has an Optional* variant (e.g. OptionalIntParameter) that treats empty strings as None.
Usage
Use Luigi parameters when:
- You need to pass typed configuration values into tasks from the command line, config files, or code.
- You want automatic parsing, validation, and serialization of parameter values.
- You need parameters to contribute to (or be excluded from) the unique task identity.
Code Reference
Parameter (base class)
| Attribute | Value |
|---|---|
| Source Location | luigi/parameter.py, lines 112-355 |
| Signature | class Parameter(Generic[T]) |
| Import | from luigi import Parameter or import luigi; luigi.Parameter |
class Parameter(Generic[T]):
    # Base class of the parameter hierarchy. Abridged excerpt: only the
    # constructor signature is shown and its body is elided.
    def __init__(self, default=_no_value, is_global=False, significant=True,
                 description=None, config_path=None, positional=True,
                 always_in_help=False, batch_method=None,
                 visibility=ParameterVisibility.PUBLIC):
        # default:        default value; must match the parameter type.
        # is_global:      not described in the accompanying table -- TODO confirm.
        # significant:    whether the parameter affects the task's unique identity.
        # description:    human-readable text shown in --help.
        # config_path:    deprecated; dict with section and name keys for config lookup.
        # positional:     whether the parameter can be set positionally.
        # always_in_help: always show the parameter in --help output.
        # batch_method:   function used to combine batched parameter values.
        # visibility:     PUBLIC, HIDDEN, or PRIVATE.
        ...
Constructor parameters:
| Name | Type | Default | Description |
|---|---|---|---|
| default | any | (no value) | Default value; must match the parameter type. |
| significant | bool | True | Whether this parameter affects the task's unique identity. |
| description | str or None | None | Human-readable description shown in --help. |
| config_path | dict or None | None | Deprecated. Dict with section and name keys for config lookup. |
| positional | bool | True | Whether the parameter can be set positionally. |
| always_in_help | bool | False | Always show in --help output. |
| batch_method | callable or None | None | Function to combine batched parameter values. |
| visibility | ParameterVisibility | PUBLIC | Controls visibility: PUBLIC, HIDDEN, or PRIVATE. |
Key methods:
- parse(x) -- Converts a string to the parameter's native type. Base implementation returns the string unchanged.
- serialize(x) -- Converts the native value back to a string.
- normalize(x) -- Validates or clamps the parsed value.
DateParameter
| Attribute | Value |
|---|---|
| Source Location | luigi/parameter.py, lines 467-512 |
| Signature | class DateParameter(_DateParameterBase) |
| Import | from luigi import DateParameter |
class DateParameter(_DateParameterBase):
    """Parameter whose value is a datetime.date.

    Parsed from YYYY-MM-DD format.
    """

    # Format string corresponding to the YYYY-MM-DD layout named above;
    # presumably consumed by the base class for parsing -- not shown here.
    date_format = '%Y-%m-%d'

    def __init__(self, interval=1, start=None, **kwargs):
        # Remaining keyword arguments are forwarded untouched to the base class.
        super().__init__(**kwargs)
        # NOTE(review): how ``interval`` is used is not visible in this
        # excerpt -- confirm against the base class.
        self.interval = interval
        # When no start is supplied, fall back to the date of _UNIX_EPOCH.
        self.start = start if start is not None else _UNIX_EPOCH.date()
IntParameter
| Attribute | Value |
|---|---|
| Source Location | luigi/parameter.py, lines 700-712 |
| Signature | class IntParameter(Parameter[int]) |
| Import | from luigi import IntParameter |
class IntParameter(Parameter[int]):
    """Parameter whose value is an int."""

    def parse(self, x):
        # Delegates to the built-in int(), which raises ValueError when the
        # input cannot be interpreted as an integer.
        return int(x)

    def next_in_enumeration(self, value):
        # The successor of an integer value is simply value + 1.
        return value + 1
BoolParameter
| Attribute | Value |
|---|---|
| Source Location | luigi/parameter.py, lines 739-807 |
| Signature | class BoolParameter(Parameter[bool]) |
| Import | from luigi import BoolParameter |
class BoolParameter(Parameter[bool]):
    """Parameter whose value is a bool. Defaults to False."""

    # Names of the two parsing modes.
    IMPLICIT_PARSING = "implicit"
    EXPLICIT_PARSING = "explicit"
    # Class-wide default mode; a per-instance ``parsing=`` keyword overrides it.
    parsing = IMPLICIT_PARSING

    def __init__(self, *args, **kwargs):
        # Pop ``parsing`` first so it is not forwarded to the base constructor.
        self.parsing = kwargs.pop("parsing", self.__class__.parsing)
        super().__init__(*args, **kwargs)
        # No default was supplied by the caller: fall back to False.
        if self._default == _no_value:
            self._default = False

    def parse(self, x):
        # Case-insensitive match on the string form of x; only "true" and
        # "false" are accepted, anything else raises ValueError.
        s = str(x).lower()
        if s == "true":
            return True
        elif s == "false":
            return False
        else:
            raise ValueError("cannot interpret '{}' as boolean".format(x))
DateIntervalParameter
| Attribute | Value |
|---|---|
| Source Location | luigi/parameter.py, lines 815-841 |
| Signature | class DateIntervalParameter(Parameter[date_interval.DateInterval]) |
| Import | from luigi import DateIntervalParameter |
ListParameter
| Attribute | Value |
|---|---|
| Source Location | luigi/parameter.py, lines 1207-1355 |
| Signature | class ListParameter(Parameter[Tuple[Any, ...]]) |
| Import | from luigi import ListParameter |
class ListParameter(Parameter[Tuple[Any, ...]]):
    """Parameter whose value is a list (stored as a tuple for hashability)."""

    def __init__(self, *args, schema=None, **kwargs):
        # optional JSON schema validation via jsonschema library
        # (constructor body elided in this excerpt)
        ...

    def parse(self, x):
        # Decode the JSON text; nested JSON objects become FrozenOrderedDict.
        # NOTE(review): this returns a list -- the tuple conversion mentioned
        # in the class docstring is not visible here and presumably happens
        # in another method; confirm.
        return list(json.loads(x, object_pairs_hook=FrozenOrderedDict))

    def serialize(self, x):
        # Encode back to JSON text using the _DictParamEncoder encoder class.
        return json.dumps(x, cls=_DictParamEncoder)
DictParameter
| Attribute | Value |
|---|---|
| Source Location | luigi/parameter.py, lines 1065-1198 |
| Signature | class DictParameter(Parameter[Dict[Any, Any]]) |
| Import | from luigi import DictParameter |
class DictParameter(Parameter[Dict[Any, Any]]):
    """Parameter whose value is a dict (stored as FrozenOrderedDict for hashability)."""

    def __init__(self, *args, schema=None, **kwargs):
        # optional JSON schema validation via jsonschema library
        # (constructor body elided in this excerpt)
        ...

    def parse(self, x):
        # Non-string input is returned unchanged.
        if not isinstance(x, str):
            return x
        # JSON objects at every nesting level are built as FrozenOrderedDict.
        return json.loads(x, object_pairs_hook=FrozenOrderedDict)

    def serialize(self, x):
        # Encode back to JSON text using the _DictParamEncoder encoder class.
        return json.dumps(x, cls=_DictParamEncoder)
I/O Contract
| Direction | Name | Type | Description |
|---|---|---|---|
| Input | raw value (string or native) | str or native type | Value from CLI, config file, or constructor. |
| Output | parse(x) result | native type | The typed, validated parameter value. |
| Output | serialize(x) result | str | String representation for task IDs and scheduler communication. |
Usage Examples
Basic parameter types
import luigi
import datetime
class DailyReport(luigi.Task):
    """Write a one-row CSV report for a given date and threshold.

    Demonstrates the basic parameter types: a date, an int, a boolean flag
    and a plain string parameter.
    """

    # NOTE: the default date is computed once, when the class body executes.
    date = luigi.DateParameter(default=datetime.date.today())
    threshold = luigi.IntParameter(default=100)
    verbose = luigi.BoolParameter()
    name = luigi.Parameter(default='report')

    def output(self):
        """Return the local CSV target derived from date, name and threshold."""
        report_path = '/data/reports/%s/%s_%d.csv' % (self.date, self.name, self.threshold)
        return luigi.LocalTarget(report_path)

    def run(self):
        """Emit the header and a single data row, optionally announcing progress."""
        if self.verbose:
            progress = "Generating report for %s with threshold %d" % (self.date, self.threshold)
            print(progress)
        data_row = "%s,%d\n" % (self.date, self.threshold)
        with self.output().open('w') as report:
            report.write("date,metric\n")
            report.write(data_row)
Run from the command line:
luigi --module my_tasks DailyReport --date 2026-01-15 --threshold 200 --verbose --local-scheduler
Date interval parameter (from the wordcount example)
import luigi
class InputText(luigi.ExternalTask):
    """External task representing one day's text file."""

    date = luigi.DateParameter()

    def output(self):
        """Return the local target for this date's text file."""
        text_path = self.date.strftime('/var/tmp/text/%Y-%m-%d.txt')
        return luigi.LocalTarget(text_path)
class WordCount(luigi.Task):
    """Count word occurrences across all daily input files in a date interval.

    The result is written as tab-separated ``word<TAB>count`` lines.
    """

    date_interval = luigi.DateIntervalParameter()

    def requires(self):
        """Return one InputText dependency per date in the interval."""
        return [InputText(date) for date in self.date_interval.dates()]

    def output(self):
        """Return the local target named after the date interval."""
        return luigi.LocalTarget('/var/tmp/text-count/%s' % self.date_interval)

    def run(self):
        """Tally every whitespace-separated word and write the totals."""
        count = {}
        for target in self.input():
            # Open each input in a ``with`` block so the file handle is closed
            # as soon as it has been read, instead of being left open.
            with target.open('r') as infile:
                for line in infile:
                    for word in line.strip().split():
                        count[word] = count.get(word, 0) + 1
        with self.output().open('w') as out:
            for word, cnt in count.items():
                out.write("%s\t%d\n" % (word, cnt))
List and Dict parameters
import luigi
class ConfigurableTask(luigi.Task):
    """Example task configured through a list parameter and a dict parameter."""

    features = luigi.ListParameter(default=['feature_a', 'feature_b'])
    settings = luigi.DictParameter(default={'lr': 0.01, 'epochs': 10})

    def output(self):
        """Return the local target the summary is written to."""
        summary_path = '/data/model_output.txt'
        return luigi.LocalTarget(summary_path)

    def run(self):
        """Write the configured features and settings to the output target."""
        with self.output().open('w') as sink:
            # str() is required: formatting the features value directly with
            # % would treat a tuple as multiple format arguments.
            feature_text = str(self.features)
            sink.write("Features: %s\n" % feature_text)
            sink.write("Learning rate: %s\n" % self.settings['lr'])
            sink.write("Epochs: %s\n" % self.settings['epochs'])
Run from the command line:
luigi --module my_tasks ConfigurableTask \
--features '["feature_x","feature_y","feature_z"]' \
--settings '{"lr": 0.001, "epochs": 50}' \
--local-scheduler
Insignificant parameters
import luigi
class SecureTask(luigi.Task):
    """Example of an insignificant parameter carrying a credential.

    ``api_key`` is declared with ``significant=False`` so it does not
    contribute to the task's identity.
    """

    date = luigi.DateParameter()
    api_key = luigi.Parameter(significant=False)  # does NOT affect task identity

    def output(self):
        """Return the local JSON target for this date."""
        return luigi.LocalTarget('/data/secure/%s.json' % self.date)

    def run(self):
        """Use the API key and write a status document."""
        # api_key is available but not part of the task_id.
        # Never print the secret itself: report only whether one was supplied,
        # so the credential does not end up in stdout or log files.
        print("API key provided: %s" % bool(self.api_key))
        with self.output().open('w') as f:
            f.write('{"status": "ok"}\n')