Implementation:Apache Paimon Schema
| Knowledge Sources | |
|---|---|
| Domains | Table Schema, Data Model, Schema Validation |
| Last Updated | 2026-02-08 00:00 GMT |
Overview
Schema is a public API class representing a table schema definition, encapsulating fields, partition keys, primary keys, options, and an optional comment.
Description
Schema (public API since 0.4.0) is annotated with @Public and carries Jackson annotations for JSON serialization. The constructor performs comprehensive normalization: if primary keys or partition keys are specified in the options, it extracts them and removes them from the options map to avoid duplication. It also validates that there are no duplicate field or key names, validates that every partition and primary key column exists in the field list, and forces primary key fields to be non-nullable.
The class provides a Builder inner class with a fluent API for constructing schemas programmatically. The builder uses AtomicInteger to auto-assign field IDs and ReassignFieldId for nested type ID assignment, ensuring unique identifiers throughout the schema hierarchy. The rowType() method converts the fields to a RowType for internal processing.
Schema is a foundational class in the Paimon public API that defines the structure of every table. It is referenced by GetTableResponse in the REST layer, by table creation and alteration flows, and by the internal TableSchema (a separate class that carries the same schema information plus additional resolved metadata). The normalization and validation logic ensures schema integrity at construction time, preventing invalid schemas from being created.
Usage
Use Schema to define table structure when creating tables or to represent existing table schemas in catalog operations.
Code Reference
Source Location
- Repository: Apache_Paimon
- File: paimon-api/src/main/java/org/apache/paimon/schema/Schema.java
- Lines: 1-390
Signature
@Public
@JsonIgnoreProperties(ignoreUnknown = true)
public class Schema {
@JsonProperty("fields")
private final List<DataField> fields;
@JsonProperty("partitionKeys")
private final List<String> partitionKeys;
@JsonProperty("primaryKeys")
private final List<String> primaryKeys;
@JsonProperty("options")
private final Map<String, String> options;
@Nullable
@JsonProperty("comment")
private final String comment;
@JsonCreator
public Schema(
List<DataField> fields,
List<String> partitionKeys,
List<String> primaryKeys,
Map<String, String> options,
@Nullable String comment);
public RowType rowType();
public List<DataField> fields();
public List<String> partitionKeys();
public List<String> primaryKeys();
public Map<String, String> options();
public String comment();
public Schema copy(RowType rowType);
public static Schema.Builder newBuilder();
public static final class Builder {
public Builder column(String columnName, DataType dataType);
public Builder column(String columnName, DataType dataType, String description);
public Builder column(String columnName, DataType dataType,
String description, String defaultValue);
public Builder partitionKeys(String... columnNames);
public Builder partitionKeys(List<String> columnNames);
public Builder primaryKey(String... columnNames);
public Builder primaryKey(List<String> columnNames);
public Builder options(Map<String, String> options);
public Builder option(String key, String value);
public Builder comment(String comment);
public Schema build();
}
}
Import
import org.apache.paimon.schema.Schema;
I/O Contract
Inputs
| Name | Type | Required | Description |
|---|---|---|---|
| fields | List<DataField> | yes | Table columns with types |
| partitionKeys | List<String> | yes | Partition column names (can be empty) |
| primaryKeys | List<String> | yes | Primary key column names (can be empty) |
| options | Map<String, String> | yes | Table configuration options |
| comment | String | no | Table comment/description |
Outputs
| Name | Type | Description |
|---|---|---|
| fields | List<DataField> | Normalized fields with primary keys set non-nullable |
| partitionKeys | List<String> | Normalized partition keys |
| primaryKeys | List<String> | Normalized primary keys |
| options | Map<String, String> | Options with the primary-key and partition entries removed (they are moved into primaryKeys and partitionKeys) |
| rowType | RowType | Fields as RowType |
Usage Examples
Creating Schema with Builder
import org.apache.paimon.schema.Schema;
import org.apache.paimon.types.DataTypes;
// Build schema using fluent API
Schema schema = Schema.newBuilder()
.column("id", DataTypes.INT(), "User ID")
.column("name", DataTypes.STRING(), "User name")
.column("age", DataTypes.INT(), "User age")
.column("email", DataTypes.STRING(), "Email address")
.column("created_at", DataTypes.TIMESTAMP(3), "Creation timestamp")
.primaryKey("id")
.partitionKeys("created_at")
.option("bucket", "4")
.option("file.format", "parquet")
.comment("User information table")
.build();
System.out.println("Fields: " + schema.fields().size());
System.out.println("Primary keys: " + schema.primaryKeys());
System.out.println("Partition keys: " + schema.partitionKeys());
Creating Schema with Multiple Primary Keys
import org.apache.paimon.schema.Schema;
import org.apache.paimon.types.DataTypes;
// Composite primary key
Schema orderSchema = Schema.newBuilder()
.column("order_id", DataTypes.BIGINT())
.column("item_id", DataTypes.BIGINT())
.column("quantity", DataTypes.INT())
.column("price", DataTypes.DECIMAL(10, 2))
.column("order_date", DataTypes.DATE())
.primaryKey("order_id", "item_id")
.partitionKeys("order_date")
.option("merge-engine", "deduplicate")
.build();
System.out.println("Composite PK: " + orderSchema.primaryKeys());
// Output: [order_id, item_id]
Creating Append-Only Table Schema
import org.apache.paimon.schema.Schema;
import org.apache.paimon.types.DataTypes;
// Schema without primary key (append-only)
Schema logSchema = Schema.newBuilder()
.column("timestamp", DataTypes.TIMESTAMP(3))
.column("level", DataTypes.STRING())
.column("message", DataTypes.STRING())
.column("source", DataTypes.STRING())
.partitionKeys("timestamp")
.option("bucket", "-1") // Bucket-unaware mode (append-only table without fixed buckets)
.comment("Application logs")
.build();
System.out.println("Has primary key: " + !logSchema.primaryKeys().isEmpty());
// Output: Has primary key: false
Schema with Default Values
import org.apache.paimon.schema.Schema;
import org.apache.paimon.types.DataField;
import org.apache.paimon.types.DataTypes;
// Schema with column default values
Schema productSchema = Schema.newBuilder()
.column("product_id", DataTypes.INT(), "Product ID", null)
.column("name", DataTypes.STRING(), "Product name", null)
.column("price", DataTypes.DECIMAL(10, 2), "Price", "0.00")
.column("stock", DataTypes.INT(), "Stock quantity", "0")
.column("active", DataTypes.BOOLEAN(), "Is active", "true")
.primaryKey("product_id")
.build();
// Access fields with defaults
for (DataField field : productSchema.fields()) {
if (field.defaultValue() != null) {
System.out.println(field.name() + " default: " + field.defaultValue());
}
}
Working with Existing Schema
import org.apache.paimon.schema.Schema;
import org.apache.paimon.types.DataField;
import org.apache.paimon.types.RowType;
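// Get schema from a REST API table response
// (Catalog.getTable returns a Table; GetTableResponse comes from the REST API client)
GetTableResponse tableResponse = restApi.getTable(identifier);
Schema schema = tableResponse.getSchema();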
// Extract information
List<DataField> fields = schema.fields();
List<String> primaryKeys = schema.primaryKeys();
List<String> partitionKeys = schema.partitionKeys();
Map<String, String> options = schema.options();
System.out.println("Table has " + fields.size() + " columns");
System.out.println("Primary key: " + primaryKeys);
System.out.println("Partitioned by: " + partitionKeys);
// Convert to RowType
RowType rowType = schema.rowType();
System.out.println("Row type: " + rowType);
// Copy with modified row type
RowType modifiedRowType = /* modified version */;
Schema modifiedSchema = schema.copy(modifiedRowType);
JSON Serialization
import org.apache.paimon.schema.Schema;
import org.apache.paimon.types.DataTypes;
import org.apache.paimon.utils.JsonSerdeUtil;
// Create schema
Schema schema = Schema.newBuilder()
.column("id", DataTypes.INT())
.column("name", DataTypes.STRING())
.primaryKey("id")
.build();
// Serialize to JSON
String json = JsonSerdeUtil.toJson(schema);
System.out.println("JSON: " + json);
// Deserialize from JSON
Schema deserialized = JsonSerdeUtil.fromJson(json, Schema.class);
System.out.println("Deserialized fields: " + deserialized.fields().size());