Implementation:Apache Paimon SchemaSerializer
| Knowledge Sources | |
|---|---|
| Domains | Schema Management, Serialization, JSON |
| Last Updated | 2026-02-08 00:00 GMT |
Overview
SchemaSerializer provides bidirectional JSON serialization and deserialization for TableSchema objects with version compatibility handling.
Description
SchemaSerializer is a specialized serialization utility that handles the conversion of TableSchema objects to and from JSON format. It implements both JsonSerializer and JsonDeserializer interfaces to provide complete bidirectional transformation capabilities, making it suitable for persisting schema metadata to storage or transmitting schema information over network protocols.
The serialization process captures all essential components of a table schema: version number, schema ID, field definitions with their data types, the highest field ID used (for tracking field additions), partition keys, primary keys, table options, optional comment, and creation timestamp. Each component is serialized using Jackson's JSON generator, with proper handling of collections and nested objects like DataField instances.
The deserialization process includes important backward compatibility logic to handle schemas created by older versions of Paimon. For schemas from version 0.7 or earlier, it applies a default bucket value of 1 if not explicitly set, maintaining consistency with the historical default. For schemas from version 0.8 or earlier, it defaults the file format to ORC if not specified, preserving the original default behavior. These compatibility measures ensure that schemas created with older Paimon versions can be correctly loaded and used with newer versions.
The implementation uses a singleton pattern (INSTANCE) for efficient reuse, as the serializer is stateless and thread-safe. It delegates field-level serialization to DataField's own serializeJson() method and uses DataTypeJsonParser for field deserialization, maintaining consistency with Paimon's type system serialization.
Usage
Use SchemaSerializer when persisting TableSchema to JSON format for storage in catalog metadata, schema files, or REST API responses, and when loading schema from these JSON representations. The serializer ensures version compatibility and proper handling of all schema components.
Code Reference
Source Location
- Repository: Apache_Paimon
- File: paimon-api/src/main/java/org/apache/paimon/schema/SchemaSerializer.java
Signature
/**
 * Converts {@code TableSchema} objects to JSON and back.
 *
 * <p>Implements both the serializer and the deserializer side, so one object handles the
 * round trip. This listing shows signatures only; method bodies are omitted.
 */
public class SchemaSerializer
implements JsonSerializer<TableSchema>, JsonDeserializer<TableSchema> {
/** Shared instance; the page describes the serializer as stateless and thread-safe. */
public static final SchemaSerializer INSTANCE = new SchemaSerializer();
/**
 * Writes the given schema as JSON to the generator.
 *
 * @param tableSchema schema to serialize
 * @param generator Jackson generator that receives the JSON output
 * @throws IOException declared by the signature; presumably raised when writing to the
 *     generator fails (the body is not shown in this listing)
 */
@Override
public void serialize(TableSchema tableSchema, JsonGenerator generator)
throws IOException;
/**
 * Rebuilds a schema from its JSON tree.
 *
 * <p>Per the description on this page, missing {@code bucket} and {@code file.format}
 * options are defaulted for schemas written by old Paimon versions.
 *
 * @param node JSON node holding a serialized schema
 * @return the reconstructed {@code TableSchema}
 */
@Override
public TableSchema deserialize(JsonNode node);
}
Import
import org.apache.paimon.schema.SchemaSerializer;
I/O Contract
Inputs
| Name | Type | Required | Description |
|---|---|---|---|
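| tableSchema | TableSchema | Yes (serialize) | Schema object to serialize; argument of serialize() |
| generator | JsonGenerator | Yes (serialize) | Jackson JSON generator that receives the output; argument of serialize() |
| node | JsonNode | Yes (deserialize) | JSON node to deserialize from; argument of deserialize() |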
Outputs
| Name | Type | Description |
|---|---|---|
| serialize() | void | Writes schema to JSON generator |
| deserialize() | TableSchema | Returns reconstructed TableSchema object |
Usage Examples
import org.apache.paimon.schema.SchemaSerializer;
import org.apache.paimon.schema.TableSchema;
import org.apache.paimon.types.DataField;
import org.apache.paimon.types.DataTypes;
import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.core.JsonGenerator;
import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.databind.JsonNode;
import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;
import java.io.StringWriter;
import java.io.UncheckedIOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
// Example 1: Serialize a schema to JSON string
List<DataField> fields = Arrays.asList(
        new DataField(0, "id", DataTypes.BIGINT()),
        new DataField(1, "name", DataTypes.STRING()),
        new DataField(2, "age", DataTypes.INT()),
        new DataField(3, "create_time", DataTypes.TIMESTAMP())
);
Map<String, String> options = new HashMap<>();
options.put("bucket", "8");
options.put("file.format", "parquet");
TableSchema schema = new TableSchema(
        1, // version
        1, // id
        fields,
        3, // highestFieldId
        Arrays.asList("create_time"), // partitionKeys
        Arrays.asList("id"), // primaryKeys
        options,
        "User table", // comment
        System.currentTimeMillis() // timeMillis
);
ObjectMapper mapper = new ObjectMapper();
// Write through SchemaSerializer explicitly: nothing registers it with this ObjectMapper,
// so mapper.writeValueAsString(schema) would not go through the serializer shown here.
StringWriter schemaWriter = new StringWriter();
// try-with-resources closes (and thereby flushes) the generator even if serialize() throws.
try (JsonGenerator schemaGenerator = mapper.getFactory().createGenerator(schemaWriter)) {
    SchemaSerializer.INSTANCE.serialize(schema, schemaGenerator);
}
String json = schemaWriter.toString();
System.out.println("Serialized schema: " + json);
// Example 2: Rebuild a TableSchema from its JSON text
// ("[...]" and ",..." below are elided placeholders; substitute a complete schema document)
String schemaJson = "{\"version\":1,\"id\":1,\"fields\":[...],\"highestFieldId\":3,...}";
// Parse the text into a tree and hand the tree straight to the deserializer
TableSchema deserializedSchema =
        SchemaSerializer.INSTANCE.deserialize(mapper.readTree(schemaJson));
System.out.println("Schema version: " + deserializedSchema.version());
System.out.println("Schema ID: " + deserializedSchema.id());
System.out.println("Fields: " + deserializedSchema.fields().size());
// Example 3: Round-trip serialization
TableSchema original = createTableSchema();
// Serialize. try-with-resources closes the generator even when serialize() throws;
// closing also flushes buffered output into the writer before it is read back.
StringWriter writer = new StringWriter();
try (JsonGenerator generator = mapper.getFactory().createGenerator(writer)) {
    SchemaSerializer.INSTANCE.serialize(original, generator);
}
String serialized = writer.toString();
// Deserialize
JsonNode parsed = mapper.readTree(serialized);
TableSchema restored = SchemaSerializer.INSTANCE.deserialize(parsed);
// Verify round-trip
assert original.id() == restored.id();
assert original.fields().size() == restored.fields().size();
// Example 4: A legacy schema (0.7) that stores no bucket option gets the default bucket
// NOTE: "[...]" stands in for the field definitions; supply real field JSON before running.
String oldSchemaJson = """
{
"version": 0,
"id": 1,
"fields": [...],
"highestFieldId": 3,
"partitionKeys": [],
"primaryKeys": ["id"],
"options": {},
"timeMillis": 1234567890000
}
""";
TableSchema oldSchema =
        SchemaSerializer.INSTANCE.deserialize(mapper.readTree(oldSchemaJson));
// "options" above is empty, so the deserializer fills in bucket = 1 for version <= 0.7
assert "1".equals(oldSchema.options().get("bucket"));
// Example 5: Handle version 0.8 schema with default file format
// NOTE(review): schema version 2 is believed to be what Paimon 0.8 writes
// (TableSchema.PAIMON_08_VERSION); confirm against the TableSchema constants.
// "[...]" stands in for the field definitions; supply real field JSON before running.
String v08SchemaJson = """
{
"version": 2,
"id": 1,
"fields": [...],
"highestFieldId": 3,
"partitionKeys": [],
"primaryKeys": ["id"],
"options": {},
"timeMillis": 1234567890000
}
""";
JsonNode v08Node = mapper.readTree(v08SchemaJson);
TableSchema v08Schema = SchemaSerializer.INSTANCE.deserialize(v08Node);
// File format defaults to "orc" for version <= 0.8 when "file.format" is not stored
assert "orc".equals(v08Schema.options().get("file.format"));
// Example 6: Serialize schema with all optional fields (pretty-printed)
TableSchema fullSchema = new TableSchema(
        2,
        5,
        fields,
        10,
        Arrays.asList("year", "month"),
        Arrays.asList("id", "name"),
        options,
        "Complete user information table",
        System.currentTimeMillis()
);
// Serialize through SchemaSerializer (a bare ObjectMapper is not configured to use it);
// useDefaultPrettyPrinter() turns on indented output for this generator.
StringWriter fullWriter = new StringWriter();
try (JsonGenerator fullGenerator =
        mapper.getFactory().createGenerator(fullWriter).useDefaultPrettyPrinter()) {
    SchemaSerializer.INSTANCE.serialize(fullSchema, fullGenerator);
}
String fullJson = fullWriter.toString();
System.out.println(fullJson);
// Example 7: Serialize schema without comment
TableSchema noCommentSchema = new TableSchema(
1,
1,
fields,
3,
Collections.emptyList(),
Arrays.asList("id"),
options,
null, // No comment
System.currentTimeMillis()
);
String noCommentJson = mapper.writeValueAsString(noCommentSchema);
// JSON will not include "comment" field
// Example 8: Save schema to file
import java.nio.file.Files;
import java.nio.file.Path;
// Distinct local names avoid clashing with the `schema` / `json` variables of Example 1.
TableSchema schemaToSave = createTableSchema();
// Serialize through SchemaSerializer (a bare ObjectMapper is not configured to use it),
// pretty-printed so the saved file is readable.
StringWriter fileWriter = new StringWriter();
try (JsonGenerator fileGenerator =
        mapper.getFactory().createGenerator(fileWriter).useDefaultPrettyPrinter()) {
    SchemaSerializer.INSTANCE.serialize(schemaToSave, fileGenerator);
}
String savedJson = fileWriter.toString();
Files.writeString(Path.of("/path/to/schema.json"), savedJson);
// Example 9: Read a schema back from the file written in Example 8
Path schemaPath = Path.of("/path/to/schema.json");
String loadedJson = Files.readString(schemaPath);
// Parse the file content into a tree, then let SchemaSerializer rebuild the schema
TableSchema loadedSchema =
        SchemaSerializer.INSTANCE.deserialize(mapper.readTree(loadedJson));
// Example 10: Use in REST API response
import javax.ws.rs.GET;
import javax.ws.rs.Produces;
import javax.ws.rs.core.MediaType;
/**
 * Returns the JSON form of a table's schema.
 *
 * <p>The JAX-RS {@code Path} and {@code PathParam} annotations are written fully qualified:
 * neither is imported by this example, and the simple name {@code Path} would otherwise
 * collide with {@code java.nio.file.Path}.
 *
 * @param tableName name of the table, taken from the request path
 * @return the schema serialized by {@code SchemaSerializer}
 * @throws UncheckedIOException if writing the JSON fails
 */
@GET
@javax.ws.rs.Path("/tables/{tableName}/schema")
@Produces(MediaType.APPLICATION_JSON)
public String getTableSchema(@javax.ws.rs.PathParam("tableName") String tableName) {
    TableSchema schema = catalogService.getSchema(tableName);
    // In real code share one ObjectMapper instead of creating one per request.
    ObjectMapper mapper = new ObjectMapper();
    StringWriter body = new StringWriter();
    // Serialize through SchemaSerializer; the generator is closed (and flushed) before
    // the writer is read.
    try (JsonGenerator generator = mapper.getFactory().createGenerator(body)) {
        SchemaSerializer.INSTANCE.serialize(schema, generator);
    } catch (IOException e) {
        // Jackson's checked exception must not escape a method that declares none;
        // keep the cause so the failure stays diagnosable.
        throw new UncheckedIOException(
                "Failed to serialize schema of table " + tableName, e);
    }
    return body.toString();
}
// Example 11: Validate deserialized schema
/**
 * Checks whether the given JSON text deserializes into a plausible schema.
 *
 * <p>Any problem is reported on {@code System.err} prefixed with "Invalid schema: ".
 *
 * @param json JSON text of a serialized schema
 * @return {@code true} when the schema parses and passes both checks, {@code false} otherwise
 */
public boolean validateSchema(String json) {
    try {
        TableSchema candidate = SchemaSerializer.INSTANCE.deserialize(mapper.readTree(json));
        // Determine the first rule the schema violates, if any
        String violation = null;
        if (candidate.fields().isEmpty()) {
            violation = "Schema must have at least one field";
        } else if (candidate.highestFieldId() < candidate.fields().size() - 1) {
            violation = "Invalid highestFieldId";
        }
        if (violation == null) {
            return true;
        }
        System.err.println("Invalid schema: " + violation);
        return false;
    } catch (Exception e) {
        // Parsing or deserialization failed; report it the same way as a rule violation
        System.err.println("Invalid schema: " + e.getMessage());
        return false;
    }
}