From e58ab3211cf086f6450404877e5ab3215ab36b22 Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Tue, 25 Nov 2025 15:39:17 -0700 Subject: [PATCH 01/10] first pass --- .../test/utils/database-utils.ts | 21 +++ .../test/utils/schema-definitions.ts | 30 ++++ apps/framework-cli/src/cli/local_webserver.rs | 1 + .../framework-cli/src/cli/routines/migrate.rs | 5 + .../framework/core/infra_reality_checker.rs | 3 + .../framework/core/infrastructure/table.rs | 9 + .../src/framework/core/infrastructure_map.rs | 68 +++++++ apps/framework-cli/src/framework/core/plan.rs | 3 + .../src/framework/core/plan_validator.rs | 2 + .../src/framework/python/generate.rs | 38 +++- .../src/framework/python/utils.rs | 1 + .../src/framework/streaming/generate.rs | 1 + .../src/framework/typescript/generate.rs | 31 ++++ .../olap/clickhouse/diff_strategy.rs | 4 + .../infrastructure/olap/clickhouse/mapper.rs | 6 + .../src/infrastructure/olap/clickhouse/mod.rs | 135 ++++++++++++-- .../infrastructure/olap/clickhouse/model.rs | 1 + .../infrastructure/olap/clickhouse/queries.rs | 116 +++++++++++- .../olap/clickhouse/type_parser.rs | 1 + .../src/infrastructure/olap/ddl_ordering.rs | 9 + .../processes/kafka_clickhouse_sync.rs | 10 ++ .../src/utilities/validate_passthrough.rs | 35 ++++ .../llm-docs/python/table-setup.md | 63 ++++++- .../llm-docs/typescript/table-setup.md | 58 +++++- .../src/pages/moose/olap/_meta.tsx | 3 + .../src/pages/moose/olap/compression.mdx | 167 ++++++++++++++++++ .../pages/moose/olap/schema-optimization.mdx | 38 ++++ packages/protobuf/infrastructure_map.proto | 2 + packages/py-moose-lib/moose_lib/__init__.py | 5 + .../py-moose-lib/moose_lib/data_models.py | 35 ++++ packages/py-moose-lib/tests/test_codec.py | 76 ++++++++ .../ts-moose-lib/src/browserCompatible.ts | 1 + .../src/dataModels/dataModelTypes.ts | 1 + .../src/dataModels/typeConvert.ts | 15 ++ packages/ts-moose-lib/src/dataModels/types.ts | 28 +++ packages/ts-moose-lib/src/dmv2/internal.ts | 5 + 
.../tests/cluster-validation.test.ts | 1 + .../tests/olap-table-versioning.test.ts | 1 + .../ts-moose-lib/tests/typeConvert.test.ts | 33 ++++ templates/python-tests/src/ingest/models.py | 24 ++- .../typescript-tests/src/ingest/models.ts | 20 +++ 41 files changed, 1086 insertions(+), 20 deletions(-) create mode 100644 apps/framework-docs/src/pages/moose/olap/compression.mdx create mode 100644 packages/py-moose-lib/tests/test_codec.py diff --git a/apps/framework-cli-e2e/test/utils/database-utils.ts b/apps/framework-cli-e2e/test/utils/database-utils.ts index 28a86a2531..60ae5a38c1 100644 --- a/apps/framework-cli-e2e/test/utils/database-utils.ts +++ b/apps/framework-cli-e2e/test/utils/database-utils.ts @@ -255,6 +255,7 @@ export interface ExpectedColumn { type: string | RegExp; // Allow regex for complex type matching nullable?: boolean; comment?: string; + codec?: string | RegExp; } /** @@ -433,6 +434,26 @@ export const validateTableSchema = async ( `Column '${expectedCol.name}' comment mismatch: expected '${expectedCol.comment}', got '${actualCol.comment}'`, ); } + + // Codec validation (if specified) + if (expectedCol.codec !== undefined) { + const actualCodec = actualCol.codec_expression; + let codecMatches = false; + + if (typeof expectedCol.codec === "string") { + // Exact string match + codecMatches = actualCodec === expectedCol.codec; + } else if (expectedCol.codec instanceof RegExp) { + // Regex match for complex codec expressions + codecMatches = expectedCol.codec.test(actualCodec); + } + + if (!codecMatches) { + errors.push( + `Column '${expectedCol.name}' codec mismatch: expected '${expectedCol.codec}', got '${actualCodec}'`, + ); + } + } } // Check for unexpected columns (optional - could be made configurable) diff --git a/apps/framework-cli-e2e/test/utils/schema-definitions.ts b/apps/framework-cli-e2e/test/utils/schema-definitions.ts index ce622995ee..eb67b1db11 100644 --- a/apps/framework-cli-e2e/test/utils/schema-definitions.ts +++ 
b/apps/framework-cli-e2e/test/utils/schema-definitions.ts @@ -421,6 +421,21 @@ export const TYPESCRIPT_TEST_SCHEMAS: ExpectedTableSchema[] = [ { name: "payloadBasic", type: "JSON(count Int64, name String)" }, ], }, + // Codec compression test table + { + tableName: "CodecTest", + columns: [ + { name: "id", type: "String" }, + { name: "timestamp", type: /DateTime\('UTC'\)/, codec: /Delta.*LZ4/ }, + { name: "log_blob", type: "JSON", codec: "ZSTD(3)" }, + { name: "combination_hash", type: "Array(UInt64)", codec: "ZSTD(1)" }, + { name: "temperature", type: "Float64", codec: /Gorilla.*ZSTD/ }, + { name: "request_count", type: "Float64", codec: /DoubleDelta.*LZ4/ }, + { name: "user_agent", type: "String", codec: "ZSTD(3)" }, + { name: "tags", type: "Array(String)", codec: "LZ4" }, + { name: "status_code", type: "Float64" }, + ], + }, ]; // ============ PYTHON TEMPLATE SCHEMA DEFINITIONS ============ @@ -805,6 +820,21 @@ export const PYTHON_TEST_SCHEMAS: ExpectedTableSchema[] = [ { name: "payload_basic", type: "JSON(count Int64, name String)" }, ], }, + // Codec compression test table + { + tableName: "CodecTest", + columns: [ + { name: "id", type: "String" }, + { name: "timestamp", type: /DateTime\('UTC'\)/, codec: /Delta.*LZ4/ }, + { name: "log_blob", type: "JSON", codec: "ZSTD(3)" }, + { name: "combination_hash", type: "Array(UInt64)", codec: "ZSTD(1)" }, + { name: "temperature", type: "Float64", codec: /Gorilla.*ZSTD/ }, + { name: "request_count", type: "Float64", codec: /DoubleDelta.*LZ4/ }, + { name: "user_agent", type: "String", codec: "ZSTD(3)" }, + { name: "tags", type: "Array(String)", codec: "LZ4" }, + { name: "status_code", type: "Int64" }, + ], + }, ]; // ============ HELPER FUNCTIONS ============ diff --git a/apps/framework-cli/src/cli/local_webserver.rs b/apps/framework-cli/src/cli/local_webserver.rs index 840252b1a9..94d0acd5c5 100644 --- a/apps/framework-cli/src/cli/local_webserver.rs +++ b/apps/framework-cli/src/cli/local_webserver.rs @@ -3544,6 +3544,7 
@@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, diff --git a/apps/framework-cli/src/cli/routines/migrate.rs b/apps/framework-cli/src/cli/routines/migrate.rs index 6edc7a8bea..1aa1f0fab1 100644 --- a/apps/framework-cli/src/cli/routines/migrate.rs +++ b/apps/framework-cli/src/cli/routines/migrate.rs @@ -763,6 +763,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, @@ -797,6 +798,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }); table } @@ -1140,6 +1142,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, after_column: None, database: Some("bad_db".to_string()), @@ -1157,6 +1160,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, after_column: Column { name: "col".to_string(), @@ -1168,6 +1172,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, database: Some("another_bad_db".to_string()), cluster_name: None, diff --git a/apps/framework-cli/src/framework/core/infra_reality_checker.rs b/apps/framework-cli/src/framework/core/infra_reality_checker.rs index 17320e03ca..0d8670f4e3 100644 --- a/apps/framework-cli/src/framework/core/infra_reality_checker.rs +++ b/apps/framework-cli/src/framework/core/infra_reality_checker.rs @@ -515,6 +515,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, @@ -610,6 +611,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }); let mock_client = MockOlapClient { @@ -679,6 +681,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }; actual_table.columns.push(timestamp_col.clone()); infra_table.columns.push(timestamp_col); diff --git 
a/apps/framework-cli/src/framework/core/infrastructure/table.rs b/apps/framework-cli/src/framework/core/infrastructure/table.rs index a21336a8e4..a27c107d3a 100644 --- a/apps/framework-cli/src/framework/core/infrastructure/table.rs +++ b/apps/framework-cli/src/framework/core/infrastructure/table.rs @@ -600,6 +600,8 @@ pub struct Column { pub comment: Option, // Column comment for metadata storage #[serde(skip_serializing_if = "Option::is_none", default)] pub ttl: Option, + #[serde(skip_serializing_if = "Option::is_none", default)] + pub codec: Option, // Compression codec expression (e.g., "ZSTD(3)", "Delta, LZ4") } #[derive(Debug, Clone, Eq, PartialEq, Hash)] @@ -1114,6 +1116,7 @@ impl Column { .collect(), comment: self.comment.clone(), ttl: self.ttl.clone(), + codec: self.codec.clone(), special_fields: Default::default(), } } @@ -1136,6 +1139,7 @@ impl Column { annotations, comment: proto.comment, ttl: proto.ttl, + codec: proto.codec, } } } @@ -1515,6 +1519,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }; let json = serde_json::to_string(&nested_column).unwrap(); @@ -1535,6 +1540,7 @@ mod tests { annotations: vec![], comment: Some("[MOOSE_METADATA:DO_NOT_MODIFY] {\"version\":1,\"enum\":{\"name\":\"TestEnum\",\"members\":[]}}".to_string()), ttl: None, + codec: None, }; // Convert to proto and back @@ -1558,6 +1564,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }; let proto = column_without_comment.to_proto(); @@ -1741,6 +1748,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "name".to_string(), @@ -1752,6 +1760,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, ]; diff --git a/apps/framework-cli/src/framework/core/infrastructure_map.rs b/apps/framework-cli/src/framework/core/infrastructure_map.rs index 95ef3f7465..da2b8dee3c 100644 --- a/apps/framework-cli/src/framework/core/infrastructure_map.rs +++ 
b/apps/framework-cli/src/framework/core/infrastructure_map.rs @@ -3025,6 +3025,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "name".to_string(), @@ -3036,6 +3037,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "to_be_removed".to_string(), @@ -3047,6 +3049,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -3081,6 +3084,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "name".to_string(), @@ -3092,6 +3096,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "age".to_string(), // New column @@ -3103,6 +3108,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string(), "name".to_string()]), // Changed order_by @@ -3151,6 +3157,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "to_remove".to_string(), @@ -3162,6 +3169,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, ]; @@ -3178,6 +3186,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "new_column".to_string(), @@ -3189,6 +3198,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, ]; @@ -3324,6 +3334,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }); let diff = compute_table_columns_diff(&before, &after); @@ -3355,6 +3366,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }); let diff = compute_table_columns_diff(&before, &after); @@ -3383,6 +3395,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }); after.columns.push(Column { @@ -3395,6 +3408,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: 
None, }); let diff = compute_table_columns_diff(&before, &after); @@ -3429,6 +3443,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "to_remove".to_string(), @@ -3440,6 +3455,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "to_modify".to_string(), @@ -3451,6 +3467,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, ]); @@ -3466,6 +3483,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "to_modify".to_string(), // modified @@ -3477,6 +3495,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "new_column".to_string(), // added @@ -3488,6 +3507,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, ]); @@ -3632,6 +3652,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }); after.columns.push(Column { @@ -3644,6 +3665,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }); let diff = compute_table_columns_diff(&before, &after); @@ -3677,6 +3699,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "name".to_string(), @@ -3688,6 +3711,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, ]); @@ -3703,6 +3727,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "id".to_string(), @@ -3714,6 +3739,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, ]); @@ -3741,6 +3767,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }; before.columns.push(col.clone()); after.columns.push(col); @@ -3782,6 +3809,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }); // Change every other column type in the after table @@ -3815,6 
+3843,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }); } @@ -3845,6 +3874,7 @@ mod diff_tests { ], comment: None, ttl: None, + codec: None, }); after.columns.push(Column { @@ -3860,6 +3890,7 @@ mod diff_tests { ], comment: None, ttl: None, + codec: None, }); let diff = compute_table_columns_diff(&before, &after); @@ -3900,6 +3931,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }); after.columns.push(Column { @@ -3912,6 +3944,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }); // Test special characters in column name @@ -3925,6 +3958,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }); after.columns.push(Column { @@ -3937,6 +3971,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }); let diff = compute_table_columns_diff(&before, &after); @@ -3961,6 +3996,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }; let col2 = col1.clone(); assert!(columns_are_equivalent(&col1, &col2)); @@ -3998,6 +4034,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }; let clickhouse_enum_col = Column { @@ -4022,6 +4059,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }; // These should be equivalent due to the enum semantic comparison @@ -4047,6 +4085,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }; assert!(!columns_are_equivalent( @@ -4065,6 +4104,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }; let int_col2 = Column { @@ -4077,6 +4117,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }; assert!(!columns_are_equivalent(&int_col1, &int_col2)); @@ -4108,6 +4149,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }; let json_col2 = Column { @@ -4130,6 +4172,7 @@ mod 
diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }; // These should be equivalent - order of typed_paths doesn't matter @@ -4155,6 +4198,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }; assert!(!columns_are_equivalent(&json_col1, &json_col3)); @@ -4180,6 +4224,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }; assert!(!columns_are_equivalent(&json_col1, &json_col4)); @@ -4222,6 +4267,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }; let nested_json_col2 = Column { @@ -4255,6 +4301,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }; // These should be equivalent - order doesn't matter at any level @@ -4286,6 +4333,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "priority".to_string(), @@ -4297,6 +4345,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, ], jwt: false, @@ -4308,6 +4357,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }; let col_with_user_name = Column { @@ -4328,6 +4378,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "priority".to_string(), @@ -4339,6 +4390,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, ], jwt: false, @@ -4350,6 +4402,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }; // These should be equivalent - name difference doesn't matter if structure matches @@ -4376,6 +4429,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }], // Missing priority column jwt: false, }), @@ -4386,6 +4440,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }; assert!(!columns_are_equivalent( @@ -4422,6 +4477,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + 
codec: None, }, Column { name: "notifications".to_string(), @@ -4433,6 +4489,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, ], jwt: false, @@ -4444,6 +4501,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }], jwt: false, }), @@ -4454,6 +4512,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }], jwt: false, }), @@ -4464,6 +4523,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }; let col_user = Column { @@ -4489,6 +4549,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "notifications".to_string(), @@ -4500,6 +4561,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, ], jwt: false, @@ -4511,6 +4573,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }], jwt: false, }), @@ -4521,6 +4584,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }], jwt: false, }), @@ -4531,6 +4595,7 @@ mod diff_tests { annotations: vec![], comment: None, ttl: None, + codec: None, }; // These should be equivalent - name differences at all levels don't matter @@ -4890,6 +4955,7 @@ mod diff_topic_tests { annotations: Vec::new(), comment: None, ttl: None, + codec: None, }], metadata: None, life_cycle: LifeCycle::FullyManaged, @@ -5180,6 +5246,7 @@ mod diff_topic_to_table_sync_process_tests { annotations: Vec::new(), comment: None, ttl: None, + codec: None, }], version: Some(version.clone()), source_primitive: PrimitiveSignature { @@ -5303,6 +5370,7 @@ mod diff_topic_to_table_sync_process_tests { annotations: vec![("note".to_string(), Value::String("changed".to_string()))], comment: None, ttl: None, + codec: None, }]; assert_eq!( diff --git a/apps/framework-cli/src/framework/core/plan.rs b/apps/framework-cli/src/framework/core/plan.rs index c3978121c2..b9995366f9 100644 --- 
a/apps/framework-cli/src/framework/core/plan.rs +++ b/apps/framework-cli/src/framework/core/plan.rs @@ -509,6 +509,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, @@ -713,6 +714,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }); // Create test project first to get the database name @@ -1090,6 +1092,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }); // Create mock OLAP client with the reality table diff --git a/apps/framework-cli/src/framework/core/plan_validator.rs b/apps/framework-cli/src/framework/core/plan_validator.rs index 99f9dda6a5..b4a0236adf 100644 --- a/apps/framework-cli/src/framework/core/plan_validator.rs +++ b/apps/framework-cli/src/framework/core/plan_validator.rs @@ -149,6 +149,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, @@ -324,6 +325,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, diff --git a/apps/framework-cli/src/framework/python/generate.rs b/apps/framework-cli/src/framework/python/generate.rs index a8be4e26d3..34a63fd330 100644 --- a/apps/framework-cli/src/framework/python/generate.rs +++ b/apps/framework-cli/src/framework/python/generate.rs @@ -557,7 +557,7 @@ pub fn tables_to_python(tables: &[Table], life_cycle: Option) -> Stri .unwrap(); writeln!( output, - "from moose_lib import clickhouse_default, LifeCycle, ClickHouseTTL" + "from moose_lib import clickhouse_default, clickhouse_codec, LifeCycle, ClickHouseTTL" ) .unwrap(); writeln!( @@ -677,6 +677,12 @@ pub fn tables_to_python(tables: &[Table], life_cycle: Option) -> Stri if let Some(ref ttl_expr) = column.ttl { type_str = format!("Annotated[{}, ClickHouseTTL({:?})]", type_str, ttl_expr); } + if let Some(ref 
codec_expr) = column.codec { + type_str = format!( + "Annotated[{}, clickhouse_codec({:?})]", + type_str, codec_expr + ); + } if let Some(ref default_expr) = column.default { type_str = format!( "Annotated[{}, clickhouse_default({:?})]", @@ -1045,6 +1051,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "timestamp".to_string(), @@ -1056,6 +1063,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "optional_text".to_string(), @@ -1067,6 +1075,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, ], order_by: OrderBy::Fields(vec!["primary_key".to_string()]), @@ -1100,7 +1109,7 @@ from enum import IntEnum, Enum from moose_lib import Key, IngestPipeline, IngestPipelineConfig, OlapTable, OlapConfig, clickhouse_datetime64, clickhouse_decimal, ClickhouseSize, StringToEnumMixin from moose_lib.data_models import ClickHouseJson from moose_lib import Point, Ring, LineString, MultiLineString, Polygon, MultiPolygon, FixedString -from moose_lib import clickhouse_default, LifeCycle, ClickHouseTTL +from moose_lib import clickhouse_default, clickhouse_codec, LifeCycle, ClickHouseTTL from moose_lib.blocks import MergeTreeEngine, ReplacingMergeTreeEngine, AggregatingMergeTreeEngine, SummingMergeTreeEngine, S3QueueEngine, ReplicatedMergeTreeEngine, ReplicatedReplacingMergeTreeEngine, ReplicatedAggregatingMergeTreeEngine, ReplicatedSummingMergeTreeEngine class Foo(BaseModel): @@ -1130,6 +1139,7 @@ foo_table = OlapTable[Foo]("Foo", OlapConfig( annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "numbers".to_string(), @@ -1144,6 +1154,7 @@ foo_table = OlapTable[Foo]("Foo", OlapConfig( annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "nested_numbers".to_string(), @@ -1161,6 +1172,7 @@ foo_table = OlapTable[Foo]("Foo", OlapConfig( annotations: vec![], comment: None, ttl: None, + codec: None, }, ], order_by: 
OrderBy::Fields(vec!["id".to_string()]), @@ -1215,6 +1227,7 @@ nested_array_table = OlapTable[NestedArray]("NestedArray", OlapConfig( annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "city".to_string(), @@ -1226,6 +1239,7 @@ nested_array_table = OlapTable[NestedArray]("NestedArray", OlapConfig( annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "zipCode".to_string(), @@ -1237,6 +1251,7 @@ nested_array_table = OlapTable[NestedArray]("NestedArray", OlapConfig( annotations: vec![], comment: None, ttl: None, + codec: None, }, ], jwt: false, @@ -1255,6 +1270,7 @@ nested_array_table = OlapTable[NestedArray]("NestedArray", OlapConfig( annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "address".to_string(), @@ -1266,6 +1282,7 @@ nested_array_table = OlapTable[NestedArray]("NestedArray", OlapConfig( annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "addresses".to_string(), @@ -1280,6 +1297,7 @@ nested_array_table = OlapTable[NestedArray]("NestedArray", OlapConfig( annotations: vec![], comment: None, ttl: None, + codec: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1337,6 +1355,7 @@ user_table = OlapTable[User]("User", OlapConfig( annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "data".to_string(), @@ -1348,6 +1367,7 @@ user_table = OlapTable[User]("User", OlapConfig( annotations: vec![], comment: None, ttl: None, + codec: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1405,6 +1425,7 @@ user_table = OlapTable[User]("User", OlapConfig( annotations: vec![], comment: None, ttl: None, + codec: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, @@ -1461,6 +1482,7 @@ user_table = OlapTable[User]("User", OlapConfig( annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "version".to_string(), @@ -1472,6 +1494,7 @@ user_table = 
OlapTable[User]("User", OlapConfig( annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "is_deleted".to_string(), @@ -1483,6 +1506,7 @@ user_table = OlapTable[User]("User", OlapConfig( annotations: vec![], comment: None, ttl: None, + codec: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1530,6 +1554,7 @@ user_table = OlapTable[User]("User", OlapConfig( annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "coordinates".to_string(), @@ -1544,6 +1569,7 @@ user_table = OlapTable[User]("User", OlapConfig( annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "metadata".to_string(), @@ -1558,6 +1584,7 @@ user_table = OlapTable[User]("User", OlapConfig( annotations: vec![], comment: None, ttl: None, + codec: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1619,6 +1646,7 @@ user_table = OlapTable[User]("User", OlapConfig( annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "timestamp".to_string(), @@ -1630,6 +1658,7 @@ user_table = OlapTable[User]("User", OlapConfig( annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "email".to_string(), @@ -1641,6 +1670,7 @@ user_table = OlapTable[User]("User", OlapConfig( annotations: vec![], comment: None, ttl: Some("timestamp + INTERVAL 30 DAY".to_string()), + codec: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string(), "timestamp".to_string()]), @@ -1687,6 +1717,7 @@ user_table = OlapTable[User]("User", OlapConfig( annotations: vec![], comment: None, ttl: None, + codec: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, @@ -1752,6 +1783,7 @@ user_table = OlapTable[User]("User", OlapConfig( annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "payload".to_string(), @@ -1772,6 +1804,7 @@ user_table = OlapTable[User]("User", OlapConfig( annotations: vec![], comment: None, ttl: None, + codec: None, }, 
], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1826,6 +1859,7 @@ user_table = OlapTable[User]("User", OlapConfig( annotations: vec![], comment: None, ttl: None, + codec: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, diff --git a/apps/framework-cli/src/framework/python/utils.rs b/apps/framework-cli/src/framework/python/utils.rs index fb0717bed5..cf9a873642 100644 --- a/apps/framework-cli/src/framework/python/utils.rs +++ b/apps/framework-cli/src/framework/python/utils.rs @@ -53,6 +53,7 @@ impl ColumnBuilder { // are generated later when converting to ClickHouse columns comment: None, ttl: None, + codec: None, }) } } diff --git a/apps/framework-cli/src/framework/streaming/generate.rs b/apps/framework-cli/src/framework/streaming/generate.rs index 7a3446e302..f96692dc50 100644 --- a/apps/framework-cli/src/framework/streaming/generate.rs +++ b/apps/framework-cli/src/framework/streaming/generate.rs @@ -514,6 +514,7 @@ my_function = StreamingFunction( // for actual ClickHouse table columns to preserve enum definitions comment: None, ttl: None, + codec: None, }) .collect() } diff --git a/apps/framework-cli/src/framework/typescript/generate.rs b/apps/framework-cli/src/framework/typescript/generate.rs index cffd6cae21..c8d3b26b83 100644 --- a/apps/framework-cli/src/framework/typescript/generate.rs +++ b/apps/framework-cli/src/framework/typescript/generate.rs @@ -340,6 +340,7 @@ pub fn tables_to_typescript(tables: &[Table], life_cycle: Option) -> "WithDefault", "LifeCycle", "ClickHouseTTL", + "Codec", ]; if uses_simple_aggregate { @@ -591,6 +592,11 @@ pub fn tables_to_typescript(tables: &[Table], life_cycle: Option) -> if let Some(expr) = &column.ttl { type_str = format!("{type_str} & ClickHouseTTL<\"{}\">", expr); } + // Wrap with Codec if present + let type_str = match column.codec.as_ref() { + None => type_str, + Some(ref codec) => format!("{type_str} & Codec<{codec:?}>"), + }; let type_str = match column.default { None => 
type_str, Some(ref default) if type_str == "Date" => { @@ -938,6 +944,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "city".to_string(), @@ -949,6 +956,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "zip_code".to_string(), @@ -960,6 +968,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, ], jwt: false, @@ -978,6 +987,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "address".to_string(), @@ -989,6 +999,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "addresses".to_string(), @@ -1003,6 +1014,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1063,6 +1075,7 @@ export const UserTable = new OlapTable("User", { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "data".to_string(), @@ -1074,6 +1087,7 @@ export const UserTable = new OlapTable("User", { annotations: vec![], comment: None, ttl: None, + codec: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1130,6 +1144,7 @@ export const UserTable = new OlapTable("User", { annotations: vec![], comment: None, ttl: None, + codec: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, @@ -1181,6 +1196,7 @@ export const UserTable = new OlapTable("User", { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "version".to_string(), @@ -1192,6 +1208,7 @@ export const UserTable = new OlapTable("User", { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "is_deleted".to_string(), @@ -1203,6 +1220,7 @@ export const UserTable = new OlapTable("User", { annotations: vec![], comment: None, ttl: None, + codec: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1249,6 +1267,7 @@ export 
const UserTable = new OlapTable("User", { annotations: vec![], comment: None, ttl: None, + codec: None, }], sample_by: None, order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1301,6 +1320,7 @@ export const UserTable = new OlapTable("User", { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "version".to_string(), @@ -1312,6 +1332,7 @@ export const UserTable = new OlapTable("User", { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "is_deleted".to_string(), @@ -1323,6 +1344,7 @@ export const UserTable = new OlapTable("User", { annotations: vec![], comment: None, ttl: None, + codec: None, }, ], sample_by: None, @@ -1377,6 +1399,7 @@ export const UserTable = new OlapTable("User", { annotations: vec![], comment: None, ttl: None, + codec: None, }], order_by: OrderBy::Fields(vec!["u64".to_string()]), partition_by: None, @@ -1451,6 +1474,7 @@ export const UserTable = new OlapTable("User", { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "status".to_string(), @@ -1462,6 +1486,7 @@ export const UserTable = new OlapTable("User", { annotations: vec![], comment: None, ttl: None, + codec: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1518,6 +1543,7 @@ export const TaskTable = new OlapTable("Task", { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "timestamp".to_string(), @@ -1529,6 +1555,7 @@ export const TaskTable = new OlapTable("Task", { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "email".to_string(), @@ -1540,6 +1567,7 @@ export const TaskTable = new OlapTable("Task", { annotations: vec![], comment: None, ttl: Some("timestamp + INTERVAL 30 DAY".to_string()), + codec: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string(), "timestamp".to_string()]), @@ -1588,6 +1616,7 @@ export const TaskTable = new OlapTable("Task", { annotations: vec![], comment: None, ttl: None, + codec: None, }, 
Column { name: "payload".to_string(), @@ -1608,6 +1637,7 @@ export const TaskTable = new OlapTable("Task", { annotations: vec![], comment: None, ttl: None, + codec: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1656,6 +1686,7 @@ export const TaskTable = new OlapTable("Task", { annotations: vec![], comment: None, ttl: None, + codec: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs index 46168bcc69..970c48a2f1 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs @@ -662,6 +662,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "timestamp".to_string(), @@ -673,6 +674,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, ], order_by: OrderBy::Fields(order_by), @@ -798,6 +800,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, position_after: Some("timestamp".to_string()), }]; @@ -855,6 +858,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, position_after: Some("timestamp".to_string()), }]; diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs index 2eecff31d0..9fb6273d6d 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs @@ -83,6 +83,7 @@ pub fn std_column_to_clickhouse_column( default: column.default.clone(), comment, ttl: column.ttl.clone(), + codec: column.codec.clone(), }; Ok(clickhouse_column) @@ -426,6 +427,7 @@ mod tests { annotations: vec![], comment: Some("This is a user comment about the record type".to_string()), ttl: None, + 
codec: None, }; let clickhouse_column = std_column_to_clickhouse_column(column_with_user_comment).unwrap(); @@ -450,6 +452,7 @@ mod tests { annotations: vec![], comment: Some(format!("Old user comment {}", old_metadata)), ttl: None, + codec: None, }; let clickhouse_column = std_column_to_clickhouse_column(column_with_both).unwrap(); @@ -476,6 +479,7 @@ mod tests { annotations: vec![], comment: Some(old_metadata), ttl: None, + codec: None, }; let clickhouse_column = std_column_to_clickhouse_column(column_metadata_only).unwrap(); @@ -518,6 +522,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "status".to_string(), @@ -529,6 +534,7 @@ mod tests { annotations: vec![], comment: Some("User status field".to_string()), // User comment ttl: None, + codec: None, }, ], jwt: false, diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs index 757bf54b4d..044a0b2ce6 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs @@ -953,12 +953,14 @@ data_type_changed: {data_type_changed}, default_changed: {default_changed}, requ // Build all the SQL statements needed (main modify + optional removes) let removing_default = before_column.default.is_some() && after_column.default.is_none(); let removing_ttl = before_column.ttl.is_some() && after_column.ttl.is_none(); + let removing_codec = before_column.codec.is_some() && after_column.codec.is_none(); let queries = build_modify_column_sql( db_name, table_name, &clickhouse_column, removing_default, removing_ttl, + removing_codec, cluster_name, )?; @@ -1013,6 +1015,7 @@ fn build_modify_column_sql( ch_col: &ClickHouseColumn, removing_default: bool, removing_ttl: bool, + removing_codec: bool, cluster_name: Option<&str>, ) -> Result, ClickhouseChangesError> { let column_type_string = basic_field_type_to_string(&ch_col.column_type)?; 
@@ -1040,6 +1043,14 @@ fn build_modify_column_sql( )); } + // Add REMOVE CODEC statement if needed + if removing_codec { + statements.push(format!( + "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN `{}` REMOVE CODEC", + db_name, table_name, cluster_clause, ch_col.name + )); + } + // DEFAULT clause: If omitted, ClickHouse KEEPS any existing DEFAULT // Therefore, DEFAULT removal requires a separate REMOVE DEFAULT statement // Default values from ClickHouse/Python are already properly formatted @@ -1061,29 +1072,39 @@ fn build_modify_column_sql( .map(|t| format!(" TTL {}", t)) .unwrap_or_default(); + // CODEC clause: If omitted, ClickHouse KEEPS any existing CODEC + // Therefore, CODEC removal requires a separate REMOVE CODEC statement + let codec_clause = ch_col + .codec + .as_ref() + .map(|c| format!(" CODEC({})", c)) + .unwrap_or_default(); + // Build the main MODIFY COLUMN statement let main_sql = if let Some(ref comment) = ch_col.comment { let escaped_comment = comment.replace('\'', "''"); format!( - "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN IF EXISTS `{}` {}{}{} COMMENT '{}'", + "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN IF EXISTS `{}` {}{}{}{} COMMENT '{}'", db_name, table_name, cluster_clause, ch_col.name, column_type_string, default_clause, + codec_clause, ttl_clause, escaped_comment ) } else { format!( - "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN IF EXISTS `{}` {}{}{}", + "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN IF EXISTS `{}` {}{}{}{}", db_name, table_name, cluster_clause, ch_col.name, column_type_string, default_clause, + codec_clause, ttl_clause ) }; @@ -1647,7 +1668,8 @@ impl OlapOperations for ConfiguredDBClient { is_in_primary_key, is_in_sorting_key, default_kind, - default_expression + default_expression, + compression_codec FROM system.columns WHERE database = '{db_name}' AND table = '{table_name}' @@ -1662,7 +1684,7 @@ impl OlapOperations for ConfiguredDBClient { let mut columns_cursor = self .client .query(&columns_query) - .fetch::<(String, String, String, u8, u8, 
String, String)>() + .fetch::<(String, String, String, u8, u8, String, String, String)>() .map_err(|e| { debug!("Error fetching columns for table {}: {}", table_name, e); OlapChangesError::DatabaseError(e.to_string()) @@ -1680,6 +1702,7 @@ impl OlapOperations for ConfiguredDBClient { is_sorting, default_kind, default_expression, + compression_codec, )) = columns_cursor .next() .await @@ -1806,6 +1829,19 @@ impl OlapOperations for ConfiguredDBClient { .get(&col_name) .map(|ttl| normalize_ttl_expression(ttl)); + // Parse codec if present + // Strip CODEC(...) wrapper from compression_codec (e.g., "CODEC(ZSTD(3))" -> "ZSTD(3)") + let codec = if !compression_codec.is_empty() { + let trimmed = compression_codec.trim(); + if trimmed.starts_with("CODEC(") && trimmed.ends_with(')') { + Some(trimmed[6..trimmed.len() - 1].to_string()) + } else { + Some(compression_codec) + } + } else { + None + }; + let column = Column { name: col_name.clone(), data_type, @@ -1816,6 +1852,7 @@ impl OlapOperations for ConfiguredDBClient { annotations, comment: column_comment, ttl: normalized_ttl, + codec, }; columns.push(column); @@ -2640,6 +2677,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra annotations: vec![], comment: Some("Old user comment".to_string()), ttl: None, + codec: None, }; let after_column = Column { @@ -2658,6 +2696,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra annotations: vec![], comment: Some("New user comment".to_string()), ttl: None, + codec: None, }; // The execute_modify_table_column function should detect this as comment-only change @@ -2683,6 +2722,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra annotations: vec![], comment: Some("Number of things".to_string()), ttl: None, + codec: None, }; let after_column = Column { default: Some("42".to_string()), @@ -2690,7 +2730,8 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra 
}; let ch_after = std_column_to_clickhouse_column(after_column).unwrap(); - let sqls = build_modify_column_sql("db", "table", &ch_after, false, false, None).unwrap(); + let sqls = + build_modify_column_sql("db", "table", &ch_after, false, false, false, None).unwrap(); assert_eq!(sqls.len(), 1); assert_eq!( @@ -2714,6 +2755,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra annotations: vec![], comment: Some("old".to_string()), ttl: None, + codec: None, }; let after_column = Column { @@ -2746,13 +2788,21 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra annotations: vec![], comment: Some("Updated description field".to_string()), ttl: None, + codec: None, }; let clickhouse_column = std_column_to_clickhouse_column(column).unwrap(); - let sqls = - build_modify_column_sql("test_db", "users", &clickhouse_column, false, false, None) - .unwrap(); + let sqls = build_modify_column_sql( + "test_db", + "users", + &clickhouse_column, + false, + false, + false, + None, + ) + .unwrap(); assert_eq!(sqls.len(), 1); assert_eq!( @@ -2776,6 +2826,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra default: Some("xxHash64(_id)".to_string()), // SQL function - no quotes comment: Some("Hash of the ID".to_string()), ttl: None, + codec: None, }; let sqls = build_modify_column_sql( @@ -2784,6 +2835,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra &sample_hash_col, false, false, + false, None, ) .unwrap(); @@ -2805,11 +2857,19 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra default: Some("now()".to_string()), // SQL function - no quotes comment: None, ttl: None, + codec: None, }; - let sqls = - build_modify_column_sql("test_db", "test_table", &created_at_col, false, false, None) - .unwrap(); + let sqls = build_modify_column_sql( + "test_db", + "test_table", + &created_at_col, + false, + false, + false, + None, + ) + 
.unwrap(); assert_eq!(sqls.len(), 1); // The fix ensures now() is NOT quoted @@ -2828,11 +2888,19 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra default: Some("'active'".to_string()), // String literal - quotes preserved comment: None, ttl: None, + codec: None, }; - let sqls = - build_modify_column_sql("test_db", "test_table", &status_col, false, false, None) - .unwrap(); + let sqls = build_modify_column_sql( + "test_db", + "test_table", + &status_col, + false, + false, + false, + None, + ) + .unwrap(); assert_eq!(sqls.len(), 1); // String literals should preserve their quotes @@ -3045,6 +3113,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra annotations: vec![], comment: Some("Number of items".to_string()), ttl: None, + codec: None, }; let clickhouse_column = std_column_to_clickhouse_column(column).unwrap(); @@ -3091,6 +3160,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra annotations: vec![], comment: None, ttl: None, + codec: None, }; let clickhouse_column = std_column_to_clickhouse_column(column).unwrap(); @@ -3140,6 +3210,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra annotations: vec![], comment: None, ttl: Some("created_at + INTERVAL 7 DAY".to_string()), + codec: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: Some("toYYYYMM(created_at)".to_string()), @@ -3206,6 +3277,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra annotations: vec![], comment: None, ttl: Some("created_at + INTERVAL 7 DAY".to_string()), + codec: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: Some("toYYYYMM(created_at)".to_string()), @@ -3350,4 +3422,39 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra _ => panic!("Expected Table signature"), } } + + #[test] + fn test_codec_wrapper_stripping() { + let test_cases = vec![ + 
("CODEC(ZSTD(3))", "ZSTD(3)"), + ("CODEC(Delta, LZ4)", "Delta, LZ4"), + ("CODEC(Gorilla, ZSTD(3))", "Gorilla, ZSTD(3)"), + ("CODEC(DoubleDelta)", "DoubleDelta"), + ("", ""), + ]; + + for (input, expected) in test_cases { + let result = if !input.is_empty() { + let trimmed = input.trim(); + if trimmed.starts_with("CODEC(") && trimmed.ends_with(')') { + Some(trimmed[6..trimmed.len() - 1].to_string()) + } else { + Some(input.to_string()) + } + } else { + None + }; + + if expected.is_empty() { + assert_eq!(result, None, "Failed for input: {}", input); + } else { + assert_eq!( + result, + Some(expected.to_string()), + "Failed for input: {}", + input + ); + } + } + } } diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs index fbc134ea65..f978731301 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs @@ -432,6 +432,7 @@ pub struct ClickHouseColumn { pub default: Option, pub comment: Option, // Column comment for metadata storage pub ttl: Option, + pub codec: Option, // Compression codec expression (e.g., "ZSTD(3)", "Delta, LZ4") } impl ClickHouseColumn { diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs index 154fc53b57..771f927df5 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs @@ -124,7 +124,7 @@ static CREATE_TABLE_TEMPLATE: &str = r#" CREATE TABLE IF NOT EXISTS `{{db_name}}`.`{{table_name}}`{{#if cluster_name}} ON CLUSTER {{cluster_name}}{{/if}} ( -{{#each fields}} `{{field_name}}` {{{field_type}}} {{field_nullable}}{{#if field_default}} DEFAULT {{{field_default}}}{{/if}}{{#if field_comment}} COMMENT '{{{field_comment}}}'{{/if}}{{#if field_ttl}} TTL {{{field_ttl}}}{{/if}}{{#unless @last}}, 
+{{#each fields}} `{{field_name}}` {{{field_type}}} {{field_nullable}}{{#if field_default}} DEFAULT {{{field_default}}}{{/if}}{{#if field_codec}} CODEC({{{field_codec}}}){{/if}}{{#if field_comment}} COMMENT '{{{field_comment}}}'{{/if}}{{#if field_ttl}} TTL {{{field_ttl}}}{{/if}}{{#unless @last}}, {{/unless}}{{/each}}{{#if has_indexes}}, {{#each indexes}}{{this}}{{#unless @last}}, {{/unless}}{{/each}}{{/if}} ) ENGINE = {{engine}}{{#if primary_key_string}} @@ -3054,6 +3054,7 @@ fn builds_field_context(columns: &[ClickHouseColumn]) -> Result, Clic let escaped_comment = column.comment.as_ref().map(|c| c.replace('\'', "''")); let field_ttl = column.ttl.as_ref(); + let field_codec = column.codec.as_ref(); // Default values from ClickHouse/Python are already properly formatted // - String literals come with quotes: 'active' @@ -3066,6 +3067,7 @@ fn builds_field_context(columns: &[ClickHouseColumn]) -> Result, Clic "field_name": column.name, "field_type": field_type, "field_ttl": field_ttl, + "field_codec": field_codec, "field_default": formatted_default, "field_nullable": if let ClickHouseColumnType::Nullable(_) = column.column_type { // if type is Nullable, do not add extra specifier @@ -3103,6 +3105,7 @@ mod tests { default: None, comment: None, ttl: None, + codec: None, }, ClickHouseColumn { name: "nested_field_2".to_string(), @@ -3113,6 +3116,7 @@ mod tests { default: None, comment: None, ttl: None, + codec: None, }, ClickHouseColumn { name: "nested_field_3".to_string(), @@ -3123,6 +3127,7 @@ mod tests { default: None, comment: None, ttl: None, + codec: None, }, ClickHouseColumn { name: "nested_field_4".to_string(), @@ -3133,6 +3138,7 @@ mod tests { default: None, comment: None, ttl: None, + codec: None, }, ClickHouseColumn { name: "nested_field_5".to_string(), @@ -3143,6 +3149,7 @@ mod tests { default: None, comment: None, ttl: None, + codec: None, }, ClickHouseColumn { name: "nested_field_6".to_string(), @@ -3165,6 +3172,7 @@ mod tests { default: None, comment: 
None, ttl: None, + codec: None, }, ClickHouseColumn { name: "nested_field_7".to_string(), @@ -3175,6 +3183,7 @@ mod tests { default: None, comment: None, ttl: None, + codec: None, }, ]); @@ -3259,6 +3268,7 @@ mod tests { default: None, comment: None, ttl: None, + codec: None, }, ClickHouseColumn { name: "name".to_string(), @@ -3269,6 +3279,7 @@ mod tests { default: None, comment: None, ttl: None, + codec: None, }, ], order_by: OrderBy::Fields(vec![]), @@ -3308,6 +3319,7 @@ PRIMARY KEY (`id`) default: Some("'abc'".to_string()), comment: None, ttl: None, + codec: None, }], order_by: OrderBy::Fields(vec![]), partition_by: None, @@ -3345,6 +3357,7 @@ ENGINE = MergeTree default: Some("42".to_string()), comment: None, ttl: None, + codec: None, }], order_by: OrderBy::Fields(vec![]), partition_by: None, @@ -3384,6 +3397,7 @@ ENGINE = MergeTree default: None, comment: None, ttl: None, + codec: None, }, ClickHouseColumn { name: "sample_hash".to_string(), @@ -3394,6 +3408,7 @@ ENGINE = MergeTree default: Some("xxHash64(_id)".to_string()), // SQL function - no quotes comment: None, ttl: None, + codec: None, }, ClickHouseColumn { name: "created_at".to_string(), @@ -3404,6 +3419,7 @@ ENGINE = MergeTree default: Some("now()".to_string()), // SQL function - no quotes comment: None, ttl: None, + codec: None, }, ], order_by: OrderBy::Fields(vec![]), @@ -3443,6 +3459,7 @@ ENGINE = MergeTree default: None, comment: None, ttl: None, + codec: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, @@ -3483,6 +3500,7 @@ ORDER BY (`id`) "#; default: None, comment: None, ttl: None, + codec: None, }], engine: ClickhouseEngine::ReplacingMergeTree { ver: None, @@ -3519,6 +3537,7 @@ ORDER BY (`id`) "#; default: None, comment: None, ttl: None, + codec: None, }, ClickHouseColumn { name: "version".to_string(), @@ -3529,6 +3548,7 @@ ORDER BY (`id`) "#; default: None, comment: None, ttl: None, + codec: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ 
-3572,6 +3592,7 @@ ORDER BY (`id`) "#; default: None, comment: None, ttl: None, + codec: None, }, ClickHouseColumn { name: "version".to_string(), @@ -3582,6 +3603,7 @@ ORDER BY (`id`) "#; default: None, comment: None, ttl: None, + codec: None, }, ClickHouseColumn { name: "is_deleted".to_string(), @@ -3592,6 +3614,7 @@ ORDER BY (`id`) "#; default: None, comment: None, ttl: None, + codec: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -3635,6 +3658,7 @@ ORDER BY (`id`) "#; default: None, comment: None, ttl: None, + codec: None, }], sample_by: None, order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -3740,6 +3764,7 @@ ORDER BY (`id`) "#; default: None, comment: None, ttl: None, + codec: None, }, ClickHouseColumn { name: "nested_data".to_string(), @@ -3753,6 +3778,7 @@ ORDER BY (`id`) "#; default: None, comment: None, ttl: None, + codec: None, }, ClickHouseColumn { name: "field2".to_string(), @@ -3763,6 +3789,7 @@ ORDER BY (`id`) "#; default: None, comment: None, ttl: None, + codec: None, }, ]), required: true, @@ -3771,6 +3798,7 @@ ORDER BY (`id`) "#; default: None, comment: None, ttl: None, + codec: None, }, ClickHouseColumn { name: "status".to_string(), @@ -3793,6 +3821,7 @@ ORDER BY (`id`) "#; default: None, comment: None, ttl: None, + codec: None, }, ], sample_by: None, @@ -3842,6 +3871,7 @@ ORDER BY (`id`) "#; default: None, comment: None, ttl: None, + codec: None, }, ClickHouseColumn { name: "data".to_string(), @@ -3852,6 +3882,7 @@ ORDER BY (`id`) "#; default: None, comment: None, ttl: None, + codec: None, }, ], order_by: OrderBy::Fields(vec![]), @@ -4326,6 +4357,7 @@ SETTINGS keeper_path = '/clickhouse/s3queue/test_table', mode = 'unordered', s3q default: None, comment: None, ttl: None, + codec: None, }], order_by: OrderBy::Fields(vec![]), partition_by: None, @@ -4873,6 +4905,7 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; default: None, comment: None, ttl: None, + codec: None, }], order_by: OrderBy::Fields(vec![]), 
partition_by: None, @@ -4920,6 +4953,7 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; default: None, comment: None, ttl: None, + codec: None, }], order_by: OrderBy::Fields(vec![]), partition_by: None, @@ -5017,6 +5051,7 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; default: None, comment: None, ttl: None, + codec: None, }; let cluster_clause = Some("test_cluster") @@ -5880,4 +5915,83 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; let result2 = ClickhouseEngine::try_from(another_invalid); assert!(result2.is_err(), "Should reject invalid format 'BadFormat'"); } + + #[test] + fn test_create_table_with_codec() { + let columns = vec![ + ClickHouseColumn { + name: "id".to_string(), + column_type: ClickHouseColumnType::String, + required: true, + unique: false, + primary_key: true, + default: None, + comment: None, + ttl: None, + codec: None, + }, + ClickHouseColumn { + name: "log_blob".to_string(), + column_type: ClickHouseColumnType::Json(Default::default()), + required: true, + unique: false, + primary_key: false, + default: None, + comment: None, + ttl: None, + codec: Some("ZSTD(3)".to_string()), + }, + ClickHouseColumn { + name: "timestamp".to_string(), + column_type: ClickHouseColumnType::DateTime64 { precision: 3 }, + required: true, + unique: false, + primary_key: false, + default: None, + comment: None, + ttl: None, + codec: Some("Delta, LZ4".to_string()), + }, + ClickHouseColumn { + name: "tags".to_string(), + column_type: ClickHouseColumnType::Array(Box::new(ClickHouseColumnType::String)), + required: true, + unique: false, + primary_key: false, + default: None, + comment: None, + ttl: None, + codec: Some("ZSTD(1)".to_string()), + }, + ]; + + let table = ClickHouseTable { + name: "test_table".to_string(), + version: None, + columns, + order_by: OrderBy::Fields(vec!["id".to_string()]), + engine: ClickhouseEngine::MergeTree, + table_ttl_setting: None, + partition_by: None, + sample_by: None, + 
table_settings: None, + indexes: vec![], + cluster_name: None, + }; + + let query = create_table_query("test_db", table, false).unwrap(); + let expected = r#" +CREATE TABLE IF NOT EXISTS `test_db`.`test_table` +( + `id` String NOT NULL, + `log_blob` JSON NOT NULL CODEC(ZSTD(3)), + `timestamp` DateTime64(3) NOT NULL CODEC(Delta, LZ4), + `tags` Array(String) NOT NULL CODEC(ZSTD(1)) +) +ENGINE = MergeTree +PRIMARY KEY (`id`) +ORDER BY (`id`) +"#; + assert_eq!(query.trim(), expected.trim()); + } } diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/type_parser.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/type_parser.rs index fc2221a218..14a606179e 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/type_parser.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/type_parser.rs @@ -1678,6 +1678,7 @@ pub fn convert_ast_to_column_type( // system.columns queries, not from type string parsing. comment: None, ttl: None, + codec: None, }); } TupleElement::Unnamed(_) => { diff --git a/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs b/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs index ca9589426b..c2cc6cb282 100644 --- a/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs +++ b/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs @@ -1351,6 +1351,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, after_column: None, dependency_info: DependencyInfo { @@ -1673,6 +1674,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }; // Create operations with correct dependencies @@ -2727,6 +2729,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }; // Create operations with signatures that work with the current implementation @@ -2914,6 +2917,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "old_column".to_string(), @@ -2925,6 +2929,7 @@ mod tests { annotations: vec![], 
comment: None, ttl: None, + codec: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -2959,6 +2964,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "new_column".to_string(), @@ -2970,6 +2976,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -3003,6 +3010,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }), ColumnChange::Added { column: Column { @@ -3015,6 +3023,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, position_after: Some("id".to_string()), }, diff --git a/apps/framework-cli/src/infrastructure/processes/kafka_clickhouse_sync.rs b/apps/framework-cli/src/infrastructure/processes/kafka_clickhouse_sync.rs index bd7ccf8cca..19d69dbe49 100644 --- a/apps/framework-cli/src/infrastructure/processes/kafka_clickhouse_sync.rs +++ b/apps/framework-cli/src/infrastructure/processes/kafka_clickhouse_sync.rs @@ -1214,6 +1214,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "B".to_string(), @@ -1225,6 +1226,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "C".to_string(), @@ -1242,6 +1244,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "b".to_string(), @@ -1259,6 +1262,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "e".to_string(), @@ -1270,6 +1274,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "f".to_string(), @@ -1281,6 +1286,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, ], }), @@ -1291,6 +1297,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "c".to_string(), @@ -1302,6 +1309,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, 
+ codec: None, }, ], }), @@ -1312,6 +1320,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "D".to_string(), @@ -1323,6 +1332,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, ], }; diff --git a/apps/framework-cli/src/utilities/validate_passthrough.rs b/apps/framework-cli/src/utilities/validate_passthrough.rs index b938189eb5..087b8b4049 100644 --- a/apps/framework-cli/src/utilities/validate_passthrough.rs +++ b/apps/framework-cli/src/utilities/validate_passthrough.rs @@ -648,6 +648,7 @@ impl<'de, S: SerializeValue> Visitor<'de> for &mut ValueVisitor<'_, S> { annotations: vec![], comment: None, ttl: None, + codec: None, } }) .collect(); @@ -1316,6 +1317,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "int_col".to_string(), @@ -1327,6 +1329,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "float_col".to_string(), @@ -1338,6 +1341,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "bool_col".to_string(), @@ -1349,6 +1353,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "date_col".to_string(), @@ -1360,6 +1365,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, ]; @@ -1394,6 +1400,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }]; let json = r#" @@ -1428,6 +1435,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }]; let json = r#" @@ -1469,6 +1477,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }]; // Test valid enum value @@ -1518,6 +1527,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "nested_int".to_string(), @@ -1529,6 +1539,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, ]; @@ -1543,6 +1554,7 @@ 
mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "nested_object".to_string(), @@ -1558,6 +1570,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, ]; @@ -1616,6 +1629,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "optional_field".to_string(), @@ -1627,6 +1641,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, ]; @@ -1658,6 +1673,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "aud".to_string(), @@ -1669,6 +1685,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "exp".to_string(), @@ -1680,6 +1697,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, ]; @@ -1694,6 +1712,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, Column { name: "jwt_object".to_string(), @@ -1709,6 +1728,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }, ]; @@ -1754,6 +1774,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }]; // Test valid map @@ -1811,6 +1832,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }]; // Test valid map with numeric keys (as strings in JSON) @@ -1865,6 +1887,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }]; // Min boundary 0 @@ -1908,6 +1931,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }]; // Min boundary -32768 @@ -1951,6 +1975,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }]; let positive_limit: BigInt = BigInt::from(1u8) << 127usize; @@ -1996,6 +2021,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }]; let positive_limit: BigInt = BigInt::from(1u8) << 255usize; @@ -2041,6 +2067,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + 
codec: None, }]; let limit: BigUint = BigUint::from(1u8) << 256usize; @@ -2087,6 +2114,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }]; // Valid keys @@ -2127,6 +2155,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }]; let positive_limit: BigInt = BigInt::from(1u8) << 255usize; @@ -2167,6 +2196,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }]; let limit: BigUint = BigUint::from(1u8) << 256usize; @@ -2211,6 +2241,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }]; let json = r#" @@ -2242,6 +2273,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }]; // missing nested path @@ -2274,6 +2306,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }]; // null at the nested path counts as missing for non-nullable types @@ -2321,6 +2354,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }]; // Test 1: Two's complement value (what -1 becomes with naive cast) should be rejected @@ -2390,6 +2424,7 @@ mod tests { annotations: vec![], comment: None, ttl: None, + codec: None, }]; // Test negative values work with i64 diff --git a/apps/framework-docs/llm-docs/python/table-setup.md b/apps/framework-docs/llm-docs/python/table-setup.md index 9bbb2b1e1e..457121f27d 100644 --- a/apps/framework-docs/llm-docs/python/table-setup.md +++ b/apps/framework-docs/llm-docs/python/table-setup.md @@ -914,4 +914,65 @@ class DistributedEngine: 3. **Load distribution**: Balance write and read load across multiple servers 4. **Geographic distribution**: Place data closer to users in different regions -For more details, see the [ClickHouse Distributed documentation](https://clickhouse.com/docs/en/engines/table-engines/special/distributed). 
\ No newline at end of file +For more details, see the [ClickHouse Distributed documentation](https://clickhouse.com/docs/en/engines/table-engines/special/distributed). + +## Compression Codecs + +Specify per-column compression codecs to optimize storage and performance: + +```python +from typing import Annotated, Any +from moose_lib import OlapTable, OlapConfig, clickhouse_codec, UInt64 +from pydantic import BaseModel +from datetime import datetime + +class Metrics(BaseModel): + # Delta for timestamps and monotonically increasing values + timestamp: Annotated[datetime, clickhouse_codec("Delta, LZ4")] + + # Gorilla for floating point sensor data + temperature: Annotated[float, clickhouse_codec("Gorilla, ZSTD(3)")] + + # DoubleDelta for counters and metrics + request_count: Annotated[float, clickhouse_codec("DoubleDelta, LZ4")] + + # ZSTD for text/JSON with compression level (1-22) + log_data: Annotated[Any, clickhouse_codec("ZSTD(9)")] + user_agent: Annotated[str, clickhouse_codec("ZSTD(3)")] + + # Compress array elements + tags: Annotated[list[str], clickhouse_codec("LZ4")] + event_ids: Annotated[list[UInt64], clickhouse_codec("ZSTD(1)")] + + # No codec (uses ClickHouse default) + status_code: int + +metrics_table = OlapTable[Metrics]( + "Metrics", + OlapConfig(order_by_fields=["timestamp"]) +) +``` + +### Common Codecs +- **Delta/DoubleDelta**: For timestamps, counters, monotonic values +- **Gorilla**: For floating-point sensor data, temperatures, stock prices +- **ZSTD**: General-purpose with levels 1-22 (higher = better compression, slower) +- **LZ4**: Fast decompression, lower CPU usage + +### Codec Chains +Combine codecs (processed left-to-right): `"Delta, LZ4"` or `"Gorilla, ZSTD(3)"` + +### Combining with Other Annotations +```python +from moose_lib import clickhouse_default, ClickHouseTTL, UInt64 + +class Events(BaseModel): + # Codec + Default value + status: Annotated[str, clickhouse_default("'pending'"), clickhouse_codec("ZSTD(3)")] + + # Codec + TTL + 
email: Annotated[str, ClickHouseTTL("timestamp + INTERVAL 30 DAY"), clickhouse_codec("ZSTD(3)")] + + # Codec + Numeric type + event_count: Annotated[UInt64, clickhouse_codec("DoubleDelta, LZ4")] +``` diff --git a/apps/framework-docs/llm-docs/typescript/table-setup.md b/apps/framework-docs/llm-docs/typescript/table-setup.md index 59714c4a45..a1f55fd07c 100644 --- a/apps/framework-docs/llm-docs/typescript/table-setup.md +++ b/apps/framework-docs/llm-docs/typescript/table-setup.md @@ -723,4 +723,60 @@ You can verify your tables were created correctly using: moose ls ``` -Or by connecting directly to your local ClickHouse instance and running SQL commands. \ No newline at end of file +Or by connecting directly to your local ClickHouse instance and running SQL commands. + +## Compression Codecs + +Specify per-column compression codecs to optimize storage and performance: + +```typescript +import { OlapTable, Codec, DateTime, UInt64 } from '@514labs/moose-lib'; + +interface Metrics { + // Delta for timestamps and monotonically increasing values + timestamp: DateTime & Codec<"Delta, LZ4">; + + // Gorilla for floating point sensor data + temperature: number & Codec<"Gorilla, ZSTD(3)">; + + // DoubleDelta for counters and metrics + request_count: number & Codec<"DoubleDelta, LZ4">; + + // ZSTD for text/JSON with compression level (1-22) + log_data: Record<string, any> & Codec<"ZSTD(9)">; + user_agent: string & Codec<"ZSTD(3)">; + + // Compress array elements + tags: string[] & Codec<"LZ4">; + event_ids: UInt64[] & Codec<"ZSTD(1)">; +} + +export const MetricsTable = new OlapTable<Metrics>("Metrics", { + orderByFields: ["timestamp"] +}); +``` + +### Common Codecs +- **Delta/DoubleDelta**: For timestamps, counters, monotonic values +- **Gorilla**: For floating-point sensor data, temperatures, stock prices +- **ZSTD**: General-purpose with levels 1-22 (higher = better compression, slower) +- **LZ4**: Fast decompression, lower CPU usage + +### Codec Chains +Combine codecs (processed left-to-right): `Delta, LZ4`
or `Gorilla, ZSTD(3)` + +### Combining with Other Annotations +```typescript +import { ClickHouseDefault, ClickHouseTTL } from "@514labs/moose-lib"; + +interface Events { + // Codec + Default value + status: string & ClickHouseDefault<"'pending'"> & Codec<"ZSTD(3)">; + + // Codec + TTL + email: string & ClickHouseTTL<"timestamp + INTERVAL 30 DAY"> & Codec<"ZSTD(3)">; + + // Codec + Numeric type + event_count: UInt64 & Codec<"DoubleDelta, LZ4">; +} +``` diff --git a/apps/framework-docs/src/pages/moose/olap/_meta.tsx b/apps/framework-docs/src/pages/moose/olap/_meta.tsx index 4dbc6563d8..1d3db08d43 100644 --- a/apps/framework-docs/src/pages/moose/olap/_meta.tsx +++ b/apps/framework-docs/src/pages/moose/olap/_meta.tsx @@ -20,6 +20,9 @@ const rawMeta = { ttl: { title: "TTL (Time-to-Live)", }, + compression: { + title: "Compression Codecs", + }, "schema-optimization": { title: "Schema Optimization", }, diff --git a/apps/framework-docs/src/pages/moose/olap/compression.mdx b/apps/framework-docs/src/pages/moose/olap/compression.mdx new file mode 100644 index 0000000000..5c0bc47d9a --- /dev/null +++ b/apps/framework-docs/src/pages/moose/olap/compression.mdx @@ -0,0 +1,167 @@ +import { TypeScript, Python, LanguageSwitcher, Callout } from "@/components"; + +<LanguageSwitcher /> + +# Compression Codecs + +Moose lets you specify ClickHouse compression codecs per-column to optimize storage and query performance. Different codecs work better for different data types, and you can chain multiple codecs together.
+ +## When to use compression codecs + +- **Time series data**: Use `Delta` or `DoubleDelta` for timestamps and monotonically increasing values +- **Floating point metrics**: Use `Gorilla` codec for sensor data, temperatures, and other float values +- **Text and JSON**: Use `ZSTD` with compression levels (1-22) for large strings and JSON +- **High cardinality data**: Combine specialized codecs with general-purpose compression (e.g., `Delta, LZ4`) + +## Basic Usage + +<TypeScript> +```typescript +import { OlapTable, Key, DateTime, Codec, UInt64 } from "@514labs/moose-lib"; + +interface Metrics { + id: Key<string>; + // Delta codec for timestamps (monotonically increasing) + timestamp: DateTime & Codec<"Delta, LZ4">; + + // Gorilla codec for floating point sensor data + temperature: number & Codec<"Gorilla, ZSTD(3)">; + + // DoubleDelta for counters and metrics + request_count: number & Codec<"DoubleDelta, LZ4">; + + // ZSTD for text/JSON with compression level + log_data: Record<string, any> & Codec<"ZSTD(3)">; + user_agent: string & Codec<"ZSTD(3)">; + + // Compress array elements + tags: string[] & Codec<"LZ4">; + event_ids: UInt64[] & Codec<"ZSTD(1)">; +} + +export const MetricsTable = new OlapTable<Metrics>("Metrics", { + orderByFields: ["id", "timestamp"] +}); +``` +</TypeScript> + +<Python> +```python +from typing import Annotated, Any +from moose_lib import OlapTable, OlapConfig, Key, clickhouse_codec, UInt64 +from pydantic import BaseModel +from datetime import datetime + +class Metrics(BaseModel): + id: Key[str] + # Delta codec for timestamps (monotonically increasing) + timestamp: Annotated[datetime, clickhouse_codec("Delta, LZ4")] + + # Gorilla codec for floating point sensor data + temperature: Annotated[float, clickhouse_codec("Gorilla, ZSTD(3)")] + + # DoubleDelta for counters and metrics + request_count: Annotated[float, clickhouse_codec("DoubleDelta, LZ4")] + + # ZSTD for text/JSON with compression level + log_data: Annotated[Any, clickhouse_codec("ZSTD(3)")] + user_agent: Annotated[str,
clickhouse_codec("ZSTD(3)")] + + # Compress array elements + tags: Annotated[list[str], clickhouse_codec("LZ4")] + event_ids: Annotated[list[UInt64], clickhouse_codec("ZSTD(1)")] + +metrics_table = OlapTable[Metrics]( + "Metrics", + OlapConfig(order_by_fields=["id", "timestamp"]) +) +``` +</Python> + +## Codec Chains + +You can chain multiple codecs together. Data is processed by each codec in sequence (left-to-right). + +<TypeScript> +```typescript +interface Events { + // Delta compress timestamps, then apply LZ4 + timestamp: DateTime & Codec<"Delta, LZ4">; + + // Gorilla for floats, then ZSTD for extra compression + value: number & Codec<"Gorilla, ZSTD(3)">; +} +``` +</TypeScript> + +<Python> +```python +class Events(BaseModel): + # Delta compress timestamps, then apply LZ4 + timestamp: Annotated[datetime, clickhouse_codec("Delta, LZ4")] + + # Gorilla for floats, then ZSTD for extra compression + value: Annotated[float, clickhouse_codec("Gorilla, ZSTD(3)")] +``` +</Python> + +## Combining with Other Annotations + +Codecs work alongside other ClickHouse annotations: + +<TypeScript> +```typescript +import { ClickHouseDefault, ClickHouseTTL } from "@514labs/moose-lib"; + +interface UserEvents { + id: Key<string>; + timestamp: DateTime & Codec<"Delta, LZ4">; + + // Codec + Default value + status: string & ClickHouseDefault<"'pending'"> & Codec<"ZSTD(3)">; + + // Codec + TTL + email: string & ClickHouseTTL<"timestamp + INTERVAL 30 DAY"> & Codec<"ZSTD(3)">; + + // Codec + Numeric type + event_count: UInt64 & Codec<"DoubleDelta, LZ4">; +} +``` +</TypeScript> + +<Python> +```python +from moose_lib import clickhouse_default, ClickHouseTTL + +class UserEvents(BaseModel): + id: Key[str] + timestamp: Annotated[datetime, clickhouse_codec("Delta, LZ4")] + + # Codec + Default value + status: Annotated[str, clickhouse_default("'pending'"), clickhouse_codec("ZSTD(3)")] + + # Codec + TTL + email: Annotated[str, ClickHouseTTL("timestamp + INTERVAL 30 DAY"), clickhouse_codec("ZSTD(3)")] + + # Codec + Numeric type + event_count: Annotated[UInt64,
clickhouse_codec("DoubleDelta, LZ4")] +``` +</Python> + +## Syncing from Remote Tables + +When using `moose init --from-remote` to introspect existing ClickHouse tables, Moose automatically captures codec definitions and generates the appropriate annotations in your data models. + +## Notes + +- Codec expressions must be valid ClickHouse codec syntax (without the `CODEC()` wrapper) +- ClickHouse may normalize codecs by adding default parameters (e.g., `Delta` becomes `Delta(4)`) +- Moose applies codec changes via migrations using `ALTER TABLE ... MODIFY COLUMN` +- Not all codecs work with all data types - ClickHouse will validate during table creation + +## Related + +- See [Supported Types](/moose/olap/supported-types) for all available column types +- See [Schema Optimization](/moose/olap/schema-optimization) for other performance techniques +- See [Applying Migrations](/moose/olap/apply-migrations) to roll out codec changes +- See [ClickHouse Compression Codecs](https://clickhouse.com/docs/en/sql-reference/statements/create/table#column_compression_codec) for detailed codec documentation diff --git a/apps/framework-docs/src/pages/moose/olap/schema-optimization.mdx b/apps/framework-docs/src/pages/moose/olap/schema-optimization.mdx index 69bf4bb0d5..51ee3df583 100644 --- a/apps/framework-docs/src/pages/moose/olap/schema-optimization.mdx +++ b/apps/framework-docs/src/pages/moose/olap/schema-optimization.mdx @@ -472,3 +472,41 @@ bad_user_events_table = OlapTable[UserEvent]("user_events", { }) ``` + +## Compression Codecs + +ClickHouse supports per-column compression codecs to optimize storage and query performance. Different codecs work better for different data types.
+ +**Quick examples:** +- Time series: Use `Delta` or `DoubleDelta` for timestamps and counters +- Floating point: Use `Gorilla` codec for sensor data and metrics +- Text/JSON: Use `ZSTD` with compression levels for large strings +- Combine codecs: Chain specialized compression with general-purpose (e.g., `Delta, LZ4`) + +<TypeScript> +```typescript +import { Codec, DateTime, UInt64 } from "@514labs/moose-lib"; + +interface Metrics { + timestamp: DateTime & Codec<"Delta, LZ4">; // Timestamps + temperature: number & Codec<"Gorilla, ZSTD(3)">; // Float metrics + log_data: Record<string, any> & Codec<"ZSTD(9)">; // JSON with heavy compression + event_ids: UInt64[] & Codec<"ZSTD(1)">; // Arrays +} +``` +</TypeScript> + +<Python> +```python +from moose_lib import clickhouse_codec, UInt64 +from typing import Annotated, Any + +class Metrics(BaseModel): + timestamp: Annotated[datetime, clickhouse_codec("Delta, LZ4")] # Timestamps + temperature: Annotated[float, clickhouse_codec("Gorilla, ZSTD(3)")] # Float metrics + log_data: Annotated[Any, clickhouse_codec("ZSTD(9)")] # JSON with heavy compression + event_ids: Annotated[list[UInt64], clickhouse_codec("ZSTD(1)")] # Arrays +``` +</Python> + +**See the [Compression Codecs](/moose/olap/compression) guide for detailed codec documentation and use cases.** diff --git a/packages/protobuf/infrastructure_map.proto b/packages/protobuf/infrastructure_map.proto index 96887f6167..df8c3c5e47 100644 --- a/packages/protobuf/infrastructure_map.proto +++ b/packages/protobuf/infrastructure_map.proto @@ -241,6 +241,8 @@ message Column { optional string comment = 8; // Column comment for metadata storage // Optional column-level TTL expression (without leading 'TTL') optional string ttl = 10; + // Compression codec expression (e.g., "ZSTD(3)", "Delta, LZ4") + optional string codec = 11; } enum SimpleColumnType { diff --git a/packages/py-moose-lib/moose_lib/__init__.py b/packages/py-moose-lib/moose_lib/__init__.py index 1fa7d5ca76..53bcf459a2 100644 --- a/packages/py-moose-lib/moose_lib/__init__.py
+++ b/packages/py-moose-lib/moose_lib/__init__.py @@ -37,6 +37,11 @@ StringToEnumMixin, FixedString, ClickhouseFixedStringSize, + ClickhouseDefault, + clickhouse_default, + ClickHouseTTL, + ClickHouseCodec, + clickhouse_codec, # Integer types Int8, Int16, diff --git a/packages/py-moose-lib/moose_lib/data_models.py b/packages/py-moose-lib/moose_lib/data_models.py index dcf3c287db..6d47e7ad41 100644 --- a/packages/py-moose-lib/moose_lib/data_models.py +++ b/packages/py-moose-lib/moose_lib/data_models.py @@ -72,6 +72,33 @@ class ClickHouseTTL: expression: str +@dataclasses.dataclass(frozen=True) +class ClickHouseCodec: + expression: str + + +def clickhouse_codec(expression: str) -> ClickHouseCodec: + """ + Creates a CODEC annotation for column compression. + + Supports single codecs and codec chains. + + Args: + expression: Codec expression (e.g., "ZSTD(3)", "Delta, LZ4") + + Examples: + # Single codec with compression level + log_blob: Annotated[Any, clickhouse_codec("ZSTD(3)")] + + # Codec chain for time series + timestamp: Annotated[datetime, clickhouse_codec("Delta, LZ4")] + + # Codec chain for floats + temperature: Annotated[float, clickhouse_codec("Gorilla, ZSTD")] + """ + return ClickHouseCodec(expression=expression) + + @dataclasses.dataclass(frozen=True) class ClickHouseJson: max_dynamic_paths: int | None = None @@ -271,6 +298,7 @@ class Column(BaseModel): default: str | None = None annotations: list[Tuple[str, Any]] = [] ttl: str | None = None + codec: str | None = None def to_expr(self): # Lazy import to avoid circular dependency at import time @@ -619,6 +647,12 @@ def _to_columns(model: type[BaseModel]) -> list[Column]: None, ) + # Extract CODEC expression from metadata, if provided + codec_expr = next( + (md.expression for md in mds if isinstance(md, ClickHouseCodec)), + None, + ) + columns.append( Column( name=column_name, @@ -629,6 +663,7 @@ def _to_columns(model: type[BaseModel]) -> list[Column]: default=default_expr, annotations=annotations, 
ttl=ttl_expr, + codec=codec_expr, ) ) return columns diff --git a/packages/py-moose-lib/tests/test_codec.py b/packages/py-moose-lib/tests/test_codec.py new file mode 100644 index 0000000000..25138a8951 --- /dev/null +++ b/packages/py-moose-lib/tests/test_codec.py @@ -0,0 +1,76 @@ +from datetime import datetime +from typing import Annotated, Any +from pydantic import BaseModel +from moose_lib import Key, clickhouse_codec, UInt64 +from moose_lib.data_models import _to_columns + + +def test_codec_single(): + """Test single codec annotation converts to correct ClickHouse CODEC.""" + + class CodecTest(BaseModel): + id: Key[str] + data: Annotated[str, clickhouse_codec("ZSTD(3)")] + + columns = _to_columns(CodecTest) + by_name = {col.name: col for col in columns} + + assert by_name["data"].codec == "ZSTD(3)" + assert by_name["id"].codec is None + + +def test_codec_chain(): + """Test codec chain annotation (Delta, LZ4).""" + + class CodecChainTest(BaseModel): + timestamp: Annotated[datetime, clickhouse_codec("Delta, LZ4")] + value: Annotated[float, clickhouse_codec("Gorilla, ZSTD")] + + columns = _to_columns(CodecChainTest) + by_name = {col.name: col for col in columns} + + assert by_name["timestamp"].codec == "Delta, LZ4" + assert by_name["value"].codec == "Gorilla, ZSTD" + + +def test_codec_with_level(): + """Test codec with compression level.""" + + class CodecLevelTest(BaseModel): + log_blob: Annotated[Any, clickhouse_codec("ZSTD(3)")] + combination_hash: Annotated[list[UInt64], clickhouse_codec("ZSTD(1)")] + + columns = _to_columns(CodecLevelTest) + by_name = {col.name: col for col in columns} + + assert by_name["log_blob"].codec == "ZSTD(3)" + assert by_name["combination_hash"].codec == "ZSTD(1)" + + +def test_codec_specialized(): + """Test specialized codecs.""" + + class SpecializedCodecTest(BaseModel): + timestamp: Annotated[datetime, clickhouse_codec("Delta")] + counter: Annotated[int, clickhouse_codec("DoubleDelta")] + temperature: Annotated[float, 
clickhouse_codec("Gorilla")] + + columns = _to_columns(SpecializedCodecTest) + by_name = {col.name: col for col in columns} + + assert by_name["timestamp"].codec == "Delta" + assert by_name["counter"].codec == "DoubleDelta" + assert by_name["temperature"].codec == "Gorilla" + + +def test_codec_none(): + """Test codec with NONE (uncompressed).""" + + class NoCodecTest(BaseModel): + data: Annotated[str, clickhouse_codec("NONE")] + + columns = _to_columns(NoCodecTest) + by_name = {col.name: col for col in columns} + + assert by_name["data"].codec == "NONE" + diff --git a/packages/ts-moose-lib/src/browserCompatible.ts b/packages/ts-moose-lib/src/browserCompatible.ts index d998aeda2b..f6226c2321 100644 --- a/packages/ts-moose-lib/src/browserCompatible.ts +++ b/packages/ts-moose-lib/src/browserCompatible.ts @@ -62,6 +62,7 @@ export { ClickHouseDefault, ClickHouseTTL, WithDefault, + Codec, // Added friendly aliases and numeric helpers DateTime, DateTime64, diff --git a/packages/ts-moose-lib/src/dataModels/dataModelTypes.ts b/packages/ts-moose-lib/src/dataModels/dataModelTypes.ts index 3a398121f8..ba932bc772 100644 --- a/packages/ts-moose-lib/src/dataModels/dataModelTypes.ts +++ b/packages/ts-moose-lib/src/dataModels/dataModelTypes.ts @@ -33,6 +33,7 @@ export interface Column { primary_key: boolean; default: string | null; ttl: string | null; + codec: string | null; annotations: [string, any][]; } diff --git a/packages/ts-moose-lib/src/dataModels/typeConvert.ts b/packages/ts-moose-lib/src/dataModels/typeConvert.ts index e66da7b6a9..739235f1a2 100644 --- a/packages/ts-moose-lib/src/dataModels/typeConvert.ts +++ b/packages/ts-moose-lib/src/dataModels/typeConvert.ts @@ -925,6 +925,20 @@ const handleDefaultWrapping = ( return undefined; }; +/** Detect ClickHouse Codec annotation on a type and return codec expression */ +const handleCodec = (t: ts.Type, checker: TypeChecker): string | null => { + const codecType = getTaggedType(t, checker, "_clickhouse_codec"); + if (codecType 
=== null) { + return null; + } + if (!codecType.isStringLiteral()) { + throw new UnsupportedFeature( + 'Codec must use a string literal, e.g. Codec<"ZSTD(3)">', + ); + } + return codecType.value; +}; + export const toColumns = (t: ts.Type, checker: TypeChecker): Column[] => { if (checker.getIndexInfosOfType(t).length !== 0) { console.log("[CompilerPlugin]", checker.getIndexInfosOfType(t)); @@ -964,6 +978,7 @@ export const toColumns = (t: ts.Type, checker: TypeChecker): Column[] => { unique: false, default: defaultExpression ?? handleDefault(type, checker), ttl: handleTtl(type, checker), + codec: handleCodec(type, checker), annotations, }; }); diff --git a/packages/ts-moose-lib/src/dataModels/types.ts b/packages/ts-moose-lib/src/dataModels/types.ts index 25180b5bd0..af90f8bcd0 100644 --- a/packages/ts-moose-lib/src/dataModels/types.ts +++ b/packages/ts-moose-lib/src/dataModels/types.ts @@ -73,6 +73,34 @@ export type UInt64 = number & ClickHouseInt<"uint64">; export type Decimal

= string & ClickHouseDecimal; +/** + * Attach compression codec to a column type. + * + * Any valid ClickHouse codec expression is allowed. ClickHouse validates the codec at runtime. + * + * @template T The base data type + * @template CodecExpr The codec expression (single codec or chain) + * + * @example + * interface Metrics { + * // Single codec + * log_blob: string & Codec<"ZSTD(3)">; + * + * // Codec chain (processed left-to-right) + * timestamp: Date & Codec<"Delta, LZ4">; + * temperature: number & Codec<"Gorilla, ZSTD">; + * + * // Specialized codecs + * counter: number & Codec<"DoubleDelta">; + * + * // Can combine with other annotations + * count: UInt64 & Codec<"DoubleDelta, LZ4">; + * } + */ +export type Codec = { + _clickhouse_codec?: CodecExpr; +}; + export type ClickHouseFloat = tags.Type< Value extends "float32" ? "float" : "double" >; diff --git a/packages/ts-moose-lib/src/dmv2/internal.ts b/packages/ts-moose-lib/src/dmv2/internal.ts index 7f003e79dd..9132ed5cdb 100644 --- a/packages/ts-moose-lib/src/dmv2/internal.ts +++ b/packages/ts-moose-lib/src/dmv2/internal.ts @@ -1162,6 +1162,7 @@ export const dlqColumns: Column[] = [ default: null, annotations: [], ttl: null, + codec: null, }, { name: "errorMessage", @@ -1172,6 +1173,7 @@ export const dlqColumns: Column[] = [ default: null, annotations: [], ttl: null, + codec: null, }, { name: "errorType", @@ -1182,6 +1184,7 @@ export const dlqColumns: Column[] = [ default: null, annotations: [], ttl: null, + codec: null, }, { name: "failedAt", @@ -1192,6 +1195,7 @@ export const dlqColumns: Column[] = [ default: null, annotations: [], ttl: null, + codec: null, }, { name: "source", @@ -1202,6 +1206,7 @@ export const dlqColumns: Column[] = [ default: null, annotations: [], ttl: null, + codec: null, }, ]; diff --git a/packages/ts-moose-lib/tests/cluster-validation.test.ts b/packages/ts-moose-lib/tests/cluster-validation.test.ts index 6f3feea84c..c7939c3ce0 100644 --- 
a/packages/ts-moose-lib/tests/cluster-validation.test.ts +++ b/packages/ts-moose-lib/tests/cluster-validation.test.ts @@ -24,6 +24,7 @@ const createMockColumns = (fields: string[]): Column[] => primary_key: false, default: null, ttl: null, + codec: null, annotations: [], })); diff --git a/packages/ts-moose-lib/tests/olap-table-versioning.test.ts b/packages/ts-moose-lib/tests/olap-table-versioning.test.ts index 861555e374..9496bc2125 100644 --- a/packages/ts-moose-lib/tests/olap-table-versioning.test.ts +++ b/packages/ts-moose-lib/tests/olap-table-versioning.test.ts @@ -27,6 +27,7 @@ const createMockColumns = (fields: string[]): Column[] => primary_key: false, default: null, ttl: null, + codec: null, annotations: [], })); diff --git a/packages/ts-moose-lib/tests/typeConvert.test.ts b/packages/ts-moose-lib/tests/typeConvert.test.ts index e9cd56971b..79655162a4 100644 --- a/packages/ts-moose-lib/tests/typeConvert.test.ts +++ b/packages/ts-moose-lib/tests/typeConvert.test.ts @@ -46,6 +46,7 @@ function createProgramWithSource(tempDir: string, sourceText: string) { describe("typeConvert mappings for helper types", function () { this.timeout(20000); // Increase timeout for TypeScript compilation + it("maps DateTime, DateTime64, numeric aliases, Decimal and LowCardinality", function () { const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "moose-typeconv-")); @@ -256,4 +257,36 @@ describe("typeConvert mappings for helper types", function () { expect(byName.id.data_type).to.equal("String"); expect(byName.created_at.data_type).to.equal("DateTime"); }); + + it("maps Codec annotations for compression", function () { + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "moose-typeconv-")); + try { + const source = ` + import { Codec } from "@514labs/moose-lib"; + + export interface TestModel { + id: string; + log_blob: Record & Codec<"ZSTD(3)">; + timestamp: Date & Codec<"Delta, LZ4">; + temperature: number & Codec<"Gorilla, ZSTD(3)">; + user_agent: string & 
Codec<"ZSTD(3)">; + tags: string[] & Codec<"ZSTD(1)">; + no_codec: string; + } + `; + const { checker, type } = createProgramWithSource(tempDir, source); + const columns = toColumns(type, checker); + const byName = Object.fromEntries(columns.map((c) => [c.name, c])); + + expect(byName.id.codec).to.equal(null); + expect(byName.log_blob.codec).to.equal("ZSTD(3)"); + expect(byName.timestamp.codec).to.equal("Delta, LZ4"); + expect(byName.temperature.codec).to.equal("Gorilla, ZSTD(3)"); + expect(byName.user_agent.codec).to.equal("ZSTD(3)"); + expect(byName.tags.codec).to.equal("ZSTD(1)"); + expect(byName.no_codec.codec).to.equal(null); + } finally { + fs.rmSync(tempDir, { recursive: true, force: true }); + } + }); }); diff --git a/templates/python-tests/src/ingest/models.py b/templates/python-tests/src/ingest/models.py index 70124db444..c484e963cf 100644 --- a/templates/python-tests/src/ingest/models.py +++ b/templates/python-tests/src/ingest/models.py @@ -1,7 +1,7 @@ # This file was auto-generated by the framework. 
You can add data models or change the existing ones from moose_lib import Point, Ring, LineString, MultiLineString, Polygon, MultiPolygon -from moose_lib import Key, IngestPipeline, IngestPipelineConfig, StringToEnumMixin, clickhouse_default, OlapTable, \ +from moose_lib import Key, IngestPipeline, IngestPipelineConfig, StringToEnumMixin, clickhouse_default, clickhouse_codec, OlapTable, \ OlapConfig, MergeTreeEngine, ReplacingMergeTreeEngine, AggregatingMergeTreeEngine, simple_aggregated, \ ClickhouseSize, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64, ClickhousePrecision from datetime import datetime, date @@ -684,3 +684,25 @@ class DateTimePrecisionTestData(BaseModel): "DateTimePrecisionOutput", StreamConfig(destination=datetime_precision_output_table) ) + + +# =======Codec Compression Test======= +class CodecTest(BaseModel): + """Test model for codec compression support.""" + id: Key[str] + timestamp: Annotated[datetime, clickhouse_codec("Delta, LZ4")] + log_blob: Annotated[Any, clickhouse_codec("ZSTD(3)")] + combination_hash: Annotated[list[UInt64], clickhouse_codec("ZSTD(1)")] + temperature: Annotated[float, clickhouse_codec("Gorilla, ZSTD(3)")] + request_count: Annotated[float, clickhouse_codec("DoubleDelta, LZ4")] + user_agent: Annotated[str, clickhouse_codec("ZSTD(3)")] + tags: Annotated[list[str], clickhouse_codec("LZ4")] + status_code: int + + +codec_test_model = IngestPipeline[CodecTest]("CodecTest", IngestPipelineConfig( + ingest_api=True, + stream=True, + table=True, + dead_letter_queue=True +)) diff --git a/templates/typescript-tests/src/ingest/models.ts b/templates/typescript-tests/src/ingest/models.ts index 7dc1e25332..ec18a8a429 100644 --- a/templates/typescript-tests/src/ingest/models.ts +++ b/templates/typescript-tests/src/ingest/models.ts @@ -21,6 +21,7 @@ import { ClickHouseByteSize, ClickHouseJson, Int64, + Codec, } from "@514labs/moose-lib"; /** @@ -684,3 +685,22 @@ export const dateTimePrecisionOutputStream = new 
Stream("DateTimePrecisionOutput", { destination: DateTimePrecisionOutputTable, }); + +// =======Codec Compression Test======= +export interface CodecTest { + id: Key; + timestamp: DateTime & Codec<"Delta, LZ4">; + log_blob: Record & Codec<"ZSTD(3)">; + combination_hash: UInt64[] & Codec<"ZSTD(1)">; + temperature: number & Codec<"Gorilla, ZSTD(3)">; + request_count: number & Codec<"DoubleDelta, LZ4">; + user_agent: string & Codec<"ZSTD(3)">; + tags: string[] & Codec<"LZ4">; + status_code: number; +} + +export const CodecTestPipeline = new IngestPipeline("CodecTest", { + table: true, + stream: true, + ingestApi: true, +}); From c9dab4947f35975be215fb4e6ad4266609a5ae14 Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Tue, 25 Nov 2025 17:16:32 -0700 Subject: [PATCH 02/10] fixes --- .../test/utils/schema-definitions.ts | 2 +- .../src/framework/core/infrastructure_map.rs | 65 +++++++++++++++++++ .../infrastructure/olap/clickhouse/queries.rs | 2 +- 3 files changed, 67 insertions(+), 2 deletions(-) diff --git a/apps/framework-cli-e2e/test/utils/schema-definitions.ts b/apps/framework-cli-e2e/test/utils/schema-definitions.ts index eb67b1db11..3f82e7b715 100644 --- a/apps/framework-cli-e2e/test/utils/schema-definitions.ts +++ b/apps/framework-cli-e2e/test/utils/schema-definitions.ts @@ -433,7 +433,7 @@ export const TYPESCRIPT_TEST_SCHEMAS: ExpectedTableSchema[] = [ { name: "request_count", type: "Float64", codec: /DoubleDelta.*LZ4/ }, { name: "user_agent", type: "String", codec: "ZSTD(3)" }, { name: "tags", type: "Array(String)", codec: "LZ4" }, - { name: "status_code", type: "Float64" }, + { name: "status_code", type: "Int64" }, ], }, ]; diff --git a/apps/framework-cli/src/framework/core/infrastructure_map.rs b/apps/framework-cli/src/framework/core/infrastructure_map.rs index da2b8dee3c..7d40efe32a 100644 --- a/apps/framework-cli/src/framework/core/infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/infrastructure_map.rs @@ -2777,6 +2777,7 @@ fn 
columns_are_equivalent(before: &Column, after: &Column) -> bool { || before.default != after.default || before.annotations != after.annotations || before.comment != after.comment + || before.codec != after.codec { return false; } @@ -4601,6 +4602,70 @@ mod diff_tests { // These should be equivalent - name differences at all levels don't matter assert!(columns_are_equivalent(&col_generated, &col_user)); } + + #[test] + fn test_columns_are_equivalent_with_codec() { + use crate::framework::core::infrastructure::table::{Column, ColumnType}; + + let base_col = Column { + name: "data".to_string(), + data_type: ColumnType::String, + required: true, + unique: false, + primary_key: false, + default: None, + annotations: vec![], + comment: None, + ttl: None, + codec: None, + }; + + // Test 1: Columns with same codec should be equivalent + let col_with_codec1 = Column { + codec: Some("ZSTD(3)".to_string()), + ..base_col.clone() + }; + let col_with_codec2 = Column { + codec: Some("ZSTD(3)".to_string()), + ..base_col.clone() + }; + assert!(columns_are_equivalent(&col_with_codec1, &col_with_codec2)); + + // Test 2: Columns with different codecs should not be equivalent + let col_with_different_codec = Column { + codec: Some("LZ4".to_string()), + ..base_col.clone() + }; + assert!(!columns_are_equivalent( + &col_with_codec1, + &col_with_different_codec + )); + + // Test 3: Column with codec vs column without codec should not be equivalent + assert!(!columns_are_equivalent(&col_with_codec1, &base_col)); + + // Test 4: Columns with codec chains should be detected as different + let col_with_chain1 = Column { + codec: Some("Delta, LZ4".to_string()), + ..base_col.clone() + }; + let col_with_chain2 = Column { + codec: Some("Delta, ZSTD".to_string()), + ..base_col.clone() + }; + assert!(!columns_are_equivalent(&col_with_chain1, &col_with_chain2)); + + // Test 5: Codec with different compression levels should be detected as different + let col_zstd3 = Column { + codec: 
Some("ZSTD(3)".to_string()), + ..base_col.clone() + }; + let col_zstd9 = Column { + codec: Some("ZSTD(9)".to_string()), + ..base_col.clone() + }; + assert!(!columns_are_equivalent(&col_zstd3, &col_zstd9)); + } } #[cfg(test)] diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs index 771f927df5..188b19e154 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs @@ -124,7 +124,7 @@ static CREATE_TABLE_TEMPLATE: &str = r#" CREATE TABLE IF NOT EXISTS `{{db_name}}`.`{{table_name}}`{{#if cluster_name}} ON CLUSTER {{cluster_name}}{{/if}} ( -{{#each fields}} `{{field_name}}` {{{field_type}}} {{field_nullable}}{{#if field_default}} DEFAULT {{{field_default}}}{{/if}}{{#if field_codec}} CODEC({{{field_codec}}}){{/if}}{{#if field_comment}} COMMENT '{{{field_comment}}}'{{/if}}{{#if field_ttl}} TTL {{{field_ttl}}}{{/if}}{{#unless @last}}, +{{#each fields}} `{{field_name}}` {{{field_type}}} {{field_nullable}}{{#if field_default}} DEFAULT {{{field_default}}}{{/if}}{{#if field_codec}} CODEC({{{field_codec}}}){{/if}}{{#if field_ttl}} TTL {{{field_ttl}}}{{/if}}{{#if field_comment}} COMMENT '{{{field_comment}}}'{{/if}}{{#unless @last}}, {{/unless}}{{/each}}{{#if has_indexes}}, {{#each indexes}}{{this}}{{#unless @last}}, {{/unless}}{{/each}}{{/if}} ) ENGINE = {{engine}}{{#if primary_key_string}} From d7739f138aae11e58614c6b1ea06a6e583386dd4 Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Tue, 25 Nov 2025 17:24:42 -0700 Subject: [PATCH 03/10] fix --- apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs index 044a0b2ce6..b2a028ab8f 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs 
+++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs @@ -1836,7 +1836,7 @@ impl OlapOperations for ConfiguredDBClient { if trimmed.starts_with("CODEC(") && trimmed.ends_with(')') { Some(trimmed[6..trimmed.len() - 1].to_string()) } else { - Some(compression_codec) + Some(trimmed.to_string()) } } else { None From f27d792993da968696bfe8fa7ac29700dcedc469 Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Tue, 25 Nov 2025 18:57:27 -0700 Subject: [PATCH 04/10] fix --- .../test/utils/schema-definitions.ts | 4 +- .../src/framework/core/infrastructure_map.rs | 43 +++++- .../src/infrastructure/olap/clickhouse/mod.rs | 130 ++++++++++++++++++ templates/python-tests/src/ingest/models.py | 2 +- 4 files changed, 174 insertions(+), 5 deletions(-) diff --git a/apps/framework-cli-e2e/test/utils/schema-definitions.ts b/apps/framework-cli-e2e/test/utils/schema-definitions.ts index 3f82e7b715..c0bbebaee7 100644 --- a/apps/framework-cli-e2e/test/utils/schema-definitions.ts +++ b/apps/framework-cli-e2e/test/utils/schema-definitions.ts @@ -433,7 +433,7 @@ export const TYPESCRIPT_TEST_SCHEMAS: ExpectedTableSchema[] = [ { name: "request_count", type: "Float64", codec: /DoubleDelta.*LZ4/ }, { name: "user_agent", type: "String", codec: "ZSTD(3)" }, { name: "tags", type: "Array(String)", codec: "LZ4" }, - { name: "status_code", type: "Int64" }, + { name: "status_code", type: "Float64" }, ], }, ]; @@ -832,7 +832,7 @@ export const PYTHON_TEST_SCHEMAS: ExpectedTableSchema[] = [ { name: "request_count", type: "Float64", codec: /DoubleDelta.*LZ4/ }, { name: "user_agent", type: "String", codec: "ZSTD(3)" }, { name: "tags", type: "Array(String)", codec: "LZ4" }, - { name: "status_code", type: "Int64" }, + { name: "status_code", type: "Float64" }, ], }, ]; diff --git a/apps/framework-cli/src/framework/core/infrastructure_map.rs b/apps/framework-cli/src/framework/core/infrastructure_map.rs index 7d40efe32a..22c47e397e 100644 --- 
a/apps/framework-cli/src/framework/core/infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/infrastructure_map.rs @@ -52,6 +52,7 @@ use crate::framework::core::infrastructure_map::Change::Added; use crate::framework::languages::SupportedLanguages; use crate::framework::python::datamodel_config::load_main_py; use crate::framework::scripts::Workflow; +use crate::infrastructure::olap::clickhouse::codec_expressions_are_equivalent; use crate::infrastructure::olap::clickhouse::config::DEFAULT_DATABASE_NAME; use crate::infrastructure::redis::redis_client::RedisClient; use crate::project::Project; @@ -2769,7 +2770,7 @@ fn ttl_expressions_are_equivalent(before: &Option, after: &Option bool { - // Check all non-data_type and non-ttl fields first + // Check all non-data_type and non-ttl and non-codec fields first if before.name != after.name || before.required != after.required || before.unique != after.unique @@ -2777,7 +2778,6 @@ fn columns_are_equivalent(before: &Column, after: &Column) -> bool { || before.default != after.default || before.annotations != after.annotations || before.comment != after.comment - || before.codec != after.codec { return false; } @@ -2787,6 +2787,12 @@ fn columns_are_equivalent(before: &Column, after: &Column) -> bool { return false; } + // Special handling for codec comparison: normalize both expressions before comparing + // This handles cases where ClickHouse adds default parameters (e.g., Delta → Delta(4)) + if !codec_expressions_are_equivalent(&before.codec, &after.codec) { + return false; + } + // Use ClickHouse-specific semantic comparison for data types // This handles special cases like enums and JSON types with order-independent typed_paths use crate::infrastructure::olap::clickhouse::diff_strategy::column_types_are_equivalent; @@ -4665,6 +4671,39 @@ mod diff_tests { ..base_col.clone() }; assert!(!columns_are_equivalent(&col_zstd3, &col_zstd9)); + + // Test 6: Normalized codec comparison - user "Delta" vs ClickHouse 
"Delta(4)" + let col_user_delta = Column { + codec: Some("Delta".to_string()), + ..base_col.clone() + }; + let col_ch_delta = Column { + codec: Some("Delta(4)".to_string()), + ..base_col.clone() + }; + assert!(columns_are_equivalent(&col_user_delta, &col_ch_delta)); + + // Test 7: Normalized codec comparison - user "Gorilla" vs ClickHouse "Gorilla(8)" + let col_user_gorilla = Column { + codec: Some("Gorilla".to_string()), + ..base_col.clone() + }; + let col_ch_gorilla = Column { + codec: Some("Gorilla(8)".to_string()), + ..base_col.clone() + }; + assert!(columns_are_equivalent(&col_user_gorilla, &col_ch_gorilla)); + + // Test 8: Normalized chain comparison - "Delta, LZ4" vs "Delta(4), LZ4" + let col_user_chain = Column { + codec: Some("Delta, LZ4".to_string()), + ..base_col.clone() + }; + let col_ch_chain = Column { + codec: Some("Delta(4), LZ4".to_string()), + ..base_col.clone() + }; + assert!(columns_are_equivalent(&col_user_chain, &col_ch_chain)); } } diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs index b2a028ab8f..a1a853c3ac 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs @@ -2340,6 +2340,34 @@ pub fn extract_table_ttl_from_create_query(create_query: &str) -> Option /// - "timestamp + INTERVAL 1 MONTH" → "timestamp + toIntervalMonth(1)" /// - "timestamp + INTERVAL 90 DAY DELETE" → "timestamp + toIntervalDay(90)" /// - "timestamp + toIntervalDay(90) DELETE" → "timestamp + toIntervalDay(90)" +pub fn normalize_codec_expression(expr: &str) -> String { + expr.split(',') + .map(|codec| { + let trimmed = codec.trim(); + match trimmed { + "Delta" => "Delta(4)", + "Gorilla" => "Gorilla(8)", + "ZSTD" => "ZSTD(1)", + // DoubleDelta, LZ4, NONE, and any codec with params stay as-is + _ => trimmed, + } + }) + .collect::>() + .join(", ") +} + +/// Checks if two codec expressions are semantically 
equivalent after normalization. +/// +/// This handles cases where ClickHouse normalizes codecs by adding default parameters. +/// For example, "Delta, LZ4" from user code is equivalent to "Delta(4), LZ4" from ClickHouse. +pub fn codec_expressions_are_equivalent(before: &Option, after: &Option) -> bool { + match (before, after) { + (None, None) => true, + (Some(b), Some(a)) => normalize_codec_expression(b) == normalize_codec_expression(a), + _ => false, + } +} + pub fn normalize_ttl_expression(expr: &str) -> String { use regex::Regex; @@ -2972,6 +3000,108 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra ); } + #[test] + fn test_normalize_codec_expression() { + // Test single codec without params - should add defaults + assert_eq!(normalize_codec_expression("Delta"), "Delta(4)"); + assert_eq!(normalize_codec_expression("Gorilla"), "Gorilla(8)"); + assert_eq!(normalize_codec_expression("ZSTD"), "ZSTD(1)"); + + // Test codecs with params - should stay as-is + assert_eq!(normalize_codec_expression("Delta(4)"), "Delta(4)"); + assert_eq!(normalize_codec_expression("Gorilla(8)"), "Gorilla(8)"); + assert_eq!(normalize_codec_expression("ZSTD(3)"), "ZSTD(3)"); + assert_eq!(normalize_codec_expression("ZSTD(9)"), "ZSTD(9)"); + + // Test codecs that don't have default params + assert_eq!(normalize_codec_expression("DoubleDelta"), "DoubleDelta"); + assert_eq!(normalize_codec_expression("LZ4"), "LZ4"); + assert_eq!(normalize_codec_expression("NONE"), "NONE"); + + // Test codec chains + assert_eq!(normalize_codec_expression("Delta, LZ4"), "Delta(4), LZ4"); + assert_eq!( + normalize_codec_expression("Gorilla, ZSTD"), + "Gorilla(8), ZSTD(1)" + ); + assert_eq!( + normalize_codec_expression("Delta, ZSTD(3)"), + "Delta(4), ZSTD(3)" + ); + assert_eq!( + normalize_codec_expression("DoubleDelta, LZ4"), + "DoubleDelta, LZ4" + ); + + // Test whitespace handling + assert_eq!(normalize_codec_expression("Delta,LZ4"), "Delta(4), LZ4"); + assert_eq!( + 
normalize_codec_expression(" Delta , LZ4 "), + "Delta(4), LZ4" + ); + + // Test already normalized expressions + assert_eq!(normalize_codec_expression("Delta(4), LZ4"), "Delta(4), LZ4"); + assert_eq!( + normalize_codec_expression("Gorilla(8), ZSTD(3)"), + "Gorilla(8), ZSTD(3)" + ); + } + + #[test] + fn test_codec_expressions_are_equivalent() { + // Test None vs None + assert!(codec_expressions_are_equivalent(&None, &None)); + + // Test Some vs None + assert!(!codec_expressions_are_equivalent( + &Some("ZSTD(3)".to_string()), + &None + )); + + // Test same codec + assert!(codec_expressions_are_equivalent( + &Some("ZSTD(3)".to_string()), + &Some("ZSTD(3)".to_string()) + )); + + // Test normalization: user writes "Delta", ClickHouse returns "Delta(4)" + assert!(codec_expressions_are_equivalent( + &Some("Delta".to_string()), + &Some("Delta(4)".to_string()) + )); + + // Test normalization: user writes "Gorilla", ClickHouse returns "Gorilla(8)" + assert!(codec_expressions_are_equivalent( + &Some("Gorilla".to_string()), + &Some("Gorilla(8)".to_string()) + )); + + // Test normalization: user writes "ZSTD", ClickHouse returns "ZSTD(1)" + assert!(codec_expressions_are_equivalent( + &Some("ZSTD".to_string()), + &Some("ZSTD(1)".to_string()) + )); + + // Test chain normalization + assert!(codec_expressions_are_equivalent( + &Some("Delta, LZ4".to_string()), + &Some("Delta(4), LZ4".to_string()) + )); + + // Test different codecs + assert!(!codec_expressions_are_equivalent( + &Some("ZSTD(3)".to_string()), + &Some("ZSTD(9)".to_string()) + )); + + // Test different chains + assert!(!codec_expressions_are_equivalent( + &Some("Delta, LZ4".to_string()), + &Some("Delta, ZSTD".to_string()) + )); + } + #[test] fn test_normalize_ttl_expression() { // Test DAY conversion diff --git a/templates/python-tests/src/ingest/models.py b/templates/python-tests/src/ingest/models.py index c484e963cf..7d629e7791 100644 --- a/templates/python-tests/src/ingest/models.py +++ 
b/templates/python-tests/src/ingest/models.py @@ -697,7 +697,7 @@ class CodecTest(BaseModel): request_count: Annotated[float, clickhouse_codec("DoubleDelta, LZ4")] user_agent: Annotated[str, clickhouse_codec("ZSTD(3)")] tags: Annotated[list[str], clickhouse_codec("LZ4")] - status_code: int + status_code: float codec_test_model = IngestPipeline[CodecTest]("CodecTest", IngestPipelineConfig( From 49378d38d4c6c95f70e7c8384ccb671f051864f4 Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Wed, 26 Nov 2025 11:47:42 -0700 Subject: [PATCH 05/10] rename --- .../src/framework/python/generate.rs | 9 +-- .../src/framework/typescript/generate.rs | 4 +- .../llm-docs/python/table-setup.md | 22 ++++---- .../llm-docs/typescript/table-setup.md | 20 +++---- .../src/pages/moose/olap/compression.mdx | 56 +++++++++---------- .../pages/moose/olap/schema-optimization.mdx | 18 +++--- packages/py-moose-lib/moose_lib/__init__.py | 1 - .../py-moose-lib/moose_lib/data_models.py | 22 -------- packages/py-moose-lib/tests/test_codec.py | 20 +++---- .../ts-moose-lib/src/browserCompatible.ts | 2 +- .../src/dataModels/typeConvert.ts | 2 +- packages/ts-moose-lib/src/dataModels/types.ts | 12 ++-- .../ts-moose-lib/tests/typeConvert.test.ts | 12 ++-- templates/python-tests/src/ingest/models.py | 16 +++--- .../typescript-tests/src/ingest/models.ts | 16 +++--- 15 files changed, 103 insertions(+), 129 deletions(-) diff --git a/apps/framework-cli/src/framework/python/generate.rs b/apps/framework-cli/src/framework/python/generate.rs index 34a63fd330..f351c2a60c 100644 --- a/apps/framework-cli/src/framework/python/generate.rs +++ b/apps/framework-cli/src/framework/python/generate.rs @@ -557,7 +557,7 @@ pub fn tables_to_python(tables: &[Table], life_cycle: Option) -> Stri .unwrap(); writeln!( output, - "from moose_lib import clickhouse_default, clickhouse_codec, LifeCycle, ClickHouseTTL" + "from moose_lib import clickhouse_default, ClickHouseCodec, LifeCycle, ClickHouseTTL" ) .unwrap(); writeln!( @@ -678,10 
+678,7 @@ pub fn tables_to_python(tables: &[Table], life_cycle: Option) -> Stri type_str = format!("Annotated[{}, ClickHouseTTL({:?})]", type_str, ttl_expr); } if let Some(ref codec_expr) = column.codec { - type_str = format!( - "Annotated[{}, clickhouse_codec({:?})]", - type_str, codec_expr - ); + type_str = format!("Annotated[{}, ClickHouseCodec({:?})]", type_str, codec_expr); } if let Some(ref default_expr) = column.default { type_str = format!( @@ -1109,7 +1106,7 @@ from enum import IntEnum, Enum from moose_lib import Key, IngestPipeline, IngestPipelineConfig, OlapTable, OlapConfig, clickhouse_datetime64, clickhouse_decimal, ClickhouseSize, StringToEnumMixin from moose_lib.data_models import ClickHouseJson from moose_lib import Point, Ring, LineString, MultiLineString, Polygon, MultiPolygon, FixedString -from moose_lib import clickhouse_default, clickhouse_codec, LifeCycle, ClickHouseTTL +from moose_lib import clickhouse_default, ClickHouseCodec, LifeCycle, ClickHouseTTL from moose_lib.blocks import MergeTreeEngine, ReplacingMergeTreeEngine, AggregatingMergeTreeEngine, SummingMergeTreeEngine, S3QueueEngine, ReplicatedMergeTreeEngine, ReplicatedReplacingMergeTreeEngine, ReplicatedAggregatingMergeTreeEngine, ReplicatedSummingMergeTreeEngine class Foo(BaseModel): diff --git a/apps/framework-cli/src/framework/typescript/generate.rs b/apps/framework-cli/src/framework/typescript/generate.rs index c8d3b26b83..e1645c8e27 100644 --- a/apps/framework-cli/src/framework/typescript/generate.rs +++ b/apps/framework-cli/src/framework/typescript/generate.rs @@ -340,7 +340,7 @@ pub fn tables_to_typescript(tables: &[Table], life_cycle: Option) -> "WithDefault", "LifeCycle", "ClickHouseTTL", - "Codec", + "ClickHouseCodec", ]; if uses_simple_aggregate { @@ -595,7 +595,7 @@ pub fn tables_to_typescript(tables: &[Table], life_cycle: Option) -> // Wrap with Codec if present let type_str = match column.codec.as_ref() { None => type_str, - Some(ref codec) => format!("{type_str} & 
Codec<{codec:?}>"), + Some(ref codec) => format!("{type_str} & ClickHouseCodec<{codec:?}>"), }; let type_str = match column.default { None => type_str, diff --git a/apps/framework-docs/llm-docs/python/table-setup.md b/apps/framework-docs/llm-docs/python/table-setup.md index 457121f27d..65da3efc7f 100644 --- a/apps/framework-docs/llm-docs/python/table-setup.md +++ b/apps/framework-docs/llm-docs/python/table-setup.md @@ -922,27 +922,27 @@ Specify per-column compression codecs to optimize storage and performance: ```python from typing import Annotated, Any -from moose_lib import OlapTable, OlapConfig, clickhouse_codec, UInt64 +from moose_lib import OlapTable, OlapConfig, ClickHouseCodec, UInt64 from pydantic import BaseModel from datetime import datetime class Metrics(BaseModel): # Delta for timestamps and monotonically increasing values - timestamp: Annotated[datetime, clickhouse_codec("Delta, LZ4")] + timestamp: Annotated[datetime, ClickHouseCodec("Delta, LZ4")] # Gorilla for floating point sensor data - temperature: Annotated[float, clickhouse_codec("Gorilla, ZSTD(3)")] + temperature: Annotated[float, ClickHouseCodec("Gorilla, ZSTD(3)")] # DoubleDelta for counters and metrics - request_count: Annotated[float, clickhouse_codec("DoubleDelta, LZ4")] + request_count: Annotated[float, ClickHouseCodec("DoubleDelta, LZ4")] # ZSTD for text/JSON with compression level (1-22) - log_data: Annotated[Any, clickhouse_codec("ZSTD(9)")] - user_agent: Annotated[str, clickhouse_codec("ZSTD(3)")] + log_data: Annotated[Any, ClickHouseCodec("ZSTD(9)")] + user_agent: Annotated[str, ClickHouseCodec("ZSTD(3)")] # Compress array elements - tags: Annotated[list[str], clickhouse_codec("LZ4")] - event_ids: Annotated[list[UInt64], clickhouse_codec("ZSTD(1)")] + tags: Annotated[list[str], ClickHouseCodec("LZ4")] + event_ids: Annotated[list[UInt64], ClickHouseCodec("ZSTD(1)")] # No codec (uses ClickHouse default) status_code: int @@ -968,11 +968,11 @@ from moose_lib import clickhouse_default, 
ClickHouseTTL, UInt64 class Events(BaseModel): # Codec + Default value - status: Annotated[str, clickhouse_default("'pending'"), clickhouse_codec("ZSTD(3)")] + status: Annotated[str, clickhouse_default("'pending'"), ClickHouseCodec("ZSTD(3)")] # Codec + TTL - email: Annotated[str, ClickHouseTTL("timestamp + INTERVAL 30 DAY"), clickhouse_codec("ZSTD(3)")] + email: Annotated[str, ClickHouseTTL("timestamp + INTERVAL 30 DAY"), ClickHouseCodec("ZSTD(3)")] # Codec + Numeric type - event_count: Annotated[UInt64, clickhouse_codec("DoubleDelta, LZ4")] + event_count: Annotated[UInt64, ClickHouseCodec("DoubleDelta, LZ4")] ``` diff --git a/apps/framework-docs/llm-docs/typescript/table-setup.md b/apps/framework-docs/llm-docs/typescript/table-setup.md index a1f55fd07c..4e93d9e020 100644 --- a/apps/framework-docs/llm-docs/typescript/table-setup.md +++ b/apps/framework-docs/llm-docs/typescript/table-setup.md @@ -734,21 +734,21 @@ import { Codec, DateTime, UInt64 } from '@514labs/moose-lib'; interface Metrics { // Delta for timestamps and monotonically increasing values - timestamp: DateTime & Codec<"Delta, LZ4">; + timestamp: DateTime & ClickHouseCodec<"Delta, LZ4">; // Gorilla for floating point sensor data - temperature: number & Codec<"Gorilla, ZSTD(3)">; + temperature: number & ClickHouseCodec<"Gorilla, ZSTD(3)">; // DoubleDelta for counters and metrics - request_count: number & Codec<"DoubleDelta, LZ4">; + request_count: number & ClickHouseCodec<"DoubleDelta, LZ4">; // ZSTD for text/JSON with compression level (1-22) - log_data: Record & Codec<"ZSTD(9)">; - user_agent: string & Codec<"ZSTD(3)">; + log_data: Record & ClickHouseCodec<"ZSTD(9)">; + user_agent: string & ClickHouseCodec<"ZSTD(3)">; // Compress array elements - tags: string[] & Codec<"LZ4">; - event_ids: UInt64[] & Codec<"ZSTD(1)">; + tags: string[] & ClickHouseCodec<"LZ4">; + event_ids: UInt64[] & ClickHouseCodec<"ZSTD(1)">; } export const MetricsTable = new OlapTable("Metrics", { @@ -771,12 +771,12 @@ import { 
ClickHouseDefault, ClickHouseTTL } from "@514labs/moose-lib"; interface Events { // Codec + Default value - status: string & ClickHouseDefault<"'pending'"> & Codec<"ZSTD(3)">; + status: string & ClickHouseDefault<"'pending'"> & ClickHouseCodec<"ZSTD(3)">; // Codec + TTL - email: string & ClickHouseTTL<"timestamp + INTERVAL 30 DAY"> & Codec<"ZSTD(3)">; + email: string & ClickHouseTTL<"timestamp + INTERVAL 30 DAY"> & ClickHouseCodec<"ZSTD(3)">; // Codec + Numeric type - event_count: UInt64 & Codec<"DoubleDelta, LZ4">; + event_count: UInt64 & ClickHouseCodec<"DoubleDelta, LZ4">; } ``` diff --git a/apps/framework-docs/src/pages/moose/olap/compression.mdx b/apps/framework-docs/src/pages/moose/olap/compression.mdx index 5c0bc47d9a..08dd9bd197 100644 --- a/apps/framework-docs/src/pages/moose/olap/compression.mdx +++ b/apps/framework-docs/src/pages/moose/olap/compression.mdx @@ -22,21 +22,21 @@ import { OlapTable, Key, DateTime, Codec, UInt64 } from "@514labs/moose-lib"; interface Metrics { id: Key; // Delta codec for timestamps (monotonically increasing) - timestamp: DateTime & Codec<"Delta, LZ4">; + timestamp: DateTime & ClickHouseCodec<"Delta, LZ4">; // Gorilla codec for floating point sensor data - temperature: number & Codec<"Gorilla, ZSTD(3)">; + temperature: number & ClickHouseCodec<"Gorilla, ZSTD(3)">; // DoubleDelta for counters and metrics - request_count: number & Codec<"DoubleDelta, LZ4">; + request_count: number & ClickHouseCodec<"DoubleDelta, LZ4">; // ZSTD for text/JSON with compression level - log_data: Record & Codec<"ZSTD(3)">; - user_agent: string & Codec<"ZSTD(3)">; + log_data: Record & ClickHouseCodec<"ZSTD(3)">; + user_agent: string & ClickHouseCodec<"ZSTD(3)">; // Compress array elements - tags: string[] & Codec<"LZ4">; - event_ids: UInt64[] & Codec<"ZSTD(1)">; + tags: string[] & ClickHouseCodec<"LZ4">; + event_ids: UInt64[] & ClickHouseCodec<"ZSTD(1)">; } export const MetricsTable = new OlapTable("Metrics", { @@ -48,28 +48,28 @@ export const 
MetricsTable = new OlapTable("Metrics", { ```python from typing import Annotated, Any -from moose_lib import OlapTable, OlapConfig, Key, clickhouse_codec, UInt64 +from moose_lib import OlapTable, OlapConfig, Key, ClickHouseCodec, UInt64 from pydantic import BaseModel from datetime import datetime class Metrics(BaseModel): id: Key[str] # Delta codec for timestamps (monotonically increasing) - timestamp: Annotated[datetime, clickhouse_codec("Delta, LZ4")] + timestamp: Annotated[datetime, ClickHouseCodec("Delta, LZ4")] # Gorilla codec for floating point sensor data - temperature: Annotated[float, clickhouse_codec("Gorilla, ZSTD(3)")] + temperature: Annotated[float, ClickHouseCodec("Gorilla, ZSTD(3)")] # DoubleDelta for counters and metrics - request_count: Annotated[float, clickhouse_codec("DoubleDelta, LZ4")] + request_count: Annotated[float, ClickHouseCodec("DoubleDelta, LZ4")] # ZSTD for text/JSON with compression level - log_data: Annotated[Any, clickhouse_codec("ZSTD(3)")] - user_agent: Annotated[str, clickhouse_codec("ZSTD(3)")] + log_data: Annotated[Any, ClickHouseCodec("ZSTD(3)")] + user_agent: Annotated[str, ClickHouseCodec("ZSTD(3)")] # Compress array elements - tags: Annotated[list[str], clickhouse_codec("LZ4")] - event_ids: Annotated[list[UInt64], clickhouse_codec("ZSTD(1)")] + tags: Annotated[list[str], ClickHouseCodec("LZ4")] + event_ids: Annotated[list[UInt64], ClickHouseCodec("ZSTD(1)")] metrics_table = OlapTable[Metrics]( "Metrics", @@ -86,10 +86,10 @@ You can chain multiple codecs together. 
Data is processed by each codec in seque ```typescript interface Events { // Delta compress timestamps, then apply LZ4 - timestamp: DateTime & Codec<"Delta, LZ4">; + timestamp: DateTime & ClickHouseCodec<"Delta, LZ4">; // Gorilla for floats, then ZSTD for extra compression - value: number & Codec<"Gorilla, ZSTD(3)">; + value: number & ClickHouseCodec<"Gorilla, ZSTD(3)">; } ``` @@ -98,10 +98,10 @@ interface Events { ```python class Events(BaseModel): # Delta compress timestamps, then apply LZ4 - timestamp: Annotated[datetime, clickhouse_codec("Delta, LZ4")] + timestamp: Annotated[datetime, ClickHouseCodec("Delta, LZ4")] # Gorilla for floats, then ZSTD for extra compression - value: Annotated[float, clickhouse_codec("Gorilla, ZSTD(3)")] + value: Annotated[float, ClickHouseCodec("Gorilla, ZSTD(3)")] ``` @@ -115,36 +115,36 @@ import { ClickHouseDefault, ClickHouseTTL } from "@514labs/moose-lib"; interface UserEvents { id: Key; - timestamp: DateTime & Codec<"Delta, LZ4">; + timestamp: DateTime & ClickHouseCodec<"Delta, LZ4">; // Codec + Default value - status: string & ClickHouseDefault<"'pending'"> & Codec<"ZSTD(3)">; + status: string & ClickHouseDefault<"'pending'"> & ClickHouseCodec<"ZSTD(3)">; // Codec + TTL - email: string & ClickHouseTTL<"timestamp + INTERVAL 30 DAY"> & Codec<"ZSTD(3)">; + email: string & ClickHouseTTL<"timestamp + INTERVAL 30 DAY"> & ClickHouseCodec<"ZSTD(3)">; // Codec + Numeric type - event_count: UInt64 & Codec<"DoubleDelta, LZ4">; + event_count: UInt64 & ClickHouseCodec<"DoubleDelta, LZ4">; } ``` ```python -from moose_lib import clickhouse_default, ClickHouseTTL +from moose_lib import clickhouse_default, ClickHouseTTL, ClickHouseCodec class UserEvents(BaseModel): id: Key[str] - timestamp: Annotated[datetime, clickhouse_codec("Delta, LZ4")] + timestamp: Annotated[datetime, ClickHouseCodec("Delta, LZ4")] # Codec + Default value - status: Annotated[str, clickhouse_default("'pending'"), clickhouse_codec("ZSTD(3)")] + status: Annotated[str, 
clickhouse_default("'pending'"), ClickHouseCodec("ZSTD(3)")] # Codec + TTL - email: Annotated[str, ClickHouseTTL("timestamp + INTERVAL 30 DAY"), clickhouse_codec("ZSTD(3)")] + email: Annotated[str, ClickHouseTTL("timestamp + INTERVAL 30 DAY"), ClickHouseCodec("ZSTD(3)")] # Codec + Numeric type - event_count: Annotated[UInt64, clickhouse_codec("DoubleDelta, LZ4")] + event_count: Annotated[UInt64, ClickHouseCodec("DoubleDelta, LZ4")] ``` diff --git a/apps/framework-docs/src/pages/moose/olap/schema-optimization.mdx b/apps/framework-docs/src/pages/moose/olap/schema-optimization.mdx index 51ee3df583..2cc959490a 100644 --- a/apps/framework-docs/src/pages/moose/olap/schema-optimization.mdx +++ b/apps/framework-docs/src/pages/moose/olap/schema-optimization.mdx @@ -488,24 +488,24 @@ ClickHouse supports per-column compression codecs to optimize storage and query import { Codec, DateTime, UInt64 } from "@514labs/moose-lib"; interface Metrics { - timestamp: DateTime & Codec<"Delta, LZ4">; // Timestamps - temperature: number & Codec<"Gorilla, ZSTD(3)">; // Float metrics - log_data: Record & Codec<"ZSTD(9)">; // JSON with heavy compression - event_ids: UInt64[] & Codec<"ZSTD(1)">; // Arrays + timestamp: DateTime & ClickHouseCodec<"Delta, LZ4">; // Timestamps + temperature: number & ClickHouseCodec<"Gorilla, ZSTD(3)">; // Float metrics + log_data: Record & ClickHouseCodec<"ZSTD(9)">; // JSON with heavy compression + event_ids: UInt64[] & ClickHouseCodec<"ZSTD(1)">; // Arrays } ``` ```python -from moose_lib import clickhouse_codec, UInt64 +from moose_lib import ClickHouseCodec, UInt64 from typing import Annotated, Any class Metrics(BaseModel): - timestamp: Annotated[datetime, clickhouse_codec("Delta, LZ4")] # Timestamps - temperature: Annotated[float, clickhouse_codec("Gorilla, ZSTD(3)")] # Float metrics - log_data: Annotated[Any, clickhouse_codec("ZSTD(9)")] # JSON with heavy compression - event_ids: Annotated[list[UInt64], clickhouse_codec("ZSTD(1)")] # Arrays + timestamp: 
Annotated[datetime, ClickHouseCodec("Delta, LZ4")] # Timestamps + temperature: Annotated[float, ClickHouseCodec("Gorilla, ZSTD(3)")] # Float metrics + log_data: Annotated[Any, ClickHouseCodec("ZSTD(9)")] # JSON with heavy compression + event_ids: Annotated[list[UInt64], ClickHouseCodec("ZSTD(1)")] # Arrays ``` diff --git a/packages/py-moose-lib/moose_lib/__init__.py b/packages/py-moose-lib/moose_lib/__init__.py index 53bcf459a2..d50c9c72a5 100644 --- a/packages/py-moose-lib/moose_lib/__init__.py +++ b/packages/py-moose-lib/moose_lib/__init__.py @@ -41,7 +41,6 @@ clickhouse_default, ClickHouseTTL, ClickHouseCodec, - clickhouse_codec, # Integer types Int8, Int16, diff --git a/packages/py-moose-lib/moose_lib/data_models.py b/packages/py-moose-lib/moose_lib/data_models.py index 6d47e7ad41..dc0a3591b6 100644 --- a/packages/py-moose-lib/moose_lib/data_models.py +++ b/packages/py-moose-lib/moose_lib/data_models.py @@ -77,28 +77,6 @@ class ClickHouseCodec: expression: str -def clickhouse_codec(expression: str) -> ClickHouseCodec: - """ - Creates a CODEC annotation for column compression. - - Supports single codecs and codec chains. 
- - Args: - expression: Codec expression (e.g., "ZSTD(3)", "Delta, LZ4") - - Examples: - # Single codec with compression level - log_blob: Annotated[Any, clickhouse_codec("ZSTD(3)")] - - # Codec chain for time series - timestamp: Annotated[datetime, clickhouse_codec("Delta, LZ4")] - - # Codec chain for floats - temperature: Annotated[float, clickhouse_codec("Gorilla, ZSTD")] - """ - return ClickHouseCodec(expression=expression) - - @dataclasses.dataclass(frozen=True) class ClickHouseJson: max_dynamic_paths: int | None = None diff --git a/packages/py-moose-lib/tests/test_codec.py b/packages/py-moose-lib/tests/test_codec.py index 25138a8951..e52f24ae7b 100644 --- a/packages/py-moose-lib/tests/test_codec.py +++ b/packages/py-moose-lib/tests/test_codec.py @@ -1,7 +1,7 @@ from datetime import datetime from typing import Annotated, Any from pydantic import BaseModel -from moose_lib import Key, clickhouse_codec, UInt64 +from moose_lib import Key, ClickHouseCodec, UInt64 from moose_lib.data_models import _to_columns @@ -10,7 +10,7 @@ def test_codec_single(): class CodecTest(BaseModel): id: Key[str] - data: Annotated[str, clickhouse_codec("ZSTD(3)")] + data: Annotated[str, ClickHouseCodec("ZSTD(3)")] columns = _to_columns(CodecTest) by_name = {col.name: col for col in columns} @@ -23,8 +23,8 @@ def test_codec_chain(): """Test codec chain annotation (Delta, LZ4).""" class CodecChainTest(BaseModel): - timestamp: Annotated[datetime, clickhouse_codec("Delta, LZ4")] - value: Annotated[float, clickhouse_codec("Gorilla, ZSTD")] + timestamp: Annotated[datetime, ClickHouseCodec("Delta, LZ4")] + value: Annotated[float, ClickHouseCodec("Gorilla, ZSTD")] columns = _to_columns(CodecChainTest) by_name = {col.name: col for col in columns} @@ -37,8 +37,8 @@ def test_codec_with_level(): """Test codec with compression level.""" class CodecLevelTest(BaseModel): - log_blob: Annotated[Any, clickhouse_codec("ZSTD(3)")] - combination_hash: Annotated[list[UInt64], clickhouse_codec("ZSTD(1)")] + 
log_blob: Annotated[Any, ClickHouseCodec("ZSTD(3)")] + combination_hash: Annotated[list[UInt64], ClickHouseCodec("ZSTD(1)")] columns = _to_columns(CodecLevelTest) by_name = {col.name: col for col in columns} @@ -51,9 +51,9 @@ def test_codec_specialized(): """Test specialized codecs.""" class SpecializedCodecTest(BaseModel): - timestamp: Annotated[datetime, clickhouse_codec("Delta")] - counter: Annotated[int, clickhouse_codec("DoubleDelta")] - temperature: Annotated[float, clickhouse_codec("Gorilla")] + timestamp: Annotated[datetime, ClickHouseCodec("Delta")] + counter: Annotated[int, ClickHouseCodec("DoubleDelta")] + temperature: Annotated[float, ClickHouseCodec("Gorilla")] columns = _to_columns(SpecializedCodecTest) by_name = {col.name: col for col in columns} @@ -67,7 +67,7 @@ def test_codec_none(): """Test codec with NONE (uncompressed).""" class NoCodecTest(BaseModel): - data: Annotated[str, clickhouse_codec("NONE")] + data: Annotated[str, ClickHouseCodec("NONE")] columns = _to_columns(NoCodecTest) by_name = {col.name: col for col in columns} diff --git a/packages/ts-moose-lib/src/browserCompatible.ts b/packages/ts-moose-lib/src/browserCompatible.ts index f6226c2321..a0d74719ac 100644 --- a/packages/ts-moose-lib/src/browserCompatible.ts +++ b/packages/ts-moose-lib/src/browserCompatible.ts @@ -62,7 +62,7 @@ export { ClickHouseDefault, ClickHouseTTL, WithDefault, - Codec, + ClickHouseCodec, // Added friendly aliases and numeric helpers DateTime, DateTime64, diff --git a/packages/ts-moose-lib/src/dataModels/typeConvert.ts b/packages/ts-moose-lib/src/dataModels/typeConvert.ts index 739235f1a2..93b0c778de 100644 --- a/packages/ts-moose-lib/src/dataModels/typeConvert.ts +++ b/packages/ts-moose-lib/src/dataModels/typeConvert.ts @@ -933,7 +933,7 @@ const handleCodec = (t: ts.Type, checker: TypeChecker): string | null => { } if (!codecType.isStringLiteral()) { throw new UnsupportedFeature( - 'Codec must use a string literal, e.g. 
Codec<"ZSTD(3)">', + 'ClickHouseCodec must use a string literal, e.g. ClickHouseCodec<"ZSTD(3)">', ); } return codecType.value; diff --git a/packages/ts-moose-lib/src/dataModels/types.ts b/packages/ts-moose-lib/src/dataModels/types.ts index af90f8bcd0..cca3b934bd 100644 --- a/packages/ts-moose-lib/src/dataModels/types.ts +++ b/packages/ts-moose-lib/src/dataModels/types.ts @@ -84,20 +84,20 @@ export type Decimal

= string & * @example * interface Metrics { * // Single codec - * log_blob: string & Codec<"ZSTD(3)">; + * log_blob: string & ClickHouseCodec<"ZSTD(3)">; * * // Codec chain (processed left-to-right) - * timestamp: Date & Codec<"Delta, LZ4">; - * temperature: number & Codec<"Gorilla, ZSTD">; + * timestamp: Date & ClickHouseCodec<"Delta, LZ4">; + * temperature: number & ClickHouseCodec<"Gorilla, ZSTD">; * * // Specialized codecs - * counter: number & Codec<"DoubleDelta">; + * counter: number & ClickHouseCodec<"DoubleDelta">; * * // Can combine with other annotations - * count: UInt64 & Codec<"DoubleDelta, LZ4">; + * count: UInt64 & ClickHouseCodec<"DoubleDelta, LZ4">; * } */ -export type Codec = { +export type ClickHouseCodec = { _clickhouse_codec?: CodecExpr; }; diff --git a/packages/ts-moose-lib/tests/typeConvert.test.ts b/packages/ts-moose-lib/tests/typeConvert.test.ts index 79655162a4..b4658b8655 100644 --- a/packages/ts-moose-lib/tests/typeConvert.test.ts +++ b/packages/ts-moose-lib/tests/typeConvert.test.ts @@ -262,15 +262,15 @@ describe("typeConvert mappings for helper types", function () { const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "moose-typeconv-")); try { const source = ` - import { Codec } from "@514labs/moose-lib"; + import { ClickHouseCodec } from "@514labs/moose-lib"; export interface TestModel { id: string; - log_blob: Record & Codec<"ZSTD(3)">; - timestamp: Date & Codec<"Delta, LZ4">; - temperature: number & Codec<"Gorilla, ZSTD(3)">; - user_agent: string & Codec<"ZSTD(3)">; - tags: string[] & Codec<"ZSTD(1)">; + log_blob: Record & ClickHouseCodec<"ZSTD(3)">; + timestamp: Date & ClickHouseCodec<"Delta, LZ4">; + temperature: number & ClickHouseCodec<"Gorilla, ZSTD(3)">; + user_agent: string & ClickHouseCodec<"ZSTD(3)">; + tags: string[] & ClickHouseCodec<"ZSTD(1)">; no_codec: string; } `; diff --git a/templates/python-tests/src/ingest/models.py b/templates/python-tests/src/ingest/models.py index 7d629e7791..acffbb3887 100644 --- 
a/templates/python-tests/src/ingest/models.py +++ b/templates/python-tests/src/ingest/models.py @@ -1,7 +1,7 @@ # This file was auto-generated by the framework. You can add data models or change the existing ones from moose_lib import Point, Ring, LineString, MultiLineString, Polygon, MultiPolygon -from moose_lib import Key, IngestPipeline, IngestPipelineConfig, StringToEnumMixin, clickhouse_default, clickhouse_codec, OlapTable, \ +from moose_lib import Key, IngestPipeline, IngestPipelineConfig, StringToEnumMixin, clickhouse_default, ClickHouseCodec, OlapTable, \ OlapConfig, MergeTreeEngine, ReplacingMergeTreeEngine, AggregatingMergeTreeEngine, simple_aggregated, \ ClickhouseSize, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64, ClickhousePrecision from datetime import datetime, date @@ -690,13 +690,13 @@ class DateTimePrecisionTestData(BaseModel): class CodecTest(BaseModel): """Test model for codec compression support.""" id: Key[str] - timestamp: Annotated[datetime, clickhouse_codec("Delta, LZ4")] - log_blob: Annotated[Any, clickhouse_codec("ZSTD(3)")] - combination_hash: Annotated[list[UInt64], clickhouse_codec("ZSTD(1)")] - temperature: Annotated[float, clickhouse_codec("Gorilla, ZSTD(3)")] - request_count: Annotated[float, clickhouse_codec("DoubleDelta, LZ4")] - user_agent: Annotated[str, clickhouse_codec("ZSTD(3)")] - tags: Annotated[list[str], clickhouse_codec("LZ4")] + timestamp: Annotated[datetime, ClickHouseCodec("Delta, LZ4")] + log_blob: Annotated[Any, ClickHouseCodec("ZSTD(3)")] + combination_hash: Annotated[list[UInt64], ClickHouseCodec("ZSTD(1)")] + temperature: Annotated[float, ClickHouseCodec("Gorilla, ZSTD(3)")] + request_count: Annotated[float, ClickHouseCodec("DoubleDelta, LZ4")] + user_agent: Annotated[str, ClickHouseCodec("ZSTD(3)")] + tags: Annotated[list[str], ClickHouseCodec("LZ4")] status_code: float diff --git a/templates/typescript-tests/src/ingest/models.ts b/templates/typescript-tests/src/ingest/models.ts 
index ec18a8a429..53c2b82d3f 100644 --- a/templates/typescript-tests/src/ingest/models.ts +++ b/templates/typescript-tests/src/ingest/models.ts @@ -21,7 +21,7 @@ import { ClickHouseByteSize, ClickHouseJson, Int64, - Codec, + ClickHouseCodec, } from "@514labs/moose-lib"; /** @@ -689,13 +689,13 @@ export const dateTimePrecisionOutputStream = // =======Codec Compression Test======= export interface CodecTest { id: Key; - timestamp: DateTime & Codec<"Delta, LZ4">; - log_blob: Record & Codec<"ZSTD(3)">; - combination_hash: UInt64[] & Codec<"ZSTD(1)">; - temperature: number & Codec<"Gorilla, ZSTD(3)">; - request_count: number & Codec<"DoubleDelta, LZ4">; - user_agent: string & Codec<"ZSTD(3)">; - tags: string[] & Codec<"LZ4">; + timestamp: DateTime & ClickHouseCodec<"Delta, LZ4">; + log_blob: Record & ClickHouseCodec<"ZSTD(3)">; + combination_hash: UInt64[] & ClickHouseCodec<"ZSTD(1)">; + temperature: number & ClickHouseCodec<"Gorilla, ZSTD(3)">; + request_count: number & ClickHouseCodec<"DoubleDelta, LZ4">; + user_agent: string & ClickHouseCodec<"ZSTD(3)">; + tags: string[] & ClickHouseCodec<"LZ4">; status_code: number; } From 5b34ab4edec22705ffac9fa93c4d56c58c2c326b Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Wed, 26 Nov 2025 13:12:46 -0700 Subject: [PATCH 06/10] fix --- .../src/infrastructure/olap/clickhouse/mod.rs | 68 ++++++++++++++++--- 1 file changed, 57 insertions(+), 11 deletions(-) diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs index a1a853c3ac..a4c5b36344 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs @@ -812,6 +812,10 @@ async fn execute_add_table_column( let clickhouse_column = std_column_to_clickhouse_column(column.clone())?; let column_type_string = basic_field_type_to_string(&clickhouse_column.column_type)?; + let cluster_clause = cluster_name + .map(|c| format!(" 
ON CLUSTER {}", c)) + .unwrap_or_default(); + // Include DEFAULT clause if column has a default value let default_clause = clickhouse_column .default @@ -819,22 +823,34 @@ async fn execute_add_table_column( .map(|d| format!(" DEFAULT {}", d)) .unwrap_or_default(); - let cluster_clause = cluster_name - .map(|c| format!(" ON CLUSTER {}", c)) + let codec_clause = clickhouse_column + .codec + .as_ref() + .map(|c| format!(" CODEC({})", c)) .unwrap_or_default(); + let ttl_clause = clickhouse_column + .ttl + .as_ref() + .map(|t| format!(" TTL {}", t)) + .unwrap_or_default(); + + let position_clause = match after_column { + None => "FIRST".to_string(), + Some(after_col) => format!("AFTER `{after_col}`"), + }; + let add_column_query = format!( - "ALTER TABLE `{}`.`{}`{} ADD COLUMN `{}` {}{} {}", + "ALTER TABLE `{}`.`{}`{} ADD COLUMN `{}` {}{}{}{} {}", db_name, table_name, cluster_clause, clickhouse_column.name, column_type_string, default_clause, - match after_column { - None => "FIRST".to_string(), - Some(after_col) => format!("AFTER `{after_col}`"), - } + codec_clause, + ttl_clause, + position_clause ); log::debug!("Adding column: {}", add_column_query); run_query(&add_column_query, client).await.map_err(|e| { @@ -3257,19 +3273,34 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra .map(|d| format!(" DEFAULT {}", d)) .unwrap_or_default(); + let ttl_clause = clickhouse_column + .ttl + .as_ref() + .map(|t| format!(" TTL {}", t)) + .unwrap_or_default(); + + let codec_clause = clickhouse_column + .codec + .as_ref() + .map(|c| format!(" CODEC({})", c)) + .unwrap_or_default(); + let add_column_query = format!( - "ALTER TABLE `{}`.`{}` ADD COLUMN `{}` {}{} {}", + "ALTER TABLE `{}`.`{}`{} ADD COLUMN `{}` {}{}{}{} {}", "test_db", "test_table", + "", clickhouse_column.name, column_type_string, default_clause, + codec_clause, + ttl_clause, "FIRST" ); assert_eq!( add_column_query, - "ALTER TABLE `test_db`.`test_table` ADD COLUMN `count` Int32 DEFAULT 
42 FIRST" + "ALTER TABLE `test_db`.`test_table` ADD COLUMN `count` Int32 DEFAULT 42 FIRST" ); } @@ -3305,19 +3336,34 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra .map(|d| format!(" DEFAULT {}", d)) .unwrap_or_default(); + let ttl_clause = clickhouse_column + .ttl + .as_ref() + .map(|t| format!(" TTL {}", t)) + .unwrap_or_default(); + + let codec_clause = clickhouse_column + .codec + .as_ref() + .map(|c| format!(" CODEC({})", c)) + .unwrap_or_default(); + let add_column_query = format!( - "ALTER TABLE `{}`.`{}` ADD COLUMN `{}` {}{} {}", + "ALTER TABLE `{}`.`{}`{} ADD COLUMN `{}` {}{}{}{} {}", "test_db", "test_table", + "", clickhouse_column.name, column_type_string, default_clause, + codec_clause, + ttl_clause, "AFTER `id`" ); assert_eq!( add_column_query, - "ALTER TABLE `test_db`.`test_table` ADD COLUMN `description` Nullable(String) DEFAULT 'default text' AFTER `id`" + "ALTER TABLE `test_db`.`test_table` ADD COLUMN `description` Nullable(String) DEFAULT 'default text' AFTER `id`" ); } From 161959f838205edc366ffa6cf6fc9c96c3969424 Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Wed, 26 Nov 2025 15:04:10 -0700 Subject: [PATCH 07/10] fix --- apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs index a4c5b36344..ba9f9a65ae 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs @@ -911,6 +911,7 @@ async fn execute_modify_table_column( let required_changed = before_column.required != after_column.required; let comment_changed = before_column.comment != after_column.comment; let ttl_changed = before_column.ttl != after_column.ttl; + let codec_changed = before_column.codec != after_column.codec; // If only the comment changed, use a simpler ALTER 
TABLE ... MODIFY COLUMN ... COMMENT // This is more efficient and avoids unnecessary table rebuilds @@ -918,6 +919,7 @@ async fn execute_modify_table_column( && !required_changed && !default_changed && !ttl_changed + && !codec_changed && comment_changed { log::info!( @@ -956,7 +958,7 @@ async fn execute_modify_table_column( log::info!( "Executing ModifyTableColumn for table: {}, column: {} ({}→{})\ -data_type_changed: {data_type_changed}, default_changed: {default_changed}, required_changed: {required_changed}, comment_changed: {comment_changed}, ttl_changed: {ttl_changed}", +data_type_changed: {data_type_changed}, default_changed: {default_changed}, required_changed: {required_changed}, comment_changed: {comment_changed}, ttl_changed: {ttl_changed}, codec_changed: {codec_changed}", table_name, after_column.name, before_column.data_type, From f97cc76e23ebc5aceb990b6c5197b83dae804627 Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Wed, 26 Nov 2025 14:14:55 -0700 Subject: [PATCH 08/10] first pass --- apps/framework-cli/src/cli/local_webserver.rs | 1 + .../framework-cli/src/cli/routines/migrate.rs | 5 + .../framework/core/infra_reality_checker.rs | 3 + .../framework/core/infrastructure/table.rs | 9 + .../src/framework/core/infrastructure_map.rs | 82 ++++++++ apps/framework-cli/src/framework/core/plan.rs | 3 + .../src/framework/core/plan_validator.rs | 2 + .../src/framework/python/generate.rs | 58 +++++- .../src/framework/python/utils.rs | 1 + .../src/framework/streaming/generate.rs | 1 + .../src/framework/typescript/generate.rs | 64 ++++-- .../olap/clickhouse/diff_strategy.rs | 4 + .../infrastructure/olap/clickhouse/mapper.rs | 27 +++ .../src/infrastructure/olap/clickhouse/mod.rs | 38 +++- .../infrastructure/olap/clickhouse/model.rs | 1 + .../infrastructure/olap/clickhouse/queries.rs | 186 +++++++++++++++++- .../olap/clickhouse/type_parser.rs | 1 + .../src/infrastructure/olap/ddl_ordering.rs | 9 + .../processes/kafka_clickhouse_sync.rs | 10 + 
.../src/utilities/validate_passthrough.rs | 35 ++++ packages/protobuf/infrastructure_map.proto | 2 + packages/py-moose-lib/moose_lib/__init__.py | 1 + .../py-moose-lib/moose_lib/data_models.py | 49 +++++ .../ts-moose-lib/src/browserCompatible.ts | 1 + .../src/dataModels/typeConvert.ts | 34 +++- packages/ts-moose-lib/src/dataModels/types.ts | 26 +++ 26 files changed, 623 insertions(+), 30 deletions(-) diff --git a/apps/framework-cli/src/cli/local_webserver.rs b/apps/framework-cli/src/cli/local_webserver.rs index 94d0acd5c5..8165cb68ef 100644 --- a/apps/framework-cli/src/cli/local_webserver.rs +++ b/apps/framework-cli/src/cli/local_webserver.rs @@ -3545,6 +3545,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, diff --git a/apps/framework-cli/src/cli/routines/migrate.rs b/apps/framework-cli/src/cli/routines/migrate.rs index 1aa1f0fab1..7e7c5dc2bf 100644 --- a/apps/framework-cli/src/cli/routines/migrate.rs +++ b/apps/framework-cli/src/cli/routines/migrate.rs @@ -764,6 +764,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, @@ -799,6 +800,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }); table } @@ -1143,6 +1145,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, after_column: None, database: Some("bad_db".to_string()), @@ -1161,6 +1164,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, after_column: Column { name: "col".to_string(), @@ -1173,6 +1177,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, database: Some("another_bad_db".to_string()), cluster_name: None, diff --git a/apps/framework-cli/src/framework/core/infra_reality_checker.rs b/apps/framework-cli/src/framework/core/infra_reality_checker.rs index 0d8670f4e3..7d95265124 100644 --- 
a/apps/framework-cli/src/framework/core/infra_reality_checker.rs +++ b/apps/framework-cli/src/framework/core/infra_reality_checker.rs @@ -516,6 +516,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, @@ -612,6 +613,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }); let mock_client = MockOlapClient { @@ -682,6 +684,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }; actual_table.columns.push(timestamp_col.clone()); infra_table.columns.push(timestamp_col); diff --git a/apps/framework-cli/src/framework/core/infrastructure/table.rs b/apps/framework-cli/src/framework/core/infrastructure/table.rs index a27c107d3a..0a68b5ee0a 100644 --- a/apps/framework-cli/src/framework/core/infrastructure/table.rs +++ b/apps/framework-cli/src/framework/core/infrastructure/table.rs @@ -602,6 +602,8 @@ pub struct Column { pub ttl: Option, #[serde(skip_serializing_if = "Option::is_none", default)] pub codec: Option, // Compression codec expression (e.g., "ZSTD(3)", "Delta, LZ4") + #[serde(skip_serializing_if = "Option::is_none", default)] + pub materialized: Option, // MATERIALIZED column expression (computed at write-time, physically stored) } #[derive(Debug, Clone, Eq, PartialEq, Hash)] @@ -1117,6 +1119,7 @@ impl Column { comment: self.comment.clone(), ttl: self.ttl.clone(), codec: self.codec.clone(), + materialized: self.materialized.clone(), special_fields: Default::default(), } } @@ -1140,6 +1143,7 @@ impl Column { comment: proto.comment, ttl: proto.ttl, codec: proto.codec, + materialized: proto.materialized, } } } @@ -1520,6 +1524,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }; let json = serde_json::to_string(&nested_column).unwrap(); @@ -1541,6 +1546,7 @@ mod tests { comment: Some("[MOOSE_METADATA:DO_NOT_MODIFY] {\"version\":1,\"enum\":{\"name\":\"TestEnum\",\"members\":[]}}".to_string()), 
ttl: None, codec: None, + materialized: None, }; // Convert to proto and back @@ -1565,6 +1571,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }; let proto = column_without_comment.to_proto(); @@ -1749,6 +1756,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "name".to_string(), @@ -1761,6 +1769,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]; diff --git a/apps/framework-cli/src/framework/core/infrastructure_map.rs b/apps/framework-cli/src/framework/core/infrastructure_map.rs index 22c47e397e..e2156b19a4 100644 --- a/apps/framework-cli/src/framework/core/infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/infrastructure_map.rs @@ -3033,6 +3033,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "name".to_string(), @@ -3045,6 +3046,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "to_be_removed".to_string(), @@ -3057,6 +3059,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -3092,6 +3095,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "name".to_string(), @@ -3104,6 +3108,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "age".to_string(), // New column @@ -3116,6 +3121,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string(), "name".to_string()]), // Changed order_by @@ -3165,6 +3171,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "to_remove".to_string(), @@ -3177,6 +3184,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]; @@ -3194,6 +3202,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, 
Column { name: "new_column".to_string(), @@ -3206,6 +3215,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]; @@ -3342,6 +3352,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); let diff = compute_table_columns_diff(&before, &after); @@ -3374,6 +3385,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); let diff = compute_table_columns_diff(&before, &after); @@ -3403,6 +3415,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); after.columns.push(Column { @@ -3416,6 +3429,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); let diff = compute_table_columns_diff(&before, &after); @@ -3451,6 +3465,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "to_remove".to_string(), @@ -3463,6 +3478,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "to_modify".to_string(), @@ -3475,6 +3491,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]); @@ -3491,6 +3508,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "to_modify".to_string(), // modified @@ -3503,6 +3521,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "new_column".to_string(), // added @@ -3515,6 +3534,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]); @@ -3660,6 +3680,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); after.columns.push(Column { @@ -3673,6 +3694,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); let diff = compute_table_columns_diff(&before, &after); @@ -3707,6 +3729,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "name".to_string(), @@ 
-3719,6 +3742,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]); @@ -3735,6 +3759,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "id".to_string(), @@ -3747,6 +3772,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]); @@ -3775,6 +3801,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; before.columns.push(col.clone()); after.columns.push(col); @@ -3817,6 +3844,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); // Change every other column type in the after table @@ -3851,6 +3879,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); } @@ -3882,6 +3911,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); after.columns.push(Column { @@ -3898,6 +3928,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); let diff = compute_table_columns_diff(&before, &after); @@ -3939,6 +3970,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); after.columns.push(Column { @@ -3952,6 +3984,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); // Test special characters in column name @@ -3966,6 +3999,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); after.columns.push(Column { @@ -3979,6 +4013,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); let diff = compute_table_columns_diff(&before, &after); @@ -4004,6 +4039,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; let col2 = col1.clone(); assert!(columns_are_equivalent(&col1, &col2)); @@ -4042,6 +4078,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; let clickhouse_enum_col = Column { @@ -4067,6 +4104,7 @@ mod diff_tests { comment: 
None, ttl: None, codec: None, + materialized: None, }; // These should be equivalent due to the enum semantic comparison @@ -4093,6 +4131,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; assert!(!columns_are_equivalent( @@ -4112,6 +4151,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; let int_col2 = Column { @@ -4125,6 +4165,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; assert!(!columns_are_equivalent(&int_col1, &int_col2)); @@ -4157,6 +4198,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; let json_col2 = Column { @@ -4180,6 +4222,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; // These should be equivalent - order of typed_paths doesn't matter @@ -4206,6 +4249,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; assert!(!columns_are_equivalent(&json_col1, &json_col3)); @@ -4232,6 +4276,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; assert!(!columns_are_equivalent(&json_col1, &json_col4)); @@ -4275,6 +4320,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; let nested_json_col2 = Column { @@ -4309,6 +4355,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; // These should be equivalent - order doesn't matter at any level @@ -4341,6 +4388,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "priority".to_string(), @@ -4353,6 +4401,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], jwt: false, @@ -4365,6 +4414,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; let col_with_user_name = Column { @@ -4386,6 +4436,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: 
"priority".to_string(), @@ -4398,6 +4449,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], jwt: false, @@ -4410,6 +4462,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; // These should be equivalent - name difference doesn't matter if structure matches @@ -4437,6 +4490,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }], // Missing priority column jwt: false, }), @@ -4448,6 +4502,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; assert!(!columns_are_equivalent( @@ -4485,6 +4540,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "notifications".to_string(), @@ -4497,6 +4553,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], jwt: false, @@ -4509,6 +4566,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }], jwt: false, }), @@ -4520,6 +4578,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }], jwt: false, }), @@ -4531,6 +4590,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; let col_user = Column { @@ -4557,6 +4617,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "notifications".to_string(), @@ -4569,6 +4630,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], jwt: false, @@ -4581,6 +4643,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }], jwt: false, }), @@ -4592,6 +4655,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }], jwt: false, }), @@ -4603,6 +4667,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; // These should be equivalent - name differences at all levels don't matter @@ -4624,15 +4689,18 @@ mod diff_tests { comment: None, ttl: None, 
codec: None, + materialized: None, }; // Test 1: Columns with same codec should be equivalent let col_with_codec1 = Column { codec: Some("ZSTD(3)".to_string()), + materialized: None, ..base_col.clone() }; let col_with_codec2 = Column { codec: Some("ZSTD(3)".to_string()), + materialized: None, ..base_col.clone() }; assert!(columns_are_equivalent(&col_with_codec1, &col_with_codec2)); @@ -4640,6 +4708,7 @@ mod diff_tests { // Test 2: Columns with different codecs should not be equivalent let col_with_different_codec = Column { codec: Some("LZ4".to_string()), + materialized: None, ..base_col.clone() }; assert!(!columns_are_equivalent( @@ -4653,10 +4722,12 @@ mod diff_tests { // Test 4: Columns with codec chains should be detected as different let col_with_chain1 = Column { codec: Some("Delta, LZ4".to_string()), + materialized: None, ..base_col.clone() }; let col_with_chain2 = Column { codec: Some("Delta, ZSTD".to_string()), + materialized: None, ..base_col.clone() }; assert!(!columns_are_equivalent(&col_with_chain1, &col_with_chain2)); @@ -4664,10 +4735,12 @@ mod diff_tests { // Test 5: Codec with different compression levels should be detected as different let col_zstd3 = Column { codec: Some("ZSTD(3)".to_string()), + materialized: None, ..base_col.clone() }; let col_zstd9 = Column { codec: Some("ZSTD(9)".to_string()), + materialized: None, ..base_col.clone() }; assert!(!columns_are_equivalent(&col_zstd3, &col_zstd9)); @@ -4675,10 +4748,12 @@ mod diff_tests { // Test 6: Normalized codec comparison - user "Delta" vs ClickHouse "Delta(4)" let col_user_delta = Column { codec: Some("Delta".to_string()), + materialized: None, ..base_col.clone() }; let col_ch_delta = Column { codec: Some("Delta(4)".to_string()), + materialized: None, ..base_col.clone() }; assert!(columns_are_equivalent(&col_user_delta, &col_ch_delta)); @@ -4686,10 +4761,12 @@ mod diff_tests { // Test 7: Normalized codec comparison - user "Gorilla" vs ClickHouse "Gorilla(8)" let col_user_gorilla = Column { 
codec: Some("Gorilla".to_string()), + materialized: None, ..base_col.clone() }; let col_ch_gorilla = Column { codec: Some("Gorilla(8)".to_string()), + materialized: None, ..base_col.clone() }; assert!(columns_are_equivalent(&col_user_gorilla, &col_ch_gorilla)); @@ -4697,10 +4774,12 @@ mod diff_tests { // Test 8: Normalized chain comparison - "Delta, LZ4" vs "Delta(4), LZ4" let col_user_chain = Column { codec: Some("Delta, LZ4".to_string()), + materialized: None, ..base_col.clone() }; let col_ch_chain = Column { codec: Some("Delta(4), LZ4".to_string()), + materialized: None, ..base_col.clone() }; assert!(columns_are_equivalent(&col_user_chain, &col_ch_chain)); @@ -5060,6 +5139,7 @@ mod diff_topic_tests { comment: None, ttl: None, codec: None, + materialized: None, }], metadata: None, life_cycle: LifeCycle::FullyManaged, @@ -5351,6 +5431,7 @@ mod diff_topic_to_table_sync_process_tests { comment: None, ttl: None, codec: None, + materialized: None, }], version: Some(version.clone()), source_primitive: PrimitiveSignature { @@ -5475,6 +5556,7 @@ mod diff_topic_to_table_sync_process_tests { comment: None, ttl: None, codec: None, + materialized: None, }]; assert_eq!( diff --git a/apps/framework-cli/src/framework/core/plan.rs b/apps/framework-cli/src/framework/core/plan.rs index b9995366f9..fbb0d004a4 100644 --- a/apps/framework-cli/src/framework/core/plan.rs +++ b/apps/framework-cli/src/framework/core/plan.rs @@ -510,6 +510,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, @@ -715,6 +716,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }); // Create test project first to get the database name @@ -1093,6 +1095,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }); // Create mock OLAP client with the reality table diff --git a/apps/framework-cli/src/framework/core/plan_validator.rs 
b/apps/framework-cli/src/framework/core/plan_validator.rs index b4a0236adf..6d1dfc1783 100644 --- a/apps/framework-cli/src/framework/core/plan_validator.rs +++ b/apps/framework-cli/src/framework/core/plan_validator.rs @@ -150,6 +150,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, @@ -326,6 +327,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, diff --git a/apps/framework-cli/src/framework/python/generate.rs b/apps/framework-cli/src/framework/python/generate.rs index f351c2a60c..7c876c2148 100644 --- a/apps/framework-cli/src/framework/python/generate.rs +++ b/apps/framework-cli/src/framework/python/generate.rs @@ -557,7 +557,7 @@ pub fn tables_to_python(tables: &[Table], life_cycle: Option) -> Stri .unwrap(); writeln!( output, - "from moose_lib import clickhouse_default, ClickHouseCodec, LifeCycle, ClickHouseTTL" + "from moose_lib import clickhouse_default, ClickHouseCodec, ClickHouseMaterialized, LifeCycle, ClickHouseTTL" ) .unwrap(); writeln!( @@ -680,11 +680,27 @@ pub fn tables_to_python(tables: &[Table], life_cycle: Option) -> Stri if let Some(ref codec_expr) = column.codec { type_str = format!("Annotated[{}, ClickHouseCodec({:?})]", type_str, codec_expr); } - if let Some(ref default_expr) = column.default { - type_str = format!( - "Annotated[{}, clickhouse_default({:?})]", - type_str, default_expr - ); + // Handle DEFAULT and MATERIALIZED (mutually exclusive) + match (&column.default, &column.materialized) { + (Some(default_expr), None) => { + type_str = format!( + "Annotated[{}, clickhouse_default({:?})]", + type_str, default_expr + ); + } + (None, Some(materialized_expr)) => { + type_str = format!( + "Annotated[{}, ClickHouseMaterialized({:?})]", + type_str, materialized_expr + ); + } + (None, None) => { + // No default or materialized, do nothing + } + 
(Some(_), Some(_)) => { + // This should never happen due to validation + panic!("Column '{}' has both DEFAULT and MATERIALIZED - this should be caught by validation", column.name) + } } let type_str = if can_use_key_wrapping && column.primary_key { @@ -1049,6 +1065,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "timestamp".to_string(), @@ -1061,6 +1078,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "optional_text".to_string(), @@ -1073,6 +1091,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["primary_key".to_string()]), @@ -1106,7 +1125,7 @@ from enum import IntEnum, Enum from moose_lib import Key, IngestPipeline, IngestPipelineConfig, OlapTable, OlapConfig, clickhouse_datetime64, clickhouse_decimal, ClickhouseSize, StringToEnumMixin from moose_lib.data_models import ClickHouseJson from moose_lib import Point, Ring, LineString, MultiLineString, Polygon, MultiPolygon, FixedString -from moose_lib import clickhouse_default, ClickHouseCodec, LifeCycle, ClickHouseTTL +from moose_lib import clickhouse_default, ClickHouseCodec, ClickHouseMaterialized, LifeCycle, ClickHouseTTL from moose_lib.blocks import MergeTreeEngine, ReplacingMergeTreeEngine, AggregatingMergeTreeEngine, SummingMergeTreeEngine, S3QueueEngine, ReplicatedMergeTreeEngine, ReplicatedReplacingMergeTreeEngine, ReplicatedAggregatingMergeTreeEngine, ReplicatedSummingMergeTreeEngine class Foo(BaseModel): @@ -1137,6 +1156,7 @@ foo_table = OlapTable[Foo]("Foo", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "numbers".to_string(), @@ -1152,6 +1172,7 @@ foo_table = OlapTable[Foo]("Foo", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "nested_numbers".to_string(), @@ -1170,6 +1191,7 @@ foo_table = OlapTable[Foo]("Foo", OlapConfig( comment: None, ttl: None, codec: None, 
+ materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1225,6 +1247,7 @@ nested_array_table = OlapTable[NestedArray]("NestedArray", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "city".to_string(), @@ -1237,6 +1260,7 @@ nested_array_table = OlapTable[NestedArray]("NestedArray", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "zipCode".to_string(), @@ -1249,6 +1273,7 @@ nested_array_table = OlapTable[NestedArray]("NestedArray", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, ], jwt: false, @@ -1268,6 +1293,7 @@ nested_array_table = OlapTable[NestedArray]("NestedArray", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "address".to_string(), @@ -1280,6 +1306,7 @@ nested_array_table = OlapTable[NestedArray]("NestedArray", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "addresses".to_string(), @@ -1295,6 +1322,7 @@ nested_array_table = OlapTable[NestedArray]("NestedArray", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1353,6 +1381,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "data".to_string(), @@ -1365,6 +1394,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1423,6 +1453,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, @@ -1480,6 +1511,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "version".to_string(), @@ -1492,6 +1524,7 
@@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "is_deleted".to_string(), @@ -1504,6 +1537,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1552,6 +1586,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "coordinates".to_string(), @@ -1567,6 +1602,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "metadata".to_string(), @@ -1582,6 +1618,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1644,6 +1681,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "timestamp".to_string(), @@ -1656,6 +1694,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "email".to_string(), @@ -1668,6 +1707,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: Some("timestamp + INTERVAL 30 DAY".to_string()), codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string(), "timestamp".to_string()]), @@ -1715,6 +1755,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, @@ -1781,6 +1822,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "payload".to_string(), @@ -1802,6 +1844,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: 
None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1857,6 +1900,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, diff --git a/apps/framework-cli/src/framework/python/utils.rs b/apps/framework-cli/src/framework/python/utils.rs index cf9a873642..c8a3be9362 100644 --- a/apps/framework-cli/src/framework/python/utils.rs +++ b/apps/framework-cli/src/framework/python/utils.rs @@ -54,6 +54,7 @@ impl ColumnBuilder { comment: None, ttl: None, codec: None, + materialized: None, }) } } diff --git a/apps/framework-cli/src/framework/streaming/generate.rs b/apps/framework-cli/src/framework/streaming/generate.rs index f96692dc50..4da15a4e07 100644 --- a/apps/framework-cli/src/framework/streaming/generate.rs +++ b/apps/framework-cli/src/framework/streaming/generate.rs @@ -515,6 +515,7 @@ my_function = StreamingFunction( comment: None, ttl: None, codec: None, + materialized: None, }) .collect() } diff --git a/apps/framework-cli/src/framework/typescript/generate.rs b/apps/framework-cli/src/framework/typescript/generate.rs index e1645c8e27..6c93837340 100644 --- a/apps/framework-cli/src/framework/typescript/generate.rs +++ b/apps/framework-cli/src/framework/typescript/generate.rs @@ -341,6 +341,7 @@ pub fn tables_to_typescript(tables: &[Table], life_cycle: Option) -> "LifeCycle", "ClickHouseTTL", "ClickHouseCodec", + "ClickHouseMaterialized", ]; if uses_simple_aggregate { @@ -588,24 +589,36 @@ pub fn tables_to_typescript(tables: &[Table], life_cycle: Option) -> } } - // Append ClickHouseTTL type tag if present on the column - if let Some(expr) = &column.ttl { - type_str = format!("{type_str} & ClickHouseTTL<\"{}\">", expr); - } - // Wrap with Codec if present - let type_str = match column.codec.as_ref() { - None => type_str, - Some(ref codec) => format!("{type_str} & ClickHouseCodec<{codec:?}>"), - }; - let type_str = match 
column.default { - None => type_str, - Some(ref default) if type_str == "Date" => { + // Handle DEFAULT and MATERIALIZED (mutually exclusive) + let type_str = match (&column.default, &column.materialized) { + (Some(default), None) if type_str == "Date" => { // https://github.com/samchon/typia/issues/1658 format!("WithDefault<{type_str}, {:?}>", default) } - Some(ref default) => { + (Some(default), None) => { format!("{type_str} & ClickHouseDefault<{:?}>", default) } + (None, Some(materialized)) => { + format!("{type_str} & ClickHouseMaterialized<{:?}>", materialized) + } + (None, None) => type_str, + (Some(_), Some(_)) => { + // Expected to be unreachable (validation should reject DEFAULT together with MATERIALIZED); this arm panics rather than recovering + panic!("Column '{}' has both DEFAULT and MATERIALIZED - this should be caught by validation", column.name) + } + }; + + // Append ClickHouseTTL type tag if present on the column + let type_str = if let Some(expr) = &column.ttl { + format!("{type_str} & ClickHouseTTL<\"{}\">", expr) + } else { + type_str + }; + + // Wrap with Codec if present + let type_str = match column.codec.as_ref() { + None => type_str, + Some(ref codec) => format!("{type_str} & ClickHouseCodec<{codec:?}>"), }; let type_str = if can_use_key_wrapping && column.primary_key { format!("Key<{type_str}>") @@ -945,6 +958,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "city".to_string(), @@ -957,6 +971,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "zip_code".to_string(), @@ -969,6 +984,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], jwt: false, @@ -988,6 +1004,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "address".to_string(), @@ -1000,6 +1017,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "addresses".to_string(), @@ -1015,6 +1033,7 @@ mod tests { comment: None, ttl: None,
codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1076,6 +1095,7 @@ export const UserTable = new OlapTable("User", { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "data".to_string(), @@ -1088,6 +1108,7 @@ export const UserTable = new OlapTable("User", { comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1145,6 +1166,7 @@ export const UserTable = new OlapTable("User", { comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, @@ -1197,6 +1219,7 @@ export const UserTable = new OlapTable("User", { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "version".to_string(), @@ -1209,6 +1232,7 @@ export const UserTable = new OlapTable("User", { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "is_deleted".to_string(), @@ -1221,6 +1245,7 @@ export const UserTable = new OlapTable("User", { comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1268,6 +1293,7 @@ export const UserTable = new OlapTable("User", { comment: None, ttl: None, codec: None, + materialized: None, }], sample_by: None, order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1321,6 +1347,7 @@ export const UserTable = new OlapTable("User", { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "version".to_string(), @@ -1333,6 +1360,7 @@ export const UserTable = new OlapTable("User", { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "is_deleted".to_string(), @@ -1345,6 +1373,7 @@ export const UserTable = new OlapTable("User", { comment: None, ttl: None, codec: None, + materialized: None, }, ], sample_by: None, @@ -1400,6 +1429,7 @@ export const UserTable = new OlapTable("User", { comment: None, ttl: 
None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["u64".to_string()]), partition_by: None, @@ -1475,6 +1505,7 @@ export const UserTable = new OlapTable("User", { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "status".to_string(), @@ -1487,6 +1518,7 @@ export const UserTable = new OlapTable("User", { comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1544,6 +1576,7 @@ export const TaskTable = new OlapTable("Task", { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "timestamp".to_string(), @@ -1556,6 +1589,7 @@ export const TaskTable = new OlapTable("Task", { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "email".to_string(), @@ -1568,6 +1602,7 @@ export const TaskTable = new OlapTable("Task", { comment: None, ttl: Some("timestamp + INTERVAL 30 DAY".to_string()), codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string(), "timestamp".to_string()]), @@ -1617,6 +1652,7 @@ export const TaskTable = new OlapTable("Task", { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "payload".to_string(), @@ -1638,6 +1674,7 @@ export const TaskTable = new OlapTable("Task", { comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1687,6 +1724,7 @@ export const TaskTable = new OlapTable("Task", { comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs index 970c48a2f1..ad9c085eca 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs @@ 
-663,6 +663,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "timestamp".to_string(), @@ -675,6 +676,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(order_by), @@ -801,6 +803,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, position_after: Some("timestamp".to_string()), }]; @@ -859,6 +862,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, position_after: Some("timestamp".to_string()), }]; diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs index 9fb6273d6d..16769e5b1b 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs @@ -54,6 +54,27 @@ fn generate_column_comment(column: &Column) -> Result, Clickhouse pub fn std_column_to_clickhouse_column( column: Column, ) -> Result { + // Validate mutual exclusivity of DEFAULT and MATERIALIZED + if column.default.is_some() && column.materialized.is_some() { + return Err(ClickhouseError::InvalidParameters { + message: format!( + "Column '{}' cannot have both DEFAULT and MATERIALIZED. Use one or the other.", + column.name + ), + }); + } + + // Validate that MATERIALIZED columns are not primary keys + if column.materialized.is_some() && column.primary_key { + return Err(ClickhouseError::InvalidParameters { + message: format!( + "Column '{}' cannot be both MATERIALIZED and a primary key. 
\ + MATERIALIZED columns are computed and cannot be used as primary keys.", + column.name + ), + }); + } + let comment = generate_column_comment(&column)?; let mut column_type = @@ -84,6 +105,7 @@ pub fn std_column_to_clickhouse_column( comment, ttl: column.ttl.clone(), codec: column.codec.clone(), + materialized: column.materialized.clone(), }; Ok(clickhouse_column) @@ -428,6 +450,7 @@ mod tests { comment: Some("This is a user comment about the record type".to_string()), ttl: None, codec: None, + materialized: None, }; let clickhouse_column = std_column_to_clickhouse_column(column_with_user_comment).unwrap(); @@ -453,6 +476,7 @@ mod tests { comment: Some(format!("Old user comment {}", old_metadata)), ttl: None, codec: None, + materialized: None, }; let clickhouse_column = std_column_to_clickhouse_column(column_with_both).unwrap(); @@ -480,6 +504,7 @@ mod tests { comment: Some(old_metadata), ttl: None, codec: None, + materialized: None, }; let clickhouse_column = std_column_to_clickhouse_column(column_metadata_only).unwrap(); @@ -523,6 +548,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "status".to_string(), @@ -535,6 +561,7 @@ mod tests { comment: Some("User status field".to_string()), // User comment ttl: None, codec: None, + materialized: None, }, ], jwt: false, diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs index ba9f9a65ae..4b5103395d 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs @@ -823,6 +823,13 @@ async fn execute_add_table_column( .map(|d| format!(" DEFAULT {}", d)) .unwrap_or_default(); + // Include MATERIALIZED clause if column has a materialized expression + let materialized_clause = clickhouse_column + .materialized + .as_ref() + .map(|m| format!(" MATERIALIZED {}", m)) + .unwrap_or_default(); + let codec_clause = 
clickhouse_column .codec .as_ref() @@ -841,13 +848,14 @@ async fn execute_add_table_column( }; let add_column_query = format!( - "ALTER TABLE `{}`.`{}`{} ADD COLUMN `{}` {}{}{}{} {}", + "ALTER TABLE `{}`.`{}`{} ADD COLUMN `{}` {}{}{}{}{} {}", db_name, table_name, cluster_clause, clickhouse_column.name, column_type_string, default_clause, + materialized_clause, codec_clause, ttl_clause, position_clause @@ -1805,16 +1813,17 @@ impl OlapOperations for ConfiguredDBClient { None }; - let default = match default_kind.deref() { - "" => None, - "DEFAULT" => Some(default_expression), - "MATERIALIZED" | "ALIAS" => { - debug!("MATERIALIZED and ALIAS not yet handled."); - None + let (default, materialized) = match default_kind.deref() { + "" => (None, None), + "DEFAULT" => (Some(default_expression.clone()), None), + "MATERIALIZED" => (None, Some(default_expression.clone())), + "ALIAS" => { + debug!("ALIAS columns not yet supported, skipping column {col_name}"); + continue; // Skip ALIAS columns (they're virtual, not stored) } _ => { debug!("Unknown default kind: {default_kind} for column {col_name}"); - None + (None, None) } }; @@ -1871,6 +1880,7 @@ impl OlapOperations for ConfiguredDBClient { comment: column_comment, ttl: normalized_ttl, codec, + materialized, }; columns.push(column); @@ -2724,6 +2734,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra comment: Some("Old user comment".to_string()), ttl: None, codec: None, + materialized: None, }; let after_column = Column { @@ -2743,6 +2754,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra comment: Some("New user comment".to_string()), ttl: None, codec: None, + materialized: None, }; // The execute_modify_table_column function should detect this as comment-only change @@ -2769,6 +2781,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra comment: Some("Number of things".to_string()), ttl: None, codec: None, + materialized: 
None, }; let after_column = Column { default: Some("42".to_string()), @@ -2802,6 +2815,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra comment: Some("old".to_string()), ttl: None, codec: None, + materialized: None, }; let after_column = Column { @@ -2835,6 +2849,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra comment: Some("Updated description field".to_string()), ttl: None, codec: None, + materialized: None, }; let clickhouse_column = std_column_to_clickhouse_column(column).unwrap(); @@ -2873,6 +2888,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra comment: Some("Hash of the ID".to_string()), ttl: None, codec: None, + materialized: None, }; let sqls = build_modify_column_sql( @@ -2904,6 +2920,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra comment: None, ttl: None, codec: None, + materialized: None, }; let sqls = build_modify_column_sql( @@ -2935,6 +2952,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra comment: None, ttl: None, codec: None, + materialized: None, }; let sqls = build_modify_column_sql( @@ -3262,6 +3280,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra comment: Some("Number of items".to_string()), ttl: None, codec: None, + materialized: None, }; let clickhouse_column = std_column_to_clickhouse_column(column).unwrap(); @@ -3324,6 +3343,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra comment: None, ttl: None, codec: None, + materialized: None, }; let clickhouse_column = std_column_to_clickhouse_column(column).unwrap(); @@ -3389,6 +3409,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra comment: None, ttl: Some("created_at + INTERVAL 7 DAY".to_string()), codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: 
Some("toYYYYMM(created_at)".to_string()), @@ -3456,6 +3477,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra comment: None, ttl: Some("created_at + INTERVAL 7 DAY".to_string()), codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: Some("toYYYYMM(created_at)".to_string()), diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs index f978731301..5f09fa7be5 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs @@ -433,6 +433,7 @@ pub struct ClickHouseColumn { pub comment: Option, // Column comment for metadata storage pub ttl: Option, pub codec: Option, // Compression codec expression (e.g., "ZSTD(3)", "Delta, LZ4") + pub materialized: Option, // MATERIALIZED column expression (computed at write-time, physically stored) } impl ClickHouseColumn { diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs index 188b19e154..6ec816134a 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs @@ -124,7 +124,7 @@ static CREATE_TABLE_TEMPLATE: &str = r#" CREATE TABLE IF NOT EXISTS `{{db_name}}`.`{{table_name}}`{{#if cluster_name}} ON CLUSTER {{cluster_name}}{{/if}} ( -{{#each fields}} `{{field_name}}` {{{field_type}}} {{field_nullable}}{{#if field_default}} DEFAULT {{{field_default}}}{{/if}}{{#if field_codec}} CODEC({{{field_codec}}}){{/if}}{{#if field_ttl}} TTL {{{field_ttl}}}{{/if}}{{#if field_comment}} COMMENT '{{{field_comment}}}'{{/if}}{{#unless @last}}, +{{#each fields}} `{{field_name}}` {{{field_type}}} {{field_nullable}}{{#if field_default}} DEFAULT {{{field_default}}}{{/if}}{{#if field_materialized}} MATERIALIZED 
{{{field_materialized}}}{{/if}}{{#if field_codec}} CODEC({{{field_codec}}}){{/if}}{{#if field_ttl}} TTL {{{field_ttl}}}{{/if}}{{#if field_comment}} COMMENT '{{{field_comment}}}'{{/if}}{{#unless @last}}, {{/unless}}{{/each}}{{#if has_indexes}}, {{#each indexes}}{{this}}{{#unless @last}}, {{/unless}}{{/each}}{{/if}} ) ENGINE = {{engine}}{{#if primary_key_string}} @@ -3062,6 +3062,7 @@ fn builds_field_context(columns: &[ClickHouseColumn]) -> Result, Clic // - Numbers come without quotes: 42 // So we use them as-is without additional formatting let formatted_default = column.default.as_ref(); + let formatted_materialized = column.materialized.as_ref(); Ok(json!({ "field_name": column.name, @@ -3069,6 +3070,7 @@ fn builds_field_context(columns: &[ClickHouseColumn]) -> Result, Clic "field_ttl": field_ttl, "field_codec": field_codec, "field_default": formatted_default, + "field_materialized": formatted_materialized, "field_nullable": if let ClickHouseColumnType::Nullable(_) = column.column_type { // if type is Nullable, do not add extra specifier "".to_string() @@ -3106,6 +3108,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "nested_field_2".to_string(), @@ -3117,6 +3120,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "nested_field_3".to_string(), @@ -3128,6 +3132,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "nested_field_4".to_string(), @@ -3139,6 +3144,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "nested_field_5".to_string(), @@ -3150,6 +3156,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "nested_field_6".to_string(), @@ -3173,6 +3180,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "nested_field_7".to_string(), @@ -3184,6 
+3192,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]); @@ -3269,6 +3278,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "name".to_string(), @@ -3280,6 +3290,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec![]), @@ -3320,6 +3331,7 @@ PRIMARY KEY (`id`) comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec![]), partition_by: None, @@ -3358,6 +3370,7 @@ ENGINE = MergeTree comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec![]), partition_by: None, @@ -3398,6 +3411,7 @@ ENGINE = MergeTree comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "sample_hash".to_string(), @@ -3409,6 +3423,7 @@ ENGINE = MergeTree comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "created_at".to_string(), @@ -3420,6 +3435,7 @@ ENGINE = MergeTree comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec![]), @@ -3460,6 +3476,7 @@ ENGINE = MergeTree comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, @@ -3501,6 +3518,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }], engine: ClickhouseEngine::ReplacingMergeTree { ver: None, @@ -3538,6 +3556,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "version".to_string(), @@ -3549,6 +3568,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -3593,6 +3613,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "version".to_string(), @@ -3604,6 +3625,7 @@ ORDER BY (`id`) "#; 
comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "is_deleted".to_string(), @@ -3615,6 +3637,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -3659,6 +3682,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }], sample_by: None, order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -3765,6 +3789,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "nested_data".to_string(), @@ -3779,6 +3804,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "field2".to_string(), @@ -3790,6 +3816,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }, ]), required: true, @@ -3799,6 +3826,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "status".to_string(), @@ -3822,6 +3850,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }, ], sample_by: None, @@ -3872,6 +3901,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "data".to_string(), @@ -3883,6 +3913,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec![]), @@ -4358,6 +4389,7 @@ SETTINGS keeper_path = '/clickhouse/s3queue/test_table', mode = 'unordered', s3q comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec![]), partition_by: None, @@ -4906,6 +4938,7 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec![]), partition_by: None, @@ -4954,6 +4987,7 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; comment: None, ttl: 
None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec![]), partition_by: None, @@ -5052,6 +5086,7 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; comment: None, ttl: None, codec: None, + materialized: None, }; let cluster_clause = Some("test_cluster") @@ -5929,6 +5964,7 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "log_blob".to_string(), @@ -5940,6 +5976,7 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; comment: None, ttl: None, codec: Some("ZSTD(3)".to_string()), + materialized: None, }, ClickHouseColumn { name: "timestamp".to_string(), @@ -5951,6 +5988,7 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; comment: None, ttl: None, codec: Some("Delta, LZ4".to_string()), + materialized: None, }, ClickHouseColumn { name: "tags".to_string(), @@ -5962,6 +6000,7 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; comment: None, ttl: None, codec: Some("ZSTD(1)".to_string()), + materialized: None, }, ]; @@ -5994,4 +6033,149 @@ ORDER BY (`id`) "#; assert_eq!(query.trim(), expected.trim()); } + + #[test] + fn test_create_table_with_materialized_column() { + use crate::framework::versions::Version; + + let columns = vec![ + ClickHouseColumn { + name: "event_time".to_string(), + column_type: ClickHouseColumnType::DateTime64 { precision: 3 }, + required: true, + primary_key: false, + unique: false, + default: None, + materialized: None, + comment: None, + ttl: None, + codec: None, + }, + ClickHouseColumn { + name: "event_date".to_string(), + column_type: ClickHouseColumnType::Date, + required: true, + primary_key: false, + unique: false, + default: None, + materialized: Some("toDate(event_time)".to_string()), + comment: None, + ttl: None, + codec: None, + }, + ]; + + let table = ClickHouseTable { + version: Some(Version::from_string("1".to_string())), + name: 
"test_table".to_string(), + columns, + order_by: OrderBy::Fields(vec!["event_time".to_string()]), + partition_by: None, + sample_by: None, + engine: ClickhouseEngine::MergeTree, + table_settings: None, + indexes: vec![], + table_ttl_setting: None, + cluster_name: None, + }; + + let query = create_table_query("test_db", table, false).unwrap(); + let expected = r#" +CREATE TABLE IF NOT EXISTS `test_db`.`test_table` +( + `event_time` DateTime64(3) NOT NULL, + `event_date` Date NOT NULL MATERIALIZED toDate(event_time) +) +ENGINE = MergeTree +ORDER BY (`event_time`) +"#; + assert_eq!(query.trim(), expected.trim()); + } + + #[test] + fn test_materialized_column_with_codec() { + use crate::framework::core::infrastructure::table::JsonOptions; + use crate::framework::versions::Version; + + // Test customer's use case: MATERIALIZED column with CODEC + let columns = vec![ + ClickHouseColumn { + name: "log_blob".to_string(), + column_type: ClickHouseColumnType::Json(JsonOptions::default()), + required: true, + primary_key: false, + unique: false, + default: None, + materialized: None, + comment: None, + ttl: None, + codec: Some("ZSTD(3)".to_string()), + }, + ClickHouseColumn { + name: "combination_hash".to_string(), + column_type: ClickHouseColumnType::Array(Box::new( + ClickHouseColumnType::ClickhouseInt(ClickHouseInt::UInt64), + )), + required: true, + primary_key: false, + unique: false, + default: None, + materialized: Some( + "arrayMap(kv -> cityHash64(kv.1, kv.2), JSONExtractKeysAndValuesRaw(toString(log_blob)))".to_string(), + ), + comment: None, + ttl: None, + codec: Some("ZSTD(1)".to_string()), + }, + ]; + + let table = ClickHouseTable { + version: Some(Version::from_string("1".to_string())), + name: "logs".to_string(), + columns, + order_by: OrderBy::SingleExpr("tuple()".to_string()), + partition_by: None, + sample_by: None, + engine: ClickhouseEngine::MergeTree, + table_settings: None, + indexes: vec![], + table_ttl_setting: None, + cluster_name: None, + }; + + let 
query = create_table_query("test_db", table, false).unwrap(); + + // Verify the query contains the MATERIALIZED clause and CODEC + assert!(query.contains("MATERIALIZED arrayMap")); + assert!(query.contains("CODEC(ZSTD(1))")); + assert!(query.contains("CODEC(ZSTD(3))")); + } + + #[test] + fn test_validation_default_and_materialized_mutually_exclusive() { + use crate::framework::core::infrastructure::table::{Column, ColumnType, IntType}; + use crate::infrastructure::olap::clickhouse::mapper::std_column_to_clickhouse_column; + + let column = Column { + name: "bad_column".to_string(), + data_type: ColumnType::Int(IntType::Int32), + required: true, + unique: false, + primary_key: false, + default: Some("42".to_string()), + materialized: Some("id + 1".to_string()), // Invalid: both default and materialized + annotations: vec![], + comment: None, + ttl: None, + codec: None, + }; + + let result = std_column_to_clickhouse_column(column); + assert!(result.is_err()); + let error_msg = result.unwrap_err().to_string(); + assert!( + error_msg.contains("both DEFAULT and MATERIALIZED") + || error_msg.contains("mutually exclusive") + ); + } } diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/type_parser.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/type_parser.rs index 14a606179e..cdb26dd80b 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/type_parser.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/type_parser.rs @@ -1679,6 +1679,7 @@ pub fn convert_ast_to_column_type( comment: None, ttl: None, codec: None, + materialized: None, }); } TupleElement::Unnamed(_) => { diff --git a/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs b/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs index c2cc6cb282..1987176a42 100644 --- a/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs +++ b/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs @@ -1352,6 +1352,7 @@ mod tests { comment: None, ttl: None, 
codec: None, + materialized: None, }, after_column: None, dependency_info: DependencyInfo { @@ -1675,6 +1676,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }; // Create operations with correct dependencies @@ -2730,6 +2732,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }; // Create operations with signatures that work with the current implementation @@ -2918,6 +2921,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "old_column".to_string(), @@ -2930,6 +2934,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -2965,6 +2970,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "new_column".to_string(), @@ -2977,6 +2983,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -3011,6 +3018,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }), ColumnChange::Added { column: Column { @@ -3024,6 +3032,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, position_after: Some("id".to_string()), }, diff --git a/apps/framework-cli/src/infrastructure/processes/kafka_clickhouse_sync.rs b/apps/framework-cli/src/infrastructure/processes/kafka_clickhouse_sync.rs index 19d69dbe49..bcdcc11dfc 100644 --- a/apps/framework-cli/src/infrastructure/processes/kafka_clickhouse_sync.rs +++ b/apps/framework-cli/src/infrastructure/processes/kafka_clickhouse_sync.rs @@ -1215,6 +1215,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "B".to_string(), @@ -1227,6 +1228,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "C".to_string(), @@ -1245,6 +1247,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { 
name: "b".to_string(), @@ -1263,6 +1266,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "e".to_string(), @@ -1275,6 +1279,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "f".to_string(), @@ -1287,6 +1292,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], }), @@ -1298,6 +1304,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "c".to_string(), @@ -1310,6 +1317,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], }), @@ -1321,6 +1329,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "D".to_string(), @@ -1333,6 +1342,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], }; diff --git a/apps/framework-cli/src/utilities/validate_passthrough.rs b/apps/framework-cli/src/utilities/validate_passthrough.rs index 087b8b4049..9bdcf2b499 100644 --- a/apps/framework-cli/src/utilities/validate_passthrough.rs +++ b/apps/framework-cli/src/utilities/validate_passthrough.rs @@ -649,6 +649,7 @@ impl<'de, S: SerializeValue> Visitor<'de> for &mut ValueVisitor<'_, S> { comment: None, ttl: None, codec: None, + materialized: None, } }) .collect(); @@ -1318,6 +1319,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "int_col".to_string(), @@ -1330,6 +1332,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "float_col".to_string(), @@ -1342,6 +1345,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "bool_col".to_string(), @@ -1354,6 +1358,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "date_col".to_string(), @@ -1366,6 +1371,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]; @@ -1401,6 +1407,7 @@ mod tests 
{ comment: None, ttl: None, codec: None, + materialized: None, }]; let json = r#" @@ -1436,6 +1443,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; let json = r#" @@ -1478,6 +1486,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; // Test valid enum value @@ -1528,6 +1537,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "nested_int".to_string(), @@ -1540,6 +1550,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]; @@ -1555,6 +1566,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "nested_object".to_string(), @@ -1571,6 +1583,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]; @@ -1630,6 +1643,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "optional_field".to_string(), @@ -1642,6 +1656,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]; @@ -1674,6 +1689,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "aud".to_string(), @@ -1686,6 +1702,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "exp".to_string(), @@ -1698,6 +1715,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]; @@ -1713,6 +1731,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "jwt_object".to_string(), @@ -1729,6 +1748,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]; @@ -1775,6 +1795,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; // Test valid map @@ -1833,6 +1854,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; // Test valid map with numeric keys (as strings in JSON) @@ -1888,6 +1910,7 @@ mod tests { comment: None, ttl: None, codec: None, + 
materialized: None, }]; // Min boundary 0 @@ -1932,6 +1955,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; // Min boundary -32768 @@ -1976,6 +2000,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; let positive_limit: BigInt = BigInt::from(1u8) << 127usize; @@ -2022,6 +2047,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; let positive_limit: BigInt = BigInt::from(1u8) << 255usize; @@ -2068,6 +2094,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; let limit: BigUint = BigUint::from(1u8) << 256usize; @@ -2115,6 +2142,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; // Valid keys @@ -2156,6 +2184,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; let positive_limit: BigInt = BigInt::from(1u8) << 255usize; @@ -2197,6 +2226,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; let limit: BigUint = BigUint::from(1u8) << 256usize; @@ -2242,6 +2272,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; let json = r#" @@ -2274,6 +2305,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; // missing nested path @@ -2307,6 +2339,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; // null at the nested path counts as missing for non-nullable types @@ -2355,6 +2388,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; // Test 1: Two's complement value (what -1 becomes with naive cast) should be rejected @@ -2425,6 +2459,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; // Test negative values work with i64 diff --git a/packages/protobuf/infrastructure_map.proto b/packages/protobuf/infrastructure_map.proto index df8c3c5e47..655dfa1a90 100644 --- a/packages/protobuf/infrastructure_map.proto +++ 
b/packages/protobuf/infrastructure_map.proto @@ -243,6 +243,8 @@ message Column { optional string ttl = 10; // Compression codec expression (e.g., "ZSTD(3)", "Delta, LZ4") optional string codec = 11; + // MATERIALIZED column expression (computed at write-time, physically stored) + optional string materialized = 12; } enum SimpleColumnType { diff --git a/packages/py-moose-lib/moose_lib/__init__.py b/packages/py-moose-lib/moose_lib/__init__.py index d50c9c72a5..8ef2d3d3f7 100644 --- a/packages/py-moose-lib/moose_lib/__init__.py +++ b/packages/py-moose-lib/moose_lib/__init__.py @@ -41,6 +41,7 @@ clickhouse_default, ClickHouseTTL, ClickHouseCodec, + ClickHouseMaterialized, # Integer types Int8, Int16, diff --git a/packages/py-moose-lib/moose_lib/data_models.py b/packages/py-moose-lib/moose_lib/data_models.py index dc0a3591b6..770a7c9a46 100644 --- a/packages/py-moose-lib/moose_lib/data_models.py +++ b/packages/py-moose-lib/moose_lib/data_models.py @@ -77,6 +77,41 @@ class ClickHouseCodec: expression: str +@dataclasses.dataclass(frozen=True) +class ClickHouseMaterialized: + """ + ClickHouse MATERIALIZED column annotation. + The column value is computed at INSERT time and physically stored. + Cannot be explicitly inserted by users. 
+ + Args: + expression: ClickHouse SQL expression using column names (snake_case) + + Examples: + # Extract date component + event_date: Annotated[date, ClickHouseMaterialized("toDate(event_time)")] + + # Precompute hash + user_hash: Annotated[int, ClickHouseMaterialized("cityHash64(user_id)")] + + # Complex expression with JSON + combination_hash: Annotated[ + list[int], + ClickHouseMaterialized( + "arrayMap(kv -> cityHash64(kv.1, kv.2), " + "JSONExtractKeysAndValuesRaw(toString(log_blob)))" + ) + ] + + Notes: + - Expression uses ClickHouse column names, not Python field names + - MATERIALIZED and DEFAULT are mutually exclusive + - Can be combined with ClickHouseCodec for compression + - Changing the expression requires DROP + ADD (data is lost) + """ + expression: str + + @dataclasses.dataclass(frozen=True) class ClickHouseJson: max_dynamic_paths: int | None = None @@ -619,6 +654,19 @@ def _to_columns(model: type[BaseModel]) -> list[Column]: None, ) + # Extract MATERIALIZED expression from metadata, if provided + materialized_expr = next( + (md.expression for md in mds if isinstance(md, ClickHouseMaterialized)), + None, + ) + + # Validate mutual exclusivity of DEFAULT and MATERIALIZED + if default_expr and materialized_expr: + raise ValueError( + f"Column '{column_name}' cannot have both DEFAULT and MATERIALIZED. " + f"Use one or the other." 
+ ) + # Extract TTL expression from metadata, if provided ttl_expr = next( (md.expression for md in mds if isinstance(md, ClickHouseTTL)), @@ -639,6 +687,7 @@ def _to_columns(model: type[BaseModel]) -> list[Column]: unique=False, primary_key=primary_key, default=default_expr, + materialized=materialized_expr, annotations=annotations, ttl=ttl_expr, codec=codec_expr, diff --git a/packages/ts-moose-lib/src/browserCompatible.ts b/packages/ts-moose-lib/src/browserCompatible.ts index a0d74719ac..e3a19f8a6f 100644 --- a/packages/ts-moose-lib/src/browserCompatible.ts +++ b/packages/ts-moose-lib/src/browserCompatible.ts @@ -61,6 +61,7 @@ export { ClickHouseNamedTuple, ClickHouseDefault, ClickHouseTTL, + ClickHouseMaterialized, WithDefault, ClickHouseCodec, // Added friendly aliases and numeric helpers diff --git a/packages/ts-moose-lib/src/dataModels/typeConvert.ts b/packages/ts-moose-lib/src/dataModels/typeConvert.ts index 93b0c778de..50adc708df 100644 --- a/packages/ts-moose-lib/src/dataModels/typeConvert.ts +++ b/packages/ts-moose-lib/src/dataModels/typeConvert.ts @@ -306,6 +306,27 @@ const handleDefault = (t: ts.Type, checker: TypeChecker): string | null => { return defaultType.value; }; +/** Detect ClickHouse materialized annotation on a type and return raw sql */ +const handleMaterialized = ( + t: ts.Type, + checker: TypeChecker, +): string | null => { + const materializedType = getTaggedType( + t, + checker, + "_clickhouse_materialized", + ); + if (materializedType === null) { + return null; + } + if (!materializedType.isStringLiteral()) { + throw new UnsupportedFeature( + 'ClickHouseMaterialized must use a string literal, e.g. 
ClickHouseMaterialized<"now()">', + ); + } + return materializedType.value; +}; + /** Detect ClickHouse TTL annotation on a type and return raw sql */ const handleTtl = (t: ts.Type, checker: TypeChecker): string | null => { const ttlType = getTaggedType(t, checker, "_clickhouse_ttl"); @@ -970,13 +991,24 @@ export const toColumns = (t: ts.Type, checker: TypeChecker): Column[] => { node?.type, ); + const defaultValue = defaultExpression ?? handleDefault(type, checker); + const materializedValue = handleMaterialized(type, checker); + + // Validate mutual exclusivity of DEFAULT and MATERIALIZED + if (defaultValue && materializedValue) { + throw new UnsupportedFeature( + `Column '${prop.name}' cannot have both ClickHouseDefault and ClickHouseMaterialized. Use one or the other.`, + ); + } + return { name: prop.name, data_type: dataType, primary_key: isKey, required: !nullable, unique: false, - default: defaultExpression ?? handleDefault(type, checker), + default: defaultValue, + materialized: materializedValue, ttl: handleTtl(type, checker), codec: handleCodec(type, checker), annotations, diff --git a/packages/ts-moose-lib/src/dataModels/types.ts b/packages/ts-moose-lib/src/dataModels/types.ts index cca3b934bd..d104eb5cab 100644 --- a/packages/ts-moose-lib/src/dataModels/types.ts +++ b/packages/ts-moose-lib/src/dataModels/types.ts @@ -203,6 +203,32 @@ export type ClickHouseTTL = { _clickhouse_ttl?: SqlExpression; }; +/** + * ClickHouse MATERIALIZED column annotation. + * The column value is computed at INSERT time and physically stored. + * Cannot be explicitly inserted by users. 
+ * + * @example + * interface Events { + * eventTime: Date; + * // Extract date component - computed and stored at insert time + * eventDate: Date & ClickHouseMaterialized<"toDate(eventTime)">; + * + * userId: string; + * // Precompute hash for fast lookups + * userHash: number & ClickHouseInt<"uint64"> & ClickHouseMaterialized<"cityHash64(userId)">; + * } + * + * @remarks + * - Expression uses ClickHouse column names, which keep the TypeScript field spelling (e.g. eventTime, not event_time) + * - MATERIALIZED and DEFAULT are mutually exclusive + * - Can be combined with ClickHouseCodec for compression + * - Changing the expression requires DROP + ADD (data is lost) + */ +export type ClickHouseMaterialized = { + _clickhouse_materialized?: SqlExpression; +}; + /** * See also {@link ClickHouseDefault} * From 16c70035c76e241e169186ae5d10aff512a90825 Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Wed, 26 Nov 2025 17:12:12 -0700 Subject: [PATCH 09/10] fixes --- .../test/utils/database-utils.ts | 27 +++++ .../test/utils/schema-definitions.ts | 42 +++++++ .../src/framework/core/infrastructure_map.rs | 1 + .../src/infrastructure/olap/clickhouse/mod.rs | 110 +++++++++++++++++- .../py-moose-lib/moose_lib/data_models.py | 1 + .../py-moose-lib/tests/test_materialized.py | 70 +++++++++++ .../src/dataModels/dataModelTypes.ts | 1 + packages/ts-moose-lib/src/dmv2/internal.ts | 5 + .../tests/cluster-validation.test.ts | 1 + .../tests/olap-table-versioning.test.ts | 1 + .../ts-moose-lib/tests/typeConvert.test.ts | 29 +++++ templates/python-tests/src/ingest/models.py | 29 +++++ .../typescript-tests/src/ingest/models.ts | 26 +++++ 13 files changed, 339 insertions(+), 4 deletions(-) create mode 100644 packages/py-moose-lib/tests/test_materialized.py diff --git a/apps/framework-cli-e2e/test/utils/database-utils.ts b/apps/framework-cli-e2e/test/utils/database-utils.ts index 60ae5a38c1..1d7b8d6c56 100644 --- a/apps/framework-cli-e2e/test/utils/database-utils.ts +++ 
b/apps/framework-cli-e2e/test/utils/database-utils.ts @@ -256,6 +256,7 @@ export interface ExpectedColumn { nullable?: boolean; comment?: string; codec?: string | RegExp; + materialized?: string | RegExp; } /** @@ -454,6 +455,32 @@ export const validateTableSchema = async ( ); } } + + // Materialized validation (if specified) + if (expectedCol.materialized !== undefined) { + const actualMaterialized = actualCol.default_expression; + const actualDefaultType = actualCol.default_type; + let materializedMatches = false; + + // Check that it's actually a MATERIALIZED column + if (actualDefaultType === "MATERIALIZED") { + if (typeof expectedCol.materialized === "string") { + // Exact string match + materializedMatches = + actualMaterialized === expectedCol.materialized; + } else if (expectedCol.materialized instanceof RegExp) { + // Regex match for complex expressions + materializedMatches = + expectedCol.materialized.test(actualMaterialized); + } + } + + if (!materializedMatches) { + errors.push( + `Column '${expectedCol.name}' materialized mismatch: expected '${expectedCol.materialized}', got '${actualDefaultType === "MATERIALIZED" ? 
actualMaterialized : "(not materialized)"}'`, + ); + } + } } // Check for unexpected columns (optional - could be made configurable) diff --git a/apps/framework-cli-e2e/test/utils/schema-definitions.ts b/apps/framework-cli-e2e/test/utils/schema-definitions.ts index c0bbebaee7..8ac9e5b5e2 100644 --- a/apps/framework-cli-e2e/test/utils/schema-definitions.ts +++ b/apps/framework-cli-e2e/test/utils/schema-definitions.ts @@ -436,6 +436,25 @@ export const TYPESCRIPT_TEST_SCHEMAS: ExpectedTableSchema[] = [ { name: "status_code", type: "Float64" }, ], }, + // Materialized column test table + { + tableName: "MaterializedTest", + columns: [ + { name: "id", type: "String" }, + { name: "timestamp", type: /DateTime\('UTC'\)/ }, + { name: "userId", type: "String" }, + { name: "eventDate", type: "Date", materialized: "toDate(timestamp)" }, + { name: "userHash", type: "UInt64", materialized: "cityHash64(userId)" }, + { name: "log_blob", type: "JSON", codec: "ZSTD(3)" }, + { + name: "combinationHash", + type: "Array(UInt64)", + materialized: + "arrayMap(kv -> cityHash64(kv.1, kv.2), JSONExtractKeysAndValuesRaw(toString(log_blob)))", + codec: "ZSTD(1)", + }, + ], + }, ]; // ============ PYTHON TEMPLATE SCHEMA DEFINITIONS ============ @@ -835,6 +854,29 @@ export const PYTHON_TEST_SCHEMAS: ExpectedTableSchema[] = [ { name: "status_code", type: "Float64" }, ], }, + // Materialized column test table + { + tableName: "MaterializedTest", + columns: [ + { name: "id", type: "String" }, + { name: "timestamp", type: /DateTime\('UTC'\)/ }, + { name: "user_id", type: "String" }, + { name: "event_date", type: "Date", materialized: "toDate(timestamp)" }, + { + name: "user_hash", + type: "UInt64", + materialized: "cityHash64(user_id)", + }, + { name: "log_blob", type: "JSON", codec: "ZSTD(3)" }, + { + name: "combination_hash", + type: "Array(UInt64)", + materialized: + "arrayMap(kv -> cityHash64(kv.1, kv.2), JSONExtractKeysAndValuesRaw(toString(log_blob)))", + codec: "ZSTD(1)", + }, + ], + }, ]; 
// ============ HELPER FUNCTIONS ============ diff --git a/apps/framework-cli/src/framework/core/infrastructure_map.rs b/apps/framework-cli/src/framework/core/infrastructure_map.rs index e2156b19a4..a93f0ffe37 100644 --- a/apps/framework-cli/src/framework/core/infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/infrastructure_map.rs @@ -2776,6 +2776,7 @@ fn columns_are_equivalent(before: &Column, after: &Column) -> bool { || before.unique != after.unique // primary_key change is handled at the table level || before.default != after.default + || before.materialized != after.materialized || before.annotations != after.annotations || before.comment != after.comment { diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs index 4b5103395d..734e98023f 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs @@ -916,6 +916,7 @@ async fn execute_modify_table_column( // Check if only the comment has changed let data_type_changed = before_column.data_type != after_column.data_type; let default_changed = before_column.default != after_column.default; + let materialized_changed = before_column.materialized != after_column.materialized; let required_changed = before_column.required != after_column.required; let comment_changed = before_column.comment != after_column.comment; let ttl_changed = before_column.ttl != after_column.ttl; @@ -926,6 +927,7 @@ async fn execute_modify_table_column( if !data_type_changed && !required_changed && !default_changed + && !materialized_changed && !ttl_changed && !codec_changed && comment_changed @@ -966,7 +968,7 @@ async fn execute_modify_table_column( log::info!( "Executing ModifyTableColumn for table: {}, column: {} ({}→{})\ -data_type_changed: {data_type_changed}, default_changed: {default_changed}, required_changed: {required_changed}, comment_changed: 
{comment_changed}, ttl_changed: {ttl_changed}, codec_changed: {codec_changed}", +data_type_changed: {data_type_changed}, default_changed: {default_changed}, materialized_changed: {materialized_changed}, required_changed: {required_changed}, comment_changed: {comment_changed}, ttl_changed: {ttl_changed}, codec_changed: {codec_changed}", table_name, after_column.name, before_column.data_type, @@ -978,6 +980,8 @@ data_type_changed: {data_type_changed}, default_changed: {default_changed}, requ // Build all the SQL statements needed (main modify + optional removes) let removing_default = before_column.default.is_some() && after_column.default.is_none(); + let removing_materialized = + before_column.materialized.is_some() && after_column.materialized.is_none(); let removing_ttl = before_column.ttl.is_some() && after_column.ttl.is_none(); let removing_codec = before_column.codec.is_some() && after_column.codec.is_none(); let queries = build_modify_column_sql( @@ -985,6 +989,7 @@ data_type_changed: {data_type_changed}, default_changed: {default_changed}, requ table_name, &clickhouse_column, removing_default, + removing_materialized, removing_ttl, removing_codec, cluster_name, @@ -1035,11 +1040,13 @@ async fn execute_modify_column_comment( Ok(()) } +#[allow(clippy::too_many_arguments)] fn build_modify_column_sql( db_name: &str, table_name: &str, ch_col: &ClickHouseColumn, removing_default: bool, + removing_materialized: bool, removing_ttl: bool, removing_codec: bool, cluster_name: Option<&str>, @@ -1061,6 +1068,14 @@ fn build_modify_column_sql( )); } + // Add REMOVE MATERIALIZED statement if needed + if removing_materialized { + statements.push(format!( + "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN `{}` REMOVE MATERIALIZED", + db_name, table_name, cluster_clause, ch_col.name + )); + } + // Add REMOVE TTL statement if needed if removing_ttl { statements.push(format!( @@ -1090,6 +1105,13 @@ fn build_modify_column_sql( .map(|d| format!(" DEFAULT {}", d)) .unwrap_or_default(); + // 
MATERIALIZED clause: If omitted, ClickHouse KEEPS any existing MATERIALIZED + let materialized_clause = ch_col + .materialized + .as_ref() + .map(|m| format!(" MATERIALIZED {}", m)) + .unwrap_or_default(); + // TTL clause: If omitted, ClickHouse KEEPS any existing TTL // Therefore, TTL removal requires a separate REMOVE TTL statement let ttl_clause = ch_col @@ -1110,26 +1132,28 @@ fn build_modify_column_sql( let main_sql = if let Some(ref comment) = ch_col.comment { let escaped_comment = comment.replace('\'', "''"); format!( - "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN IF EXISTS `{}` {}{}{}{} COMMENT '{}'", + "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN IF EXISTS `{}` {}{}{}{}{} COMMENT '{}'", db_name, table_name, cluster_clause, ch_col.name, column_type_string, default_clause, + materialized_clause, codec_clause, ttl_clause, escaped_comment ) } else { format!( - "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN IF EXISTS `{}` {}{}{}{}", + "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN IF EXISTS `{}` {}{}{}{}{}", db_name, table_name, cluster_clause, ch_col.name, column_type_string, default_clause, + materialized_clause, codec_clause, ttl_clause ) @@ -2790,7 +2814,8 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra let ch_after = std_column_to_clickhouse_column(after_column).unwrap(); let sqls = - build_modify_column_sql("db", "table", &ch_after, false, false, false, None).unwrap(); + build_modify_column_sql("db", "table", &ch_after, false, false, false, false, None) + .unwrap(); assert_eq!(sqls.len(), 1); assert_eq!( @@ -2861,6 +2886,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra false, false, false, + false, None, ) .unwrap(); @@ -2898,6 +2924,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra false, false, false, + false, None, ) .unwrap(); @@ -2930,6 +2957,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra false, false, false, + false, None, ) 
.unwrap(); @@ -2962,6 +2990,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra false, false, false, + false, None, ) .unwrap(); @@ -2974,6 +3003,79 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra ); } + #[test] + fn test_modify_column_with_materialized() { + use crate::infrastructure::olap::clickhouse::model::ClickHouseColumn; + + // Test changing a MATERIALIZED expression + let ch_col = ClickHouseColumn { + name: "event_date".to_string(), + column_type: ClickHouseColumnType::Date, + required: true, + primary_key: false, + unique: false, + default: None, + materialized: Some("toStartOfMonth(event_time)".to_string()), + comment: None, + ttl: None, + codec: None, + }; + + let sqls = build_modify_column_sql( + "test_db", + "test_table", + &ch_col, + false, // removing_default + false, // removing_materialized + false, // removing_ttl + false, // removing_codec + None, + ) + .unwrap(); + + assert_eq!(sqls.len(), 1); + assert_eq!( + sqls[0], + "ALTER TABLE `test_db`.`test_table` MODIFY COLUMN IF EXISTS `event_date` Date MATERIALIZED toStartOfMonth(event_time)" + ); + } + + #[test] + fn test_remove_materialized_sql_generation() { + use crate::infrastructure::olap::clickhouse::model::ClickHouseColumn; + + let ch_col = ClickHouseColumn { + name: "user_hash".to_string(), + column_type: ClickHouseColumnType::ClickhouseInt(ClickHouseInt::UInt64), + required: true, + primary_key: false, + unique: false, + default: None, + materialized: None, + comment: None, + ttl: None, + codec: None, + }; + + let sqls = build_modify_column_sql( + "test_db", + "test_table", + &ch_col, + false, + true, + false, + false, + None, + ) + .unwrap(); + + assert!(!sqls.is_empty()); + assert_eq!( + sqls[0], + "ALTER TABLE `test_db`.`test_table` MODIFY COLUMN `user_hash` REMOVE MATERIALIZED" + ); + } + #[test] fn test_extract_order_by_from_create_query_nested_objects() { // Test with deeply nested structure diff --git 
a/packages/py-moose-lib/moose_lib/data_models.py b/packages/py-moose-lib/moose_lib/data_models.py index 770a7c9a46..fa2a5c63ce 100644 --- a/packages/py-moose-lib/moose_lib/data_models.py +++ b/packages/py-moose-lib/moose_lib/data_models.py @@ -312,6 +312,7 @@ class Column(BaseModel): annotations: list[Tuple[str, Any]] = [] ttl: str | None = None codec: str | None = None + materialized: str | None = None def to_expr(self): # Lazy import to avoid circular dependency at import time diff --git a/packages/py-moose-lib/tests/test_materialized.py b/packages/py-moose-lib/tests/test_materialized.py new file mode 100644 index 0000000000..adedb52286 --- /dev/null +++ b/packages/py-moose-lib/tests/test_materialized.py @@ -0,0 +1,70 @@ +from datetime import datetime, date +from typing import Annotated, Any +from pydantic import BaseModel +from moose_lib import Key, ClickHouseMaterialized, ClickHouseCodec, UInt64 +from moose_lib.data_models import _to_columns +import pytest + + +def test_materialized_basic(): + """Test basic MATERIALIZED annotation converts to correct expression.""" + + class MaterializedTest(BaseModel): + timestamp: datetime + event_date: Annotated[date, ClickHouseMaterialized("toDate(timestamp)")] + + columns = _to_columns(MaterializedTest) + by_name = {col.name: col for col in columns} + + assert by_name["timestamp"].materialized is None + assert by_name["event_date"].materialized == "toDate(timestamp)" + + +def test_materialized_hash(): + """Test MATERIALIZED with hash function.""" + + class HashTest(BaseModel): + user_id: str + user_hash: Annotated[UInt64, ClickHouseMaterialized("cityHash64(user_id)")] + + columns = _to_columns(HashTest) + by_name = {col.name: col for col in columns} + + assert by_name["user_id"].materialized is None + assert by_name["user_hash"].materialized == "cityHash64(user_id)" + + +def test_materialized_with_codec(): + """Test MATERIALIZED combined with CODEC.""" + + class MaterializedCodecTest(BaseModel): + log_blob: Annotated[Any, 
ClickHouseCodec("ZSTD(3)")] + combination_hash: Annotated[ + list[UInt64], + ClickHouseMaterialized("arrayMap(kv -> cityHash64(kv.1, kv.2), JSONExtractKeysAndValuesRaw(toString(log_blob)))"), + ClickHouseCodec("ZSTD(1)") + ] + + columns = _to_columns(MaterializedCodecTest) + by_name = {col.name: col for col in columns} + + assert by_name["log_blob"].materialized is None + assert by_name["log_blob"].codec == "ZSTD(3)" + assert by_name["combination_hash"].materialized == "arrayMap(kv -> cityHash64(kv.1, kv.2), JSONExtractKeysAndValuesRaw(toString(log_blob)))" + assert by_name["combination_hash"].codec == "ZSTD(1)" + + +def test_materialized_mutually_exclusive_with_default(): + """Test that MATERIALIZED and DEFAULT are mutually exclusive.""" + from moose_lib import clickhouse_default + + class BadModel(BaseModel): + bad_field: Annotated[ + str, + clickhouse_default("'default_value'"), + ClickHouseMaterialized("'materialized_value'") + ] + + with pytest.raises(ValueError, match="cannot have both DEFAULT and MATERIALIZED"): + _to_columns(BadModel) + diff --git a/packages/ts-moose-lib/src/dataModels/dataModelTypes.ts b/packages/ts-moose-lib/src/dataModels/dataModelTypes.ts index ba932bc772..30162c850c 100644 --- a/packages/ts-moose-lib/src/dataModels/dataModelTypes.ts +++ b/packages/ts-moose-lib/src/dataModels/dataModelTypes.ts @@ -32,6 +32,7 @@ export interface Column { unique: false; // what is this for? 
primary_key: boolean; default: string | null; + materialized: string | null; ttl: string | null; codec: string | null; annotations: [string, any][]; diff --git a/packages/ts-moose-lib/src/dmv2/internal.ts b/packages/ts-moose-lib/src/dmv2/internal.ts index 9132ed5cdb..bcc6fddef8 100644 --- a/packages/ts-moose-lib/src/dmv2/internal.ts +++ b/packages/ts-moose-lib/src/dmv2/internal.ts @@ -1163,6 +1163,7 @@ export const dlqColumns: Column[] = [ annotations: [], ttl: null, codec: null, + materialized: null, }, { name: "errorMessage", @@ -1174,6 +1175,7 @@ export const dlqColumns: Column[] = [ annotations: [], ttl: null, codec: null, + materialized: null, }, { name: "errorType", @@ -1185,6 +1187,7 @@ export const dlqColumns: Column[] = [ annotations: [], ttl: null, codec: null, + materialized: null, }, { name: "failedAt", @@ -1196,6 +1199,7 @@ export const dlqColumns: Column[] = [ annotations: [], ttl: null, codec: null, + materialized: null, }, { name: "source", @@ -1207,6 +1211,7 @@ export const dlqColumns: Column[] = [ annotations: [], ttl: null, codec: null, + materialized: null, }, ]; diff --git a/packages/ts-moose-lib/tests/cluster-validation.test.ts b/packages/ts-moose-lib/tests/cluster-validation.test.ts index c7939c3ce0..e7b52e6d3a 100644 --- a/packages/ts-moose-lib/tests/cluster-validation.test.ts +++ b/packages/ts-moose-lib/tests/cluster-validation.test.ts @@ -23,6 +23,7 @@ const createMockColumns = (fields: string[]): Column[] => unique: false, primary_key: false, default: null, + materialized: null, ttl: null, codec: null, annotations: [], diff --git a/packages/ts-moose-lib/tests/olap-table-versioning.test.ts b/packages/ts-moose-lib/tests/olap-table-versioning.test.ts index 9496bc2125..8f3cd65c2b 100644 --- a/packages/ts-moose-lib/tests/olap-table-versioning.test.ts +++ b/packages/ts-moose-lib/tests/olap-table-versioning.test.ts @@ -26,6 +26,7 @@ const createMockColumns = (fields: string[]): Column[] => unique: false, primary_key: false, default: null, + 
materialized: null, ttl: null, codec: null, annotations: [], diff --git a/packages/ts-moose-lib/tests/typeConvert.test.ts b/packages/ts-moose-lib/tests/typeConvert.test.ts index b4658b8655..0af1800fc7 100644 --- a/packages/ts-moose-lib/tests/typeConvert.test.ts +++ b/packages/ts-moose-lib/tests/typeConvert.test.ts @@ -289,4 +289,33 @@ describe("typeConvert mappings for helper types", function () { fs.rmSync(tempDir, { recursive: true, force: true }); } }); + + it("maps Materialized annotations for computed columns", function () { + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "moose-typeconv-")); + try { + const source = ` + import { ClickHouseMaterialized, UInt64 } from "@514labs/moose-lib"; + import typia from "typia"; + + export interface TestModel { + timestamp: Date; + userId: string; + eventDate: Date & ClickHouseMaterialized<"toDate(timestamp)">; + userHash: UInt64 & ClickHouseMaterialized<"cityHash64(userId)">; + no_materialized: string; + } + `; + const { checker, type } = createProgramWithSource(tempDir, source); + const columns = toColumns(type, checker); + const byName = Object.fromEntries(columns.map((c) => [c.name, c])); + + expect(byName.timestamp.materialized).to.equal(null); + expect(byName.userId.materialized).to.equal(null); + expect(byName.eventDate.materialized).to.equal("toDate(timestamp)"); + expect(byName.userHash.materialized).to.equal("cityHash64(userId)"); + expect(byName.no_materialized.materialized).to.equal(null); + } finally { + fs.rmSync(tempDir, { recursive: true, force: true }); + } + }); }); diff --git a/templates/python-tests/src/ingest/models.py b/templates/python-tests/src/ingest/models.py index acffbb3887..61f941e7a2 100644 --- a/templates/python-tests/src/ingest/models.py +++ b/templates/python-tests/src/ingest/models.py @@ -706,3 +706,32 @@ class CodecTest(BaseModel): table=True, dead_letter_queue=True )) + + +# =======Materialized Columns Test======= +from moose_lib import ClickHouseMaterialized + +class 
MaterializedTest(BaseModel): + """Test model for materialized column support.""" + id: Key[str] + timestamp: datetime + user_id: str + # Extract date from timestamp - computed and stored at insert time + event_date: Annotated[date, ClickHouseMaterialized("toDate(timestamp)")] + # Precompute hash for fast lookups + user_hash: Annotated[UInt64, ClickHouseMaterialized("cityHash64(user_id)")] + # Combine MATERIALIZED with CODEC + log_blob: Annotated[Any, ClickHouseCodec("ZSTD(3)")] + combination_hash: Annotated[ + list[UInt64], + ClickHouseMaterialized("arrayMap(kv -> cityHash64(kv.1, kv.2), JSONExtractKeysAndValuesRaw(toString(log_blob)))"), + ClickHouseCodec("ZSTD(1)") + ] + + +materialized_test_model = IngestPipeline[MaterializedTest]("MaterializedTest", IngestPipelineConfig( + ingest_api=True, + stream=True, + table=True, + dead_letter_queue=True +)) diff --git a/templates/typescript-tests/src/ingest/models.ts b/templates/typescript-tests/src/ingest/models.ts index 53c2b82d3f..a2c9497a44 100644 --- a/templates/typescript-tests/src/ingest/models.ts +++ b/templates/typescript-tests/src/ingest/models.ts @@ -704,3 +704,29 @@ export const CodecTestPipeline = new IngestPipeline("CodecTest", { stream: true, ingestApi: true, }); + +// =======Materialized Columns Test======= +import { ClickHouseMaterialized } from "@514labs/moose-lib"; + +export interface MaterializedTest { + id: Key<string>; + timestamp: DateTime; + userId: string; + eventDate: string & + typia.tags.Format<"date"> & + ClickHouseMaterialized<"toDate(timestamp)">; + userHash: UInt64 & ClickHouseMaterialized<"cityHash64(userId)">; + log_blob: Record<string, any> & ClickHouseCodec<"ZSTD(3)">; + combinationHash: UInt64[] & + ClickHouseMaterialized<"arrayMap(kv -> cityHash64(kv.1, kv.2), JSONExtractKeysAndValuesRaw(toString(log_blob)))"> & + ClickHouseCodec<"ZSTD(1)">; +} + +export const MaterializedTestPipeline = new IngestPipeline<MaterializedTest>( + "MaterializedTest", + { + table: true, + stream: true, + ingestApi: true, + }, +); From 
d8b3963c975eb0e871300339364bcae7c98e2fbe Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Sun, 30 Nov 2025 17:19:30 -0700 Subject: [PATCH 10/10] ok --- .../test/utils/schema-definitions.ts | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/apps/framework-cli-e2e/test/utils/schema-definitions.ts b/apps/framework-cli-e2e/test/utils/schema-definitions.ts index 8ac9e5b5e2..82a6e946fe 100644 --- a/apps/framework-cli-e2e/test/utils/schema-definitions.ts +++ b/apps/framework-cli-e2e/test/utils/schema-definitions.ts @@ -443,7 +443,11 @@ export const TYPESCRIPT_TEST_SCHEMAS: ExpectedTableSchema[] = [ { name: "id", type: "String" }, { name: "timestamp", type: /DateTime\('UTC'\)/ }, { name: "userId", type: "String" }, - { name: "eventDate", type: "Date", materialized: "toDate(timestamp)" }, + { + name: "eventDate", + type: /Date(32)?/, + materialized: "toDate(timestamp)", + }, { name: "userHash", type: "UInt64", materialized: "cityHash64(userId)" }, { name: "log_blob", type: "JSON", codec: "ZSTD(3)" }, { @@ -861,7 +865,11 @@ export const PYTHON_TEST_SCHEMAS: ExpectedTableSchema[] = [ { name: "id", type: "String" }, { name: "timestamp", type: /DateTime\('UTC'\)/ }, { name: "user_id", type: "String" }, - { name: "event_date", type: "Date", materialized: "toDate(timestamp)" }, + { + name: "event_date", + type: /Date(32)?/, + materialized: "toDate(timestamp)", + }, { name: "user_hash", type: "UInt64",