Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 15 additions & 7 deletions docs/content/docs/sql/reference/queries/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ Note: This version requires that your CDC data encodes updates using a full imag

```sql
SELECT * FROM FROM_CHANGELOG(
input => TABLE source_table,
input => TABLE source_table [PARTITION BY key_col],
[op => DESCRIPTOR(op_column_name),]
[op_mapping => MAP[
'c, r', 'INSERT',
Expand All @@ -61,7 +61,7 @@ SELECT * FROM FROM_CHANGELOG(

| Parameter | Required | Description |
|:-------------|:---------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `input` | Yes | The input table. Must be append-only. |
| `input` | Yes | The input table. Must be append-only. Use `PARTITION BY` to ensure that rows with the same key are co-located in the same parallel operator instance. This is required when downstream operators are keyed on that column. |
| `op` | No | A `DESCRIPTOR` with a single column name for the operation code column. Defaults to `op`. The column must exist in the input table and be of type STRING. |
| `op_mapping` | No | A `MAP<STRING, STRING>` mapping user-defined codes to Flink change operation names. Keys are user-defined codes (e.g., `'c'`, `'u'`, `'d'`), values are Flink change operation names (`INSERT`, `UPDATE_BEFORE`, `UPDATE_AFTER`, `DELETE`). Keys can contain comma-separated codes to map multiple codes to the same operation (e.g., `'c, r'`). Each change operation may appear at most once across all entries. |
| `error_handling` | No | Controls behavior when an input row's operation code is `NULL` or not present in the `op_mapping`. Valid values: `FAIL` (default) — throw a `TableRuntimeException`, `SKIP` — silently drop the row. |
Expand Down Expand Up @@ -127,6 +127,14 @@ SELECT * FROM FROM_CHANGELOG(
-- The operation column named 'operation' is used instead of 'op'
```

#### Partitioning by a key

```sql
SELECT * FROM FROM_CHANGELOG(
input => TABLE cdc_stream PARTITION BY id
)
```

#### Invalid operation code handling

Two `error_handling` modes are supported. The job can either fail upon an invalid or unknown op code, or skip the row and continue processing.
Expand Down Expand Up @@ -177,18 +185,18 @@ This is useful when you need to materialize changelog events into a downstream s

```sql
SELECT * FROM TO_CHANGELOG(
input => TABLE source_table,
input => TABLE source_table [PARTITION BY key_col],
[op => DESCRIPTOR(op_column_name),]
[op_mapping => MAP['INSERT', 'I', 'DELETE', 'D', ...]]
)
```

### Parameters

| Parameter | Required | Description |
|:-------------|:---------|:------------|
| `input` | Yes | The input table. Accepts insert-only, retract, and upsert tables. |
| `op` | No | A `DESCRIPTOR` with a single column name for the operation code column. Defaults to `op`. |
| Parameter | Required | Description |
|:-------------|:---------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `input` | Yes | The input table. With `PARTITION BY`, rows with the same key are co-located and run in the same operator instance. Without `PARTITION BY`, each row is processed independently. Accepts insert-only, retract, and upsert tables. For upsert tables, the provided `PARTITION BY` key should match or be a subset of the upsert key of the subquery. |
| `op` | No | A `DESCRIPTOR` with a single column name for the operation code column. Defaults to `op`. |
| `op_mapping` | No | A `MAP<STRING, STRING>` mapping change operation names to custom output codes. Keys can contain comma-separated names to map multiple operations to the same code (e.g., `'INSERT, UPDATE_AFTER'`). When provided, only mapped operations are forwarded — unmapped events are dropped. Each change operation may appear at most once across all entries. |

#### Default op_mapping
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1467,6 +1467,17 @@ default TableResult executeInsert(
* TableRuntimeException} when an input row's op code is {@code NULL} or not present in the
* mapping; pass {@code error_handling => 'SKIP'} to silently drop those rows instead.
*
* <p>By default, the input is processed with row semantics (each row independently). To
* co-locate rows with the same key in the same parallel operator instance, partition the input
* first via {@link #partitionBy(Expression...)} and invoke the function via {@link
* PartitionedTable#process(String, Object...)}:
*
* <pre>{@code
* Table result = cdcStream
* .partitionBy($("id"))
* .process("FROM_CHANGELOG");
* }</pre>
*
* <p>Optional arguments can be passed using named expressions:
*
* <pre>{@code
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.apache.flink.table.types.inference.InputTypeStrategies;
import org.apache.flink.table.types.inference.StaticArgument;
import org.apache.flink.table.types.inference.StaticArgumentTrait;
import org.apache.flink.table.types.inference.TraitCondition;
import org.apache.flink.table.types.inference.TypeStrategies;
import org.apache.flink.table.types.inference.strategies.ArrayOfStringArgumentTypeStrategy;
import org.apache.flink.table.types.inference.strategies.SpecificInputTypeStrategies;
Expand Down Expand Up @@ -785,22 +786,22 @@ ANY, and(logical(LogicalTypeRoot.BOOLEAN), LITERAL)
.name("TO_CHANGELOG")
.kind(PROCESS_TABLE)
.staticArguments(
// Row semantics (no PARTITION BY). Accepts updating
// inputs. The planner inserts ChangelogNormalize for
// upsert sources to produce UPDATE_BEFORE and full
// DELETE rows.
// Row semantics (no PARTITION BY).
// With PARTITION BY, switches to set
// semantics for co-located parallel execution.
StaticArgument.table(
"input",
Row.class,
false,
EnumSet.of(
StaticArgumentTrait.TABLE,
StaticArgumentTrait.ROW_SEMANTIC_TABLE,
StaticArgumentTrait.SUPPORT_UPDATES,
StaticArgumentTrait.REQUIRE_UPDATE_BEFORE,
// Not strictly necessary but explicitly state that
// we require full deletes.
StaticArgumentTrait.REQUIRE_FULL_DELETE)),
"input",
Row.class,
false,
EnumSet.of(
StaticArgumentTrait.TABLE,
StaticArgumentTrait.ROW_SEMANTIC_TABLE,
StaticArgumentTrait.SUPPORT_UPDATES,
StaticArgumentTrait.REQUIRE_UPDATE_BEFORE,
StaticArgumentTrait.REQUIRE_FULL_DELETE))
.withConditionalTrait(
StaticArgumentTrait.SET_SEMANTIC_TABLE,
TraitCondition.hasPartitionBy()),
StaticArgument.scalar("op", DataTypes.DESCRIPTOR(), true),
StaticArgument.scalar(
"op_mapping",
Expand All @@ -817,13 +818,19 @@ ANY, and(logical(LogicalTypeRoot.BOOLEAN), LITERAL)
.name("FROM_CHANGELOG")
.kind(PROCESS_TABLE)
.staticArguments(
// Row semantics (no PARTITION BY).
// With PARTITION BY, switches to set
// semantics for co-located parallel execution.
StaticArgument.table(
"input",
Row.class,
false,
EnumSet.of(
StaticArgumentTrait.TABLE,
StaticArgumentTrait.ROW_SEMANTIC_TABLE)),
"input",
Row.class,
false,
EnumSet.of(
StaticArgumentTrait.TABLE,
StaticArgumentTrait.ROW_SEMANTIC_TABLE))
.withConditionalTrait(
StaticArgumentTrait.SET_SEMANTIC_TABLE,
TraitCondition.hasPartitionBy()),
StaticArgument.scalar("op", DataTypes.DESCRIPTOR(), true),
StaticArgument.scalar(
"op_mapping",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.flink.table.types.inference;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.function.Predicate;

/**
 * Internal value-comparable wrapper used by all built-in {@link TraitCondition} factories. Equality
 * is keyed by {@code kind + args}; the {@code impl} predicate is reused but never compared, so two
 * conditions built from the same factory inputs are equal.
 *
 * <p>Lives outside {@link TraitCondition} because Java forbids {@code private} nested types in
 * interfaces (they are implicitly {@code public static}); top-level package-private gives the same
 * encapsulation.
 *
 * <p>Instances are immutable: the constructor takes a defensive, unmodifiable copy of {@code args}
 * so that {@link #equals(Object)} and {@link #hashCode()} cannot change after construction (a
 * requirement for safe use as a hash-map key or in set-based deduplication).
 */
final class BuiltInCondition implements TraitCondition {

    /** Tag identifying which factory produced the condition. */
    enum Kind {
        HAS_PARTITION_BY,
        ARG_IS_EQUAL_TO,
        NOT
    }

    /** Discriminator for the producing factory; part of the equality key. */
    private final Kind kind;

    /** Factory arguments; unmodifiable snapshot, part of the equality key. */
    private final List<Object> args;

    /** The actual condition logic; intentionally excluded from equals/hashCode. */
    private final Predicate<TraitContext> impl;

    BuiltInCondition(final Kind kind, final List<Object> args, final Predicate<TraitContext> impl) {
        this.kind = Objects.requireNonNull(kind, "kind");
        // Defensive copy: equality and hashing are keyed on args, so callers must not be able
        // to mutate the list afterwards. A plain copy (rather than List.copyOf) deliberately
        // tolerates null elements, which factory arguments may legitimately contain.
        this.args =
                Collections.unmodifiableList(
                        new ArrayList<>(Objects.requireNonNull(args, "args")));
        this.impl = Objects.requireNonNull(impl, "impl");
    }

    @Override
    public boolean test(final TraitContext ctx) {
        // Evaluation is delegated entirely to the factory-supplied predicate.
        return impl.test(ctx);
    }

    @Override
    public boolean equals(final Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof BuiltInCondition)) {
            return false;
        }
        final BuiltInCondition that = (BuiltInCondition) o;
        // impl is intentionally ignored: kind + args fully identify the condition.
        return kind == that.kind && args.equals(that.args);
    }

    @Override
    public int hashCode() {
        return Objects.hash(kind, args);
    }
}
Loading