diff --git a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java index b40ff54fbd829c..ad99d3b5c12836 100644 --- a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java +++ b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java @@ -127,12 +127,21 @@ private void initReader() throws IOException { private int[] getProjected() { return Arrays.stream(fields).mapToInt(fieldName -> { - int index = paimonAllFieldNames.indexOf(fieldName); + int index = getFieldIndex(paimonAllFieldNames, fieldName); Preconditions.checkArgument(index >= 0, "RequiredField %s not found in schema", fieldName); return index; }).toArray(); } + static int getFieldIndex(List fieldNames, String fieldName) { + for (int i = 0; i < fieldNames.size(); i++) { + if (fieldNames.get(i).equalsIgnoreCase(fieldName)) { + return i; + } + } + return -1; + } + private List getPredicates() { List predicates = PaimonUtils.deserialize(paimonPredicate); if (LOG.isDebugEnabled()) { @@ -154,7 +163,7 @@ private void resetDatetimeV2Precision() { if (types[i].isDateTimeV2()) { // paimon support precision > 6, but it has been reset as 6 in FE // try to get the right precision for datetimev2 - int index = paimonAllFieldNames.indexOf(fields[i]); + int index = getFieldIndex(paimonAllFieldNames, fields[i]); if (index != -1) { DataType dataType = table.rowType().getTypeAt(index); if (dataType instanceof TimestampType) { diff --git a/fe/be-java-extensions/paimon-scanner/src/test/java/org/apache/doris/paimon/PaimonJniScannerTest.java b/fe/be-java-extensions/paimon-scanner/src/test/java/org/apache/doris/paimon/PaimonJniScannerTest.java index 5b6f1600c7c6c2..74a57d787fe263 100644 --- a/fe/be-java-extensions/paimon-scanner/src/test/java/org/apache/doris/paimon/PaimonJniScannerTest.java +++ b/fe/be-java-extensions/paimon-scanner/src/test/java/org/apache/doris/paimon/PaimonJniScannerTest.java @@ -17,8 +17,10 @@ package org.apache.doris.paimon; +import org.junit.Assert; import org.junit.Test; +import java.util.Arrays; import java.util.HashMap; import java.util.Map; @@ -33,4 +35,14 @@ public void testConstructorAcceptsEmptyProjection() { new PaimonJniScanner(128, params); } + + @Test + public void testGetFieldIndexMatchesMixedCaseColumns() { + Assert.assertEquals(1, PaimonJniScanner.getFieldIndex(Arrays.asList("data", "mIxEd_COL", "PART"), + "mixed_col")); + Assert.assertEquals(2, PaimonJniScanner.getFieldIndex(Arrays.asList("data", "mIxEd_COL", "PART"), + "part")); + Assert.assertEquals(-1, PaimonJniScanner.getFieldIndex(Arrays.asList("data", "mIxEd_COL", "PART"), + "missing_col")); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/DorisTypeToIcebergType.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/DorisTypeToIcebergType.java index 56fa03120d9f37..de19d90728d606 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/DorisTypeToIcebergType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/DorisTypeToIcebergType.java @@ -29,6 +29,7 @@ import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; +import java.util.Collections; import java.util.List; @@ -37,14 +38,21 @@ */ public class DorisTypeToIcebergType extends DorisTypeVisitor { private final StructType root; + private final List rootFieldNames; private int nextId = 0; public DorisTypeToIcebergType() { this.root = null; + this.rootFieldNames = Collections.emptyList(); } public DorisTypeToIcebergType(StructType root) { + this(root, Collections.emptyList()); + } + + public DorisTypeToIcebergType(StructType root, List rootFieldNames) { this.root = root; + this.rootFieldNames = rootFieldNames; // the root struct's fields use the first ids this.nextId = root.getFields().size(); } @@ -65,10 +73,11 @@ public Type struct(StructType struct, List types) { Type type = types.get(i); int id = isRoot ? i : getNextId(); + String fieldName = isRoot && !rootFieldNames.isEmpty() ? rootFieldNames.get(i) : field.getName(); if (field.getContainsNull()) { - newFields.add(Types.NestedField.optional(id, field.getName(), type, field.getComment())); + newFields.add(Types.NestedField.optional(id, fieldName, type, field.getComment())); } else { - newFields.add(Types.NestedField.required(id, field.getName(), type, field.getComment())); + newFields.add(Types.NestedField.required(id, fieldName, type, field.getComment())); } } return Types.StructType.of(newFields); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataOps.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataOps.java index 246ad347242045..d57168ddb2d6ae 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataOps.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataOps.java @@ -368,8 +368,8 @@ public boolean performCreateTable(CreateTableInfo createTableInfo) throws UserEx .map(col -> new StructField(col.getName(), col.getType(), col.getComment(), col.isAllowNull())) .collect(Collectors.toList()); StructType structType = new StructType(new ArrayList<>(collect)); - Type visit = - DorisTypeVisitor.visit(structType, new DorisTypeToIcebergType(structType)); + List rootFieldNames = columns.stream().map(Column::getName).collect(Collectors.toList()); + Type visit = DorisTypeVisitor.visit(structType, new DorisTypeToIcebergType(structType, rootFieldNames)); Schema schema = new Schema(visit.asNestedType().asStructType().fields()); Map properties = createTableInfo.getProperties(); properties.put(ExternalCatalog.DORIS_VERSION, ExternalCatalog.DORIS_VERSION_VALUE); @@ -1348,7 +1348,7 @@ private org.apache.iceberg.SortOrder buildSortOrder(List sortFiel org.apache.iceberg.SortOrder.Builder builder = org.apache.iceberg.SortOrder.builderFor(schema); for (SortFieldInfo sortField : sortFields) { - String columnName = sortField.getColumnName(); + String columnName = getIcebergColumnName(schema, sortField.getColumnName()); if (sortField.isAscending()) { if (sortField.isNullFirst()) { builder.asc(columnName, org.apache.iceberg.NullOrder.NULLS_FIRST); @@ -1365,4 +1365,9 @@ private org.apache.iceberg.SortOrder buildSortOrder(List sortFiel } return builder.build(); } + + private static String getIcebergColumnName(Schema schema, String columnName) { + NestedField field = schema.caseInsensitiveFindField(columnName); + return field == null ? columnName : field.name(); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java index 777250d702076e..9b4a4d0709dc9c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java @@ -137,7 +137,6 @@ import java.util.HashMap; import java.util.LinkedHashSet; import java.util.List; -import java.util.Locale; import java.util.Map; import java.util.Optional; import java.util.Set; @@ -585,37 +584,43 @@ public static PartitionSpec solveIcebergPartitionSpec(PartitionDesc partitionDes PartitionSpec.Builder builder = PartitionSpec.builderFor(schema); for (Expr expr : partitionExprs) { if (expr instanceof SlotRef) { - builder.identity(((SlotRef) expr).getColumnName()); + builder.identity(getIcebergColumnName(schema, ((SlotRef) expr).getColumnName())); } else if (expr instanceof FunctionCallExpr) { String exprName = expr.accept(ExprToExprNameVisitor.INSTANCE, null); List params = ((FunctionCallExpr) expr).getParams().exprs(); switch (exprName.toLowerCase()) { case "bucket": builder.bucket( - params.get(1).accept(ExprToExprNameVisitor.INSTANCE, null), + getIcebergColumnName(schema, + params.get(1).accept(ExprToExprNameVisitor.INSTANCE, null)), Integer.parseInt(params.get(0).getStringValue())); break; case "year": case "years": - builder.year(params.get(0).accept(ExprToExprNameVisitor.INSTANCE, null)); + builder.year(getIcebergColumnName(schema, + params.get(0).accept(ExprToExprNameVisitor.INSTANCE, null))); break; case "month": case "months": - builder.month(params.get(0).accept(ExprToExprNameVisitor.INSTANCE, null)); + builder.month(getIcebergColumnName(schema, + params.get(0).accept(ExprToExprNameVisitor.INSTANCE, null))); break; case "date": case "day": case "days": - builder.day(params.get(0).accept(ExprToExprNameVisitor.INSTANCE, null)); + builder.day(getIcebergColumnName(schema, + params.get(0).accept(ExprToExprNameVisitor.INSTANCE, null))); break; case "date_hour": case "hour": case "hours": - builder.hour(params.get(0).accept(ExprToExprNameVisitor.INSTANCE, null)); + builder.hour(getIcebergColumnName(schema, + params.get(0).accept(ExprToExprNameVisitor.INSTANCE, null))); break; case "truncate": builder.truncate( - params.get(1).accept(ExprToExprNameVisitor.INSTANCE, null), + getIcebergColumnName(schema, + params.get(1).accept(ExprToExprNameVisitor.INSTANCE, null)), Integer.parseInt(params.get(0).getStringValue())); break; default: @@ -626,6 +631,11 @@ public static PartitionSpec solveIcebergPartitionSpec(PartitionDesc partitionDes return builder.build(); } + private static String getIcebergColumnName(Schema schema, String columnName) { + Types.NestedField field = schema.caseInsensitiveFindField(columnName); + return field == null ? columnName : field.name(); + } + private static Type icebergPrimitiveTypeToDorisType(org.apache.iceberg.types.Type.PrimitiveType primitive, boolean enableMappingVarbinary, boolean enableMappingTimestampTz) { switch (primitive.typeId()) { @@ -771,7 +781,7 @@ public static List getIdentityPartitionColumns(Table table) { } String columnName = table.schema().findColumnName(partitionField.sourceId()); if (columnName != null) { - partitionColumns.add(columnName.toLowerCase(Locale.ROOT)); + partitionColumns.add(columnName); } } } @@ -803,8 +813,7 @@ public static Map getIdentityPartitionInfoMap(PartitionData part } Object value = partitionData.get(i); try { - partitionInfoMap.put(columnName.toLowerCase(Locale.ROOT), - serializePartitionValue(field.type(), value, timeZone)); + partitionInfoMap.put(columnName, serializePartitionValue(field.type(), value, timeZone)); } catch (UnsupportedOperationException e) { LOG.warn("Failed to serialize Iceberg table partition value for field {}: {}", field.name(), e.getMessage()); @@ -1168,7 +1177,7 @@ public static List parseSchema(Schema schema, boolean enableMappingVarbi List columns = schema.columns(); List resSchema = Lists.newArrayListWithCapacity(columns.size()); for (Types.NestedField field : columns) { - Column column = new Column(field.name().toLowerCase(Locale.ROOT), + Column column = new Column(field.name(), IcebergUtils.icebergTypeToDorisType(field.type(), enableMappingVarbinary, enableMappingTimestampTz), true, null, true, field.doc(), true, -1); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java index 1775a984f2cd5f..6a744f765e8e2f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java @@ -346,7 +346,7 @@ public Optional initSchema(SchemaCacheKey key) { Set partitionColumnNames = Sets.newHashSet(tableSchema.partitionKeys()); List partitionColumns = Lists.newArrayList(); for (DataField field : columns) { - Column column = new Column(field.name().toLowerCase(), + Column column = new Column(field.name(), PaimonUtil.paimonTypeToDorisType(field.type(), getCatalog().getEnableMappingVarbinary(), getCatalog().getEnableMappingTimestampTz()), true, diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonMetadataOps.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonMetadataOps.java index b8263250c3dafd..2f0be3bc3054f0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonMetadataOps.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonMetadataOps.java @@ -57,6 +57,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Optional; import java.util.stream.Collectors; @@ -218,7 +219,9 @@ public boolean performCreateTable(CreateTableInfo createTableInfo) throws UserEx col.getComment(), col.isNullable())) .collect(Collectors.toList()); StructType structType = new StructType(new ArrayList<>(collect)); - Schema schema = toPaimonSchema(structType, createTableInfo.getPartitionDesc(), createTableInfo.getProperties()); + List rootFieldNames = columns.stream().map(ColumnDefinition::getName).collect(Collectors.toList()); + Schema schema = toPaimonSchema(structType, rootFieldNames, createTableInfo.getPartitionDesc(), + createTableInfo.getProperties()); try { catalog.createTable(new Identifier(createTableInfo.getDbName(), createTableInfo.getTableName()), schema, createTableInfo.isIfNotExists()); @@ -228,7 +231,8 @@ public boolean performCreateTable(CreateTableInfo createTableInfo) throws UserEx return false; } - private Schema toPaimonSchema(StructType structType, PartitionDesc partitionDesc, Map properties) { + private Schema toPaimonSchema(StructType structType, List rootFieldNames, PartitionDesc partitionDesc, + Map properties) { Map normalizedProperties = new HashMap<>(properties); normalizedProperties.remove(PRIMARY_KEY_IDENTIFIER); normalizedProperties.remove(PROP_COMMENT); @@ -242,19 +246,31 @@ private Schema toPaimonSchema(StructType structType, PartitionDesc partitionDesc .map(String::trim) .collect(Collectors.toList()); List partitionKeys = partitionDesc == null ? new ArrayList<>() : partitionDesc.getPartitionColNames(); + primaryKeys = getPaimonColumnNames(rootFieldNames, primaryKeys); + partitionKeys = getPaimonColumnNames(rootFieldNames, partitionKeys); Schema.Builder schemaBuilder = Schema.newBuilder() .options(normalizedProperties) .primaryKey(primaryKeys) .partitionKeys(partitionKeys) .comment(properties.getOrDefault(PROP_COMMENT, null)); - for (StructField field : structType.getFields()) { - schemaBuilder.column(field.getName(), + List fields = structType.getFields(); + for (int i = 0; i < fields.size(); i++) { + StructField field = fields.get(i); + schemaBuilder.column(rootFieldNames.get(i), toPaimontype(field.getType()).copy(field.getContainsNull()), field.getComment()); } return schemaBuilder.build(); } + private List getPaimonColumnNames(List paimonColumnNames, List dorisColumnNames) { + Map paimonColumnNameMap = paimonColumnNames.stream() + .collect(Collectors.toMap(name -> name.toLowerCase(Locale.ROOT), name -> name)); + return dorisColumnNames.stream() + .map(name -> paimonColumnNameMap.getOrDefault(name.toLowerCase(Locale.ROOT), name)) + .collect(Collectors.toList()); + } + private DataType toPaimontype(Type type) { return DorisTypeVisitor.visit(type, new DorisToPaimonTypeVisitor()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonSysExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonSysExternalTable.java index b6999b7c50c558..744562d0b71027 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonSysExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonSysExternalTable.java @@ -255,7 +255,7 @@ private List buildFullSchema() { for (DataField field : fields) { Column column = new Column( - field.name().toLowerCase(), + field.name(), PaimonUtil.paimonTypeToDorisType( field.type(), getCatalog().getEnableMappingVarbinary(), diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java index 6c988adad30d97..c96ffa6146cf2c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java @@ -92,7 +92,6 @@ import java.util.Base64; import java.util.HashMap; import java.util.List; -import java.util.Locale; import java.util.Map; import java.util.Optional; import java.util.regex.Pattern; @@ -504,7 +503,7 @@ public static List parseSchema(RowType rowType, List primaryKeys boolean enableTimestampTzMapping) { List resSchema = Lists.newArrayListWithCapacity(rowType.getFields().size()); rowType.getFields().forEach(field -> { - resSchema.add(new Column(field.name().toLowerCase(), + resSchema.add(new Column(field.name(), PaimonUtil.paimonTypeToDorisType(field.type(), enableVarbinaryMapping, enableTimestampTzMapping), primaryKeys.contains(field.name()), null, @@ -553,7 +552,7 @@ public static Map getPartitionInfoMap(Table table, BinaryRow par try { String partitionValue = serializePartitionValue(partitionType.getFields().get(i).type(), partitionValuesArray[i], timeZone); - partitionInfoMap.put(partitionKeys.get(i).toLowerCase(Locale.ROOT), partitionValue); + partitionInfoMap.put(partitionKeys.get(i), partitionValue); } catch (UnsupportedOperationException e) { LOG.warn("Failed to serialize table {} partition value for key {}: {}", table.name(), partitionKeys.get(i), e.getMessage()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonPredicateConverter.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonPredicateConverter.java index 963904a4ff467d..867225fdf8b120 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonPredicateConverter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonPredicateConverter.java @@ -47,7 +47,7 @@ public class PaimonPredicateConverter { public PaimonPredicateConverter(RowType rowType) { this.builder = new PredicateBuilder(rowType); - this.fieldNames = rowType.getFields().stream().map(f -> f.name().toLowerCase()).collect(Collectors.toList()); + this.fieldNames = rowType.getFields().stream().map(DataField::name).collect(Collectors.toList()); this.paimonFieldTypes = rowType.getFields().stream().map(DataField::type).collect(Collectors.toList()); } @@ -100,7 +100,7 @@ private Predicate doInPredicate(InPredicate predicate) { return null; } String colName = slotRef.getColumnName(); - int idx = fieldNames.indexOf(colName); + int idx = getFieldIndex(colName); DataType dataType = paimonFieldTypes.get(idx); List valueList = new ArrayList<>(); for (int i = 1; i < predicate.getChildren().size(); i++) { @@ -132,7 +132,7 @@ private Predicate binaryExprDesc(Expr dorisExpr) { return null; } String colName = slotRef.getColumnName(); - int idx = fieldNames.indexOf(colName); + int idx = getFieldIndex(colName); DataType dataType = paimonFieldTypes.get(idx); Object value = dataType.accept(new PaimonValueConverter(literalExpr)); if (value == null) { @@ -174,6 +174,15 @@ private Predicate binaryExprDesc(Expr dorisExpr) { return null; } + private int getFieldIndex(String colName) { + for (int i = 0; i < fieldNames.size(); i++) { + if (fieldNames.get(i).equalsIgnoreCase(colName)) { + return i; + } + } + return fieldNames.indexOf(colName); + } + public static SlotRef convertDorisExprToSlotRef(Expr expr) { SlotRef slotRef = null; diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java index 17a742b835a4fb..2eebf6ff62d7d5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java @@ -69,7 +69,6 @@ import java.util.Collections; import java.util.HashMap; import java.util.List; -import java.util.Locale; import java.util.Map; import java.util.Optional; import java.util.concurrent.ConcurrentHashMap; @@ -228,9 +227,7 @@ private List getOrderedPathPartitionKeys() { && !((PaimonSysExternalTable) externalTable).isDataTable()) { return Collections.emptyList(); } - return source.getPaimonTable().partitionKeys().stream() - .map(key -> key.toLowerCase(Locale.ROOT)) - .collect(Collectors.toList()); + return source.getPaimonTable().partitionKeys(); } private void putHistorySchemaInfo(Long schemaId) { @@ -582,13 +579,9 @@ public List getPaimonSplitFromAPI() throws long startTime = System.currentTimeMillis(); try { Table paimonTable = getProcessedTable(); + List fieldNames = paimonTable.rowType().getFieldNames(); int[] projected = desc.getSlots().stream().mapToInt( - slot -> paimonTable.rowType() - .getFieldNames() - .stream() - .map(String::toLowerCase) - .collect(Collectors.toList()) - .indexOf(slot.getColumn().getName())) + slot -> getFieldIndex(fieldNames, slot.getColumn().getName())) .filter(i -> i >= 0) .toArray(); ReadBuilder readBuilder = paimonTable.newReadBuilder(); @@ -612,6 +605,16 @@ public List getPaimonSplitFromAPI() throws } } + @VisibleForTesting + static int getFieldIndex(List fieldNames, String columnName) { + for (int i = 0; i < fieldNames.size(); i++) { + if (fieldNames.get(i).equalsIgnoreCase(columnName)) { + return i; + } + } + return -1; + } + private String getFileFormat(String path) { return FileFormatUtils.getFileFormatBySuffix(path).orElse(source.getFileFormatFromTableProperties()); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/CreateIcebergTableTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/CreateIcebergTableTest.java index 909359146eb55e..6962f911538f81 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/CreateIcebergTableTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/CreateIcebergTableTest.java @@ -214,6 +214,45 @@ public void testPartition() throws UserException { Assert.assertEquals("b", table.properties().get("a")); } + @Test + public void testPartitionPreservesNonLowercaseColumnNames() throws UserException { + TableIdentifier tb = TableIdentifier.of(dbName, getTableName()); + String sql = "create table " + tb + " (" + + "data int, " + + "`PART` int, " + + "`mIxEd_COL` int" + + ") engine = iceberg " + + "partition by (`PART`, bucket(2, `mIxEd_COL`)) ()"; + createTable(sql); + Table table = ops.getCatalog().loadTable(tb); + Schema schema = table.schema(); + + Assert.assertEquals("PART", schema.columns().get(1).name()); + Assert.assertEquals("mIxEd_COL", schema.columns().get(2).name()); + PartitionSpec spec = PartitionSpec.builderFor(schema) + .identity("PART") + .bucket("mIxEd_COL", 2) + .build(); + Assert.assertEquals(spec, table.spec()); + } + + @Test + public void testSortOrderResolvesNonLowercaseColumnNamesCaseInsensitively() throws UserException { + TableIdentifier tb = TableIdentifier.of(dbName, getTableName()); + String sql = "create table " + tb + " (" + + "data int, " + + "`mIxEd_COL` int" + + ") engine = iceberg " + + "order by (`mixed_col` asc)"; + createTable(sql); + Table table = ops.getCatalog().loadTable(tb); + Schema schema = table.schema(); + + Assert.assertEquals("mIxEd_COL", schema.columns().get(1).name()); + Assert.assertEquals(1, table.sortOrder().fields().size()); + Assert.assertEquals(schema.findField("mIxEd_COL").fieldId(), table.sortOrder().fields().get(0).sourceId()); + } + public void createTable(String sql) throws UserException { LogicalPlan plan = new NereidsParser().parseSingle(sql); Assertions.assertTrue(plan instanceof CreateTableCommand); diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/IcebergExternalTableTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/IcebergExternalTableTest.java index 6bb96bf0d43c75..0a5a4ab11d4621 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/IcebergExternalTableTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/IcebergExternalTableTest.java @@ -321,6 +321,13 @@ public void testGetPartitionSpecSqlIdentity() { Assertions.assertEquals("PARTITION BY LIST (`d_year`) ()", spy.getPartitionSpecSql()); } + @Test + public void testGetPartitionSpecSqlPreservesNonLowercaseColumnName() { + IcebergExternalTable spy = createSpyTable(); + setupSingleField(mockTransform("identity"), "mIxEd_COL"); + Assertions.assertEquals("PARTITION BY LIST (`mIxEd_COL`) ()", spy.getPartitionSpecSql()); + } + @Test public void testGetPartitionSpecSqlBucket() { IcebergExternalTable spy = createSpyTable(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/IcebergUtilsTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/IcebergUtilsTest.java index 5b25da419ccc04..2cb33c84fb2399 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/IcebergUtilsTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/IcebergUtilsTest.java @@ -204,6 +204,18 @@ public void testAppendRowLineageFieldsForV3AddsMetadataFields() { Assert.assertNotNull(schemaWithRowLineage.findField(MetadataColumns.LAST_UPDATED_SEQUENCE_NUMBER.fieldId())); } + @Test + public void testParseSchemaPreservesNonLowercaseColumnNames() { + Schema schema = new Schema( + Types.NestedField.required(1, "mIxEd_COL", Types.IntegerType.get()), + Types.NestedField.required(2, "PART", Types.StringType.get())); + + List columns = IcebergUtils.parseSchema(schema, false, false); + + Assert.assertEquals("mIxEd_COL", columns.get(0).getName()); + Assert.assertEquals("PART", columns.get(1).getName()); + } + @Test public void testGetPartitionInfoMapSkipBinaryIdentityPartition() { Schema schema = new Schema( @@ -222,11 +234,11 @@ public void testGetPartitionInfoMapSkipBinaryIdentityPartition() { public void testGetIdentityPartitionColumnsIgnoresTransformPartitions() { Schema schema = new Schema( Types.NestedField.required(1, "id", Types.IntegerType.get()), - Types.NestedField.required(2, "dt", Types.StringType.get()), + Types.NestedField.required(2, "Dt", Types.StringType.get()), Types.NestedField.required(3, "ts", Types.TimestampType.withoutZone())); PartitionSpec specWithTransform = PartitionSpec.builderFor(schema) .withSpecId(1) - .identity("dt") + .identity("Dt") .day("ts") .build(); PartitionSpec identityOnlySpec = PartitionSpec.builderFor(schema) @@ -241,16 +253,16 @@ public void testGetIdentityPartitionColumnsIgnoresTransformPartitions() { Mockito.when(table.schema()).thenReturn(schema); Mockito.when(table.specs()).thenReturn(specs); - Assert.assertEquals(Arrays.asList("dt", "id"), IcebergUtils.getIdentityPartitionColumns(table)); + Assert.assertEquals(Arrays.asList("Dt", "id"), IcebergUtils.getIdentityPartitionColumns(table)); } @Test public void testGetIdentityPartitionInfoMapReturnsIdentityColumnsOnly() { Schema schema = new Schema( - Types.NestedField.required(1, "dt", Types.StringType.get()), + Types.NestedField.required(1, "Dt", Types.StringType.get()), Types.NestedField.required(2, "ts", Types.TimestampType.withoutZone())); PartitionSpec partitionSpec = PartitionSpec.builderFor(schema) - .identity("dt") + .identity("Dt") .day("ts") .build(); PartitionData partitionData = new PartitionData(partitionSpec.partitionType()); @@ -262,7 +274,7 @@ public void testGetIdentityPartitionInfoMapReturnsIdentityColumnsOnly() { Map partitionInfoMap = IcebergUtils.getIdentityPartitionInfoMap( partitionData, partitionSpec, table, "UTC"); - Assert.assertEquals(Collections.singletonMap("dt", "2025-01-01"), partitionInfoMap); + Assert.assertEquals(Collections.singletonMap("Dt", "2025-01-01"), partitionInfoMap); } @Test diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/paimon/PaimonMetadataOpsTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/paimon/PaimonMetadataOpsTest.java index e4146faa690ba9..dda2c3d23447a4 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/paimon/PaimonMetadataOpsTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/paimon/PaimonMetadataOpsTest.java @@ -53,6 +53,7 @@ import java.util.HashMap; import java.util.List; import java.util.UUID; +import java.util.stream.Collectors; public class PaimonMetadataOpsTest { public static String warehouse; @@ -196,6 +197,30 @@ public void testPartition() throws Exception { Assert.assertEquals(1, table.primaryKeys().size()); } + @Test + public void testPartitionPreservesNonLowercaseColumnNames() throws Exception { + String tableName = getTableName(); + Identifier identifier = new Identifier(dbName, tableName); + String sql = "create table " + dbName + "." + tableName + " (" + + "data int, " + + "`PART` int, " + + "`mIxEd_COL` int" + + ") engine = paimon " + + "partition by (`PART`) ()"; + createTable(sql); + Catalog catalog = ops.getCatalog(); + Table table = catalog.getTable(identifier); + + List columnNames = table.rowType().getFields().stream() + .map(DataField::name) + .collect(Collectors.toList()); + + Assert.assertEquals("PART", columnNames.get(1)); + Assert.assertEquals("mIxEd_COL", columnNames.get(2)); + Assert.assertEquals(1, table.partitionKeys().size()); + Assert.assertEquals("PART", table.partitionKeys().get(0)); + } + @Test public void testBucket() throws Exception { String tableName = getTableName(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/paimon/PaimonUtilTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/paimon/PaimonUtilTest.java index 4a2b609023bb93..050e547816d404 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/paimon/PaimonUtilTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/paimon/PaimonUtilTest.java @@ -17,6 +17,7 @@ package org.apache.doris.datasource.paimon; +import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Type; import org.apache.doris.thrift.TPrimitiveType; import org.apache.doris.thrift.schema.external.TFieldPtr; @@ -30,6 +31,7 @@ import org.apache.paimon.types.CharType; import org.apache.paimon.types.DataField; import org.apache.paimon.types.DataTypes; +import org.apache.paimon.types.RowType; import org.apache.paimon.types.VarCharType; import org.junit.Assert; import org.junit.Test; @@ -84,7 +86,20 @@ public void testGetPartitionInfoMapSupportsFloatingPointPartitions() { } @Test - public void testGetPartitionInfoMapUsesLowerCaseKeys() { + public void testParseSchemaPreservesNonLowercaseColumnNames() { + RowType rowType = DataTypes.ROW( + DataTypes.FIELD(0, "mIxEd_COL", DataTypes.INT()), + DataTypes.FIELD(1, "PART", DataTypes.STRING())); + + List columns = PaimonUtil.parseSchema(rowType, Collections.singletonList("PART"), false, false); + + Assert.assertEquals("mIxEd_COL", columns.get(0).getName()); + Assert.assertEquals("PART", columns.get(1).getName()); + Assert.assertTrue(columns.get(1).isKey()); + } + + @Test + public void testGetPartitionInfoMapPreservesNonLowercaseKeys() { DataField mixedCasePartition = DataTypes.FIELD(0, "Dt", DataTypes.STRING()); Table table = Mockito.mock(Table.class); Mockito.when(table.name()).thenReturn("mock_table"); @@ -95,8 +110,8 @@ public void testGetPartitionInfoMapUsesLowerCaseKeys() { Map partitionInfoMap = PaimonUtil.getPartitionInfoMap(table, partitionValues, "UTC"); - Assert.assertFalse(partitionInfoMap.containsKey("Dt")); - Assert.assertEquals("2026-05-26", partitionInfoMap.get("dt")); + Assert.assertFalse(partitionInfoMap.containsKey("dt")); + Assert.assertEquals("2026-05-26", partitionInfoMap.get("Dt")); } @Test diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/paimon/source/PaimonScanNodeTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/paimon/source/PaimonScanNodeTest.java index c5ddc82a3f63df..0c7ca7c59408ae 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/paimon/source/PaimonScanNodeTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/paimon/source/PaimonScanNodeTest.java @@ -550,7 +550,7 @@ public void testGetPathPartitionKeysReturnsTablePartitionKeys() throws Exception Mockito.when(sysTable.isDataTable()).thenReturn(true); node.setSource(source); - Assert.assertEquals(Arrays.asList("dt", "region"), node.getPathPartitionKeys()); + Assert.assertEquals(Arrays.asList("Dt", "Region"), node.getPathPartitionKeys()); } @Test @@ -583,19 +583,28 @@ public void testSetPaimonParamsUsesOrderedPartitionKeys() throws Exception { rangeDesc.setColumnsFromPath(Collections.singletonList("old")); rangeDesc.setColumnsFromPathIsNull(Collections.singletonList(false)); Map partitionValues = new HashMap<>(); - partitionValues.put("dt", "2025-01-01"); - partitionValues.put("pt", "p1"); + partitionValues.put("Dt", "2025-01-01"); + partitionValues.put("Pt", "p1"); PaimonSplit split = new PaimonSplit(createDataSplit("ordered.parquet")); split.setPaimonPartitionValues(partitionValues); invokePrivateMethod(node, "setPaimonParams", new Class[] {TFileRangeDesc.class, PaimonSplit.class}, rangeDesc, split); - Assert.assertEquals(Arrays.asList("pt", "dt"), rangeDesc.getColumnsFromPathKeys()); + Assert.assertEquals(Arrays.asList("Pt", "Dt"), rangeDesc.getColumnsFromPathKeys()); Assert.assertEquals(Arrays.asList("p1", "2025-01-01"), rangeDesc.getColumnsFromPath()); Assert.assertEquals(Arrays.asList(false, false), rangeDesc.getColumnsFromPathIsNull()); } + @Test + public void testGetFieldIndexMatchesMixedCaseColumns() { + List fieldNames = Arrays.asList("data", "mIxEd_COL", "PART"); + + Assert.assertEquals(1, PaimonScanNode.getFieldIndex(fieldNames, "mixed_col")); + Assert.assertEquals(2, PaimonScanNode.getFieldIndex(fieldNames, "part")); + Assert.assertEquals(-1, PaimonScanNode.getFieldIndex(fieldNames, "missing_col")); + } + private void mockJniReader(PaimonScanNode spyNode) { Mockito.doReturn(false).when(spyNode).supportNativeReader(ArgumentMatchers.any(Optional.class)); } diff --git a/regression-test/data/external_table_p0/iceberg/test_iceberg_invaild_avro_name.out b/regression-test/data/external_table_p0/iceberg/test_iceberg_invaild_avro_name.out index 1b8af743c6a0f2..be0c0e2236450e 100644 --- a/regression-test/data/external_table_p0/iceberg/test_iceberg_invaild_avro_name.out +++ b/regression-test/data/external_table_p0/iceberg/test_iceberg_invaild_avro_name.out @@ -1,7 +1,7 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !desc -- id int Yes true \N -test:a1b2.raw.abc-gg-1-a text Yes true \N +TEST:A1B2.RAW.ABC-GG-1-A text Yes true \N -- !q_1 -- 1 row1 @@ -29,7 +29,7 @@ test:a1b2.raw.abc-gg-1-a text Yes true \N -- !desc -- id int Yes true \N -test:a1b2.raw.abc-gg-1-a text Yes true \N +TEST:A1B2.RAW.ABC-GG-1-A text Yes true \N -- !q_1 -- 1 row1 @@ -54,4 +54,3 @@ test:a1b2.raw.abc-gg-1-a text Yes true \N 3 row3 2 row2 1 row1 - diff --git a/regression-test/suites/external_table_p0/paimon/test_paimon_catalog.groovy b/regression-test/suites/external_table_p0/paimon/test_paimon_catalog.groovy index 720c4e21911064..a011a9f64a7eb0 100644 --- a/regression-test/suites/external_table_p0/paimon/test_paimon_catalog.groovy +++ b/regression-test/suites/external_table_p0/paimon/test_paimon_catalog.groovy @@ -307,7 +307,7 @@ suite("test_paimon_catalog", "p0,external") { test { sql """select * from dup_columns_table;""" - exception "Duplicate column name found: id" + exception "Duplicate column name found: ID" } sql """ set force_jni_scanner=false; """ @@ -332,4 +332,3 @@ suite("test_paimon_catalog", "p0,external") { } } - diff --git a/regression-test/suites/external_table_p0/paimon/test_paimon_jdbc_catalog.groovy b/regression-test/suites/external_table_p0/paimon/test_paimon_jdbc_catalog.groovy index 82d8d5b0dfae10..1e7899d8272eb4 100644 --- a/regression-test/suites/external_table_p0/paimon/test_paimon_jdbc_catalog.groovy +++ b/regression-test/suites/external_table_p0/paimon/test_paimon_jdbc_catalog.groovy @@ -247,7 +247,7 @@ suite("test_paimon_jdbc_catalog", "p0,external") { assertSystemTableReadable( "paimon_jdbc_row_tracking_tbl\$row_tracking", - ["_row_id", "_sequence_number"], + ["_ROW_ID", "_SEQUENCE_NUMBER"], 1 ) } finally {