diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 index 359be7ed0f1dd7..474ef8e193115b 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 @@ -1068,7 +1068,7 @@ identityOrFunction ; dataDesc - : ((WITH)? mergeType)? DATA INFILE LEFT_PAREN filePaths+=STRING_LITERAL (COMMA filePath+=STRING_LITERAL)* RIGHT_PAREN + : ((WITH)? mergeType)? DATA INFILE LEFT_PAREN filePaths+=STRING_LITERAL (COMMA filePaths+=STRING_LITERAL)* RIGHT_PAREN (negative=NEGATIVE)? INTO TABLE targetTableName=identifier (partitionSpec)? diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index db82e639971085..8c6a977f2d94d0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -2275,7 +2275,7 @@ public LogicalPlan visitLoad(DorisParser.LoadContext ctx) { for (Token filePath : ddc.filePaths) { multiFilePaths.add(filePath.getText().substring(1, filePath.getText().length() - 1)); } - List filePaths = ddc.filePath == null ? null : multiFilePaths; + List filePaths = multiFilePaths.isEmpty() ? null : multiFilePaths; List colMappings; if (ddc.columnMapping == null) { colMappings = null; diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/LoadCommandTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/LoadCommandTest.java index 6a0c9ffab39e09..60ee030dcf3624 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/LoadCommandTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/LoadCommandTest.java @@ -117,6 +117,34 @@ public void testLoadCommandBitmap() { Assertions.assertTrue(dataDescription.getColumnMappingList().get(1).child(0).getExpressionName().contains("userid_bitmap")); } + @Test + public void testLoadCommandWithMultipleFiles() { + String loadSql = "LOAD LABEL customer_multiple_files_test( " + + " DATA INFILE(\"s3://bucket/customer/part-1\", " + + " \"s3://bucket/customer/part-2\", " + + " \"s3://bucket/customer/part-3\") " + + " INTO TABLE customer" + + " ) " + + " WITH S3( " + + " \"s3.access_key\" = \"AK\", " + + " \"s3.secret_key\" = \"SK\", " + + " \"s3.endpoint\" = \"cos.ap-beijing.myqcloud.com\", " + + " \"s3.region\" = \"ap-beijing\");"; + + List> statements = new NereidsParser().parseMultiple(loadSql); + Assertions.assertFalse(statements.isEmpty()); + + LoadCommand command = (LoadCommand) statements.get(0).first; + List dataDescriptions = command.getDataDescriptions(); + Assertions.assertFalse(dataDescriptions.isEmpty()); + + List filePaths = dataDescriptions.get(0).getFilePaths(); + Assertions.assertEquals(3, filePaths.size()); + Assertions.assertEquals("s3://bucket/customer/part-1", filePaths.get(0)); + Assertions.assertEquals("s3://bucket/customer/part-2", filePaths.get(1)); + Assertions.assertEquals("s3://bucket/customer/part-3", filePaths.get(2)); + } + @Test public void testLoadCommand() throws Exception { String loadSql1 = "LOAD LABEL customer_lable_for_test( " diff --git a/regression-test/data/load_p0/broker_load/test_broker_load_multi_filegroup.out b/regression-test/data/load_p0/broker_load/test_broker_load_multi_filegroup.out index e408e27718c077..e7136e3aed638c 100644 --- a/regression-test/data/load_p0/broker_load/test_broker_load_multi_filegroup.out +++ b/regression-test/data/load_p0/broker_load/test_broker_load_multi_filegroup.out @@ -5,3 +5,5 @@ -- !pr22666_2 -- 100490 +-- !multi_infile_count -- +200000 diff --git a/regression-test/suites/load_p0/broker_load/test_broker_load_multi_filegroup.groovy b/regression-test/suites/load_p0/broker_load/test_broker_load_multi_filegroup.groovy index 545f7d2ce81805..e5ebea8339bb75 100644 --- a/regression-test/suites/load_p0/broker_load/test_broker_load_multi_filegroup.groovy +++ b/regression-test/suites/load_p0/broker_load/test_broker_load_multi_filegroup.groovy @@ -85,5 +85,60 @@ suite("test_broker_load_multi_filegroup", "p0") { order_qt_pr22666_1 """ select count(*) from ${tbl_22666} where p_brand is not null limit 10;""" order_qt_pr22666_2 """ select count(*) from ${tbl_22666} where p_name is not null limit 10;""" -} + def tbl_multi_infile = "part_multi_infile" + sql """drop table if exists ${tbl_multi_infile} force""" + sql """ + CREATE TABLE ${tbl_multi_infile} ( + p_partkey int NULL, + p_name VARCHAR(55) NULL, + p_mfgr VARCHAR(25) NULL + )ENGINE=OLAP + DUPLICATE KEY(`p_partkey`) + DISTRIBUTED BY HASH(`p_partkey`) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ); + """ + def label_multi_infile = "part_multi_infile_" + UUID.randomUUID().toString().replace("-", "0") + sql """ + LOAD LABEL ${label_multi_infile} ( + DATA INFILE( + "s3://${s3BucketName}/regression/load/data/part0.parquet", + "s3://${s3BucketName}/regression/load/data/part1.parquet" + ) + INTO TABLE ${tbl_multi_infile} + FORMAT AS "PARQUET" + (p_partkey, p_name, p_mfgr) + ) + WITH S3 ( + "AWS_ACCESS_KEY" = "${getS3AK()}", + "AWS_SECRET_KEY" = "${getS3SK()}", + "AWS_ENDPOINT" = "${s3Endpoint}", + "AWS_REGION" = "${s3Region}", + "provider" = "${getS3Provider()}" + ); + """ + + max_try_milli_secs = 600000 + while (max_try_milli_secs > 0) { + def String[][] result = sql """ show load where label="$label_multi_infile" order by createtime desc limit 1; """ + logger.info("Load status: " + result[0]) + if (result[0][2].equals("FINISHED")) { + logger.info("Load FINISHED " + label_multi_infile) + break; + } + if (result[0][2].equals("CANCELLED")) { + assertTrue(false, "load failed: $result") + break; + } + Thread.sleep(1000) + max_try_milli_secs -= 1000 + if(max_try_milli_secs <= 0) { + assertTrue(1 == 2, "load Timeout: $label_multi_infile") + } + } + + order_qt_multi_infile_count """ select count(*) from ${tbl_multi_infile};""" + +}