Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ case class CometScanExec(
* initialized. See SPARK-26327 for more details.
*/
private def sendDriverMetrics(): Unit = {
driverMetrics.foreach(e => metrics(e._1).add(e._2))
driverMetrics.foreach(e => metrics(e._1).set(e._2))
val executionId = sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY)
SQLMetrics.postDriverMetricUpdates(
sparkContext,
Expand Down
33 changes: 33 additions & 0 deletions spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2173,6 +2173,39 @@ class CometExecSuite extends CometTestBase {
}
}

test("Native_datafusion reports correct files and bytes scanned") {
  // Number of output files we force via repartition; the scan metrics must
  // report exactly this many files even after multiple actions.
  val inputFiles = 2
  withTempDir { dir =>
    val path = new java.io.File(dir, "test_metrics").getAbsolutePath
    spark.range(100).repartition(inputFiles).write.mode("overwrite").parquet(path)

    withSQLConf(
      CometConf.COMET_ENABLED.key -> "true",
      CometConf.COMET_EXEC_ENABLED.key -> "true",
      CometConf.COMET_NATIVE_SCAN_IMPL.key -> "native_datafusion") {
      val df = spark.read.parquet(path)

      // Trigger two different actions to ensure metrics are not duplicated
      // (i.e. driver metrics use set() rather than add() semantics).
      df.count()
      df.collect()

      // Strip the AQE wrapper and locate the Comet scan node, whichever
      // concrete scan implementation was planned.
      val scanNode = stripAQEPlan(df.queryExecution.executedPlan)
        .collectFirst {
          case n: org.apache.spark.sql.comet.CometNativeScanExec => n
          case n: org.apache.spark.sql.comet.CometScanExec => n
        }
        .getOrElse {
          fail(
            s"Comet scan node not found in the physical plan. Plan: \n${df.queryExecution.executedPlan}")
        }

      val numFiles = scanNode.metrics("numFiles").value
      assert(
        numFiles == inputFiles,
        s"Expected exactly $inputFiles files to be scanned, but got metrics reporting $numFiles")
    }
  }
}

case class BucketedTableTestSpec(
Expand Down
Loading