Skip to content

Commit 13fec97

Browse files
committed
[SPARK-54831][SQL] Add COMMENT ON COLUMN support to set and remove column comments
1 parent 87a3b06 commit 13fec97

File tree

8 files changed

+281
-5
lines changed

8 files changed

+281
-5
lines changed

common/utils/src/main/resources/error/error-conditions.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -806,6 +806,12 @@
806806
],
807807
"sqlState" : "22003"
808808
},
809+
"COMMENT_ON_COLUMN_REQUIRES_TABLE_NAME" : {
810+
"message" : [
811+
"COMMENT ON COLUMN requires at least table name and column name (e.g., table.column, database.table.column, or catalog.database.table.column). Found: <columnRef>."
812+
],
813+
"sqlState" : "42601"
814+
},
809815
"COMPARATOR_RETURNS_NULL" : {
810816
"message" : [
811817
"The comparator has returned a NULL for a comparison between <firstValue> and <secondValue>.",

sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,7 @@ statement
379379
| COMMENT ON namespace identifierReference IS
380380
comment #commentNamespace
381381
| COMMENT ON TABLE identifierReference IS comment #commentTable
382+
| COMMENT ON COLUMN multipartIdentifier IS comment #commentColumn
382383
| REFRESH TABLE identifierReference #refreshTable
383384
| REFRESH FUNCTION identifierReference #refreshFunction
384385
| REFRESH (stringLit | .*?) #refreshResource

sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -830,6 +830,19 @@ private[sql] object QueryParsingErrors extends DataTypeErrorsBase {
830830
ctx)
831831
}
832832

833+
/**
834+
* Indicates that COMMENT ON COLUMN must specify a table name along with column name.
835+
*
836+
* @throws ParseException
837+
* Always throws this exception
838+
*/
839+
def commentOnColumnRequiresTableNameError(ctx: ParserRuleContext): Nothing = {
840+
throw new ParseException(
841+
errorClass = "COMMENT_ON_COLUMN_REQUIRES_TABLE_NAME",
842+
messageParameters = Map("columnRef" -> ctx.getText),
843+
ctx = ctx)
844+
}
845+
833846
/**
834847
* Throws an internal error for unexpected parameter markers found during AST building. This
835848
* should be unreachable in normal operation due to grammar-level blocking.

sql/api/src/main/scala/org/apache/spark/sql/types/StructField.scala

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -149,13 +149,16 @@ case class StructField(
149149
}
150150

151151
/**
152-
* Updates the StructField with a new comment value.
152+
* Updates the StructField with a new comment value. If the comment is null, the comment key is
153+
* removed from metadata.
153154
*/
154155
def withComment(comment: String): StructField = {
155-
val newMetadata = new MetadataBuilder()
156-
.withMetadata(metadata)
157-
.putString("comment", comment)
158-
.build()
156+
val builder = new MetadataBuilder().withMetadata(metadata)
157+
val newMetadata = if (comment == null) {
158+
builder.remove("comment").build()
159+
} else {
160+
builder.putString("comment", comment).build()
161+
}
159162
copy(metadata = newMetadata)
160163
}
161164

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6397,6 +6397,25 @@ class AstBuilder extends DataTypeAstBuilder
63976397
CommentOnTable(createUnresolvedTable(ctx.identifierReference, "COMMENT ON TABLE"), comment)
63986398
}
63996399

6400+
override def visitCommentColumn(ctx: CommentColumnContext): LogicalPlan = withOrigin(ctx) {
6401+
val comment = visitComment(ctx.comment)
6402+
val nameParts = visitMultipartIdentifier(ctx.multipartIdentifier)
6403+
if (nameParts.length < 2) {
6404+
throw QueryParsingErrors.commentOnColumnRequiresTableNameError(ctx)
6405+
}
6406+
// The last part is the column name, everything before that is the table name
6407+
// This allows:
6408+
// - table.column (uses current catalog and database)
6409+
// - database.table.column (uses current catalog)
6410+
// - catalog.database.table.column (fully qualified)
6411+
val tableName = nameParts.init
6412+
val columnName = Seq(nameParts.last)
6413+
CommentOnColumn(
6414+
UnresolvedTable(tableName, "COMMENT ON COLUMN"),
6415+
UnresolvedFieldName(columnName),
6416+
comment)
6417+
}
6418+
64006419
override def visitComment (ctx: CommentContext): String = {
64016420
Option(ctx.stringLit()).map(s => string(visitStringLit(s))).getOrElse("")
64026421
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2AlterTableCommands.scala

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,28 @@ case class CommentOnTable(table: LogicalPlan, comment: String) extends AlterTabl
5858
copy(table = newChild)
5959
}
6060

61+
/**
62+
* The logical plan that defines or changes the comment of a COLUMN for v2 catalogs.
63+
*
64+
* {{{
65+
* COMMENT ON COLUMN catalog.namespace.table.column IS ('text' | NULL)
66+
* }}}
67+
*
68+
* where the `text` is the new comment written as a string literal; or `NULL` to drop the comment.
69+
*/
70+
case class CommentOnColumn(
71+
table: LogicalPlan,
72+
column: FieldName,
73+
comment: String) extends AlterTableCommand {
74+
override def changes: Seq[TableChange] = {
75+
require(column.resolved, "FieldName should be resolved before it's converted to TableChange.")
76+
Seq(TableChange.updateColumnComment(column.name.toArray, comment))
77+
}
78+
override protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan =
79+
copy(table = newChild)
80+
}
81+
82+
6183
/**
6284
* The logical plan of the ALTER TABLE ... SET LOCATION command.
6385
*/

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2626,6 +2626,46 @@ class DDLParserSuite extends AnalysisTest {
26262626
comparePlans(
26272627
parsePlan("COMMENT ON TABLE a.b.c IS 'xYz'"),
26282628
CommentOnTable(UnresolvedTable(Seq("a", "b", "c"), "COMMENT ON TABLE"), "xYz"))
2629+
2630+
// Test COMMENT ON COLUMN with fully qualified name (catalog.database.table.column)
2631+
comparePlans(
2632+
parsePlan("COMMENT ON COLUMN my_catalog.my_db.my_table.my_column IS NULL"),
2633+
CommentOnColumn(
2634+
UnresolvedTable(Seq("my_catalog", "my_db", "my_table"), "COMMENT ON COLUMN"),
2635+
UnresolvedFieldName(Seq("my_column")),
2636+
""))
2637+
2638+
// Test COMMENT ON COLUMN with 3-part name (database.table.column)
2639+
comparePlans(
2640+
parsePlan("COMMENT ON COLUMN a.b.c.d IS 'column comment'"),
2641+
CommentOnColumn(
2642+
UnresolvedTable(Seq("a", "b", "c"), "COMMENT ON COLUMN"),
2643+
UnresolvedFieldName(Seq("d")),
2644+
"column comment"))
2645+
2646+
// Test COMMENT ON COLUMN with 2-part name (table.column) - uses current catalog and database
2647+
comparePlans(
2648+
parsePlan("COMMENT ON COLUMN my_table.my_column IS 'simple comment'"),
2649+
CommentOnColumn(
2650+
UnresolvedTable(Seq("my_table"), "COMMENT ON COLUMN"),
2651+
UnresolvedFieldName(Seq("my_column")),
2652+
"simple comment"))
2653+
2654+
// Test COMMENT ON COLUMN with 'NULL' string literal
2655+
comparePlans(
2656+
parsePlan("COMMENT ON COLUMN a.b.c.d IS 'NULL'"),
2657+
CommentOnColumn(
2658+
UnresolvedTable(Seq("a", "b", "c"), "COMMENT ON COLUMN"),
2659+
UnresolvedFieldName(Seq("d")),
2660+
"NULL"))
2661+
2662+
// Test COMMENT ON COLUMN with empty string
2663+
comparePlans(
2664+
parsePlan("COMMENT ON COLUMN a.b.c.d IS ''"),
2665+
CommentOnColumn(
2666+
UnresolvedTable(Seq("a", "b", "c"), "COMMENT ON COLUMN"),
2667+
UnresolvedFieldName(Seq("d")),
2668+
""))
26292669
}
26302670

26312671
test("create table - without using") {
@@ -2775,6 +2815,7 @@ class DDLParserSuite extends AnalysisTest {
27752815
None,
27762816
None,
27772817
dropDefault = true))))
2818+
27782819
// Make sure that the parser returns an exception when the feature is disabled.
27792820
withSQLConf(SQLConf.ENABLE_DEFAULT_COLUMNS.key -> "false") {
27802821
val sql = "CREATE TABLE my_tab(a INT, b STRING NOT NULL DEFAULT \"abc\") USING parquet"

sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2830,6 +2830,177 @@ class DataSourceV2SQLSuiteV1Filter
28302830
}
28312831
}
28322832

2833+
test("COMMENT ON COLUMN") {
2834+
// use the default v2 session catalog.
2835+
spark.conf.set(V2_SESSION_CATALOG_IMPLEMENTATION, "builtin")
2836+
// Session catalog is used.
2837+
withTable("t") {
2838+
sql("CREATE TABLE t(k int COMMENT 'original comment', v string) USING json")
2839+
2840+
// Verify original comment
2841+
val originalComment = sql("DESCRIBE t").filter("col_name = 'k'")
2842+
.select("comment").first().getString(0)
2843+
assert(originalComment === "original comment",
2844+
"Expected original comment to be set")
2845+
2846+
// Set a new comment
2847+
checkColumnComment("t", "k", "new comment")
2848+
2849+
// Verify comment is updated
2850+
val newComment = sql("DESCRIBE t").filter("col_name = 'k'")
2851+
.select("comment").first().getString(0)
2852+
assert(newComment === "new comment",
2853+
"Expected comment to be updated to 'new comment'")
2854+
2855+
// Remove comment by setting to NULL
2856+
checkColumnComment("t", "k", null)
2857+
2858+
// Verify comment is removed
2859+
val removedComment = sql("DESCRIBE t").filter("col_name = 'k'")
2860+
.select("comment").first().getString(0)
2861+
assert(removedComment.isEmpty,
2862+
"Expected comment to be removed")
2863+
2864+
// Set comment to literal "NULL" string
2865+
checkColumnComment("t", "k", "NULL")
2866+
val nullStringComment = sql("DESCRIBE t").filter("col_name = 'k'")
2867+
.select("comment").first().getString(0)
2868+
assert(nullStringComment === "NULL",
2869+
"Expected comment to be set to 'NULL' string")
2870+
2871+
// Set comment to empty string
2872+
checkColumnComment("t", "k", "")
2873+
val emptyComment = sql("DESCRIBE t").filter("col_name = 'k'")
2874+
.select("comment").first().getString(0)
2875+
assert(emptyComment === "",
2876+
"Expected comment to be set to empty string (not removed)")
2877+
2878+
// Add comment to column that didn't have one
2879+
checkColumnComment("t", "v", "new column comment")
2880+
val vComment = sql("DESCRIBE t").filter("col_name = 'v'")
2881+
.select("comment").first().getString(0)
2882+
assert(vComment === "new column comment",
2883+
"Expected comment to be set on column v")
2884+
2885+
// Remove comment from column v using COMMENT ON COLUMN IS NULL
2886+
checkColumnComment("t", "v", null)
2887+
val removedVComment = sql("DESCRIBE t").filter("col_name = 'v'")
2888+
.select("comment").first().getString(0)
2889+
assert(removedVComment.isEmpty,
2890+
"Expected comment to be removed using COMMENT ON COLUMN IS NULL")
2891+
}
2892+
2893+
// V2 non-session catalog is used.
2894+
withTable("testcat.ns1.ns2.t") {
2895+
sql("CREATE TABLE testcat.ns1.ns2.t(k int COMMENT 'original', v string) USING foo")
2896+
2897+
// Verify original comment
2898+
val originalComment = sql("DESCRIBE testcat.ns1.ns2.t").filter("col_name = 'k'")
2899+
.select("comment").first().getString(0)
2900+
assert(originalComment === "original",
2901+
"Expected original comment for testcat table")
2902+
2903+
// Set a comment
2904+
checkColumnComment("testcat.ns1.ns2.t", "k", "updated comment")
2905+
2906+
// Verify comment is set
2907+
val updatedComment = sql("DESCRIBE testcat.ns1.ns2.t").filter("col_name = 'k'")
2908+
.select("comment").first().getString(0)
2909+
assert(updatedComment === "updated comment",
2910+
"Expected comment to be updated for testcat table")
2911+
2912+
// Remove comment by setting to NULL
2913+
checkColumnComment("testcat.ns1.ns2.t", "k", null)
2914+
2915+
// Verify comment is removed
2916+
val removedComment = sql("DESCRIBE testcat.ns1.ns2.t").filter("col_name = 'k'")
2917+
.select("comment").first().getString(0)
2918+
assert(removedComment === null || removedComment.isEmpty,
2919+
"Expected comment to be removed from testcat table")
2920+
2921+
// Set comment to empty string
2922+
checkColumnComment("testcat.ns1.ns2.t", "k", "")
2923+
val emptyComment = sql("DESCRIBE testcat.ns1.ns2.t").filter("col_name = 'k'")
2924+
.select("comment").first().getString(0)
2925+
assert(emptyComment === "",
2926+
"Expected comment to be set to empty string for testcat table")
2927+
2928+
// Set a comment on column v and then remove it using COMMENT ON COLUMN IS NULL
2929+
sql("COMMENT ON COLUMN testcat.ns1.ns2.t.v IS 'temp comment'")
2930+
val tempComment = sql("DESCRIBE testcat.ns1.ns2.t").filter("col_name = 'v'")
2931+
.select("comment").first().getString(0)
2932+
assert(tempComment === "temp comment",
2933+
"Expected temp comment to be set")
2934+
2935+
checkColumnComment("testcat.ns1.ns2.t", "v", null)
2936+
val droppedVComment = sql("DESCRIBE testcat.ns1.ns2.t").filter("col_name = 'v'")
2937+
.select("comment").first().getString(0)
2938+
assert(droppedVComment === null || droppedVComment.isEmpty,
2939+
"Expected comment to be removed using COMMENT ON COLUMN IS NULL in testcat")
2940+
}
2941+
2942+
// Test error case: column not found
2943+
withTable("t2") {
2944+
sql("CREATE TABLE t2(k int) USING json")
2945+
val sql1 = "COMMENT ON COLUMN t2.nonexistent IS 'test'"
2946+
checkError(
2947+
exception = analysisException(sql1),
2948+
condition = "UNRESOLVED_COLUMN.WITH_SUGGESTION",
2949+
parameters = Map(
2950+
"objectName" -> "`nonexistent`",
2951+
"proposal" -> "`k`"),
2952+
context = ExpectedContext(
2953+
fragment = sql1,
2954+
start = 0,
2955+
stop = 41))
2956+
}
2957+
2958+
// Test with different qualification levels
2959+
withDatabase("test_db") {
2960+
sql("CREATE DATABASE test_db")
2961+
sql("USE test_db")
2962+
withTable("test_db.qualified_test") {
2963+
sql("CREATE TABLE qualified_test(x int, y string) USING json")
2964+
2965+
// Test with just table.column (uses current database)
2966+
sql("COMMENT ON COLUMN qualified_test.x IS 'test with table.column'")
2967+
val comment1 = sql("DESCRIBE qualified_test").filter("col_name = 'x'")
2968+
.select("comment").first().getString(0)
2969+
assert(comment1 === "test with table.column",
2970+
"Expected comment to be set using table.column format")
2971+
2972+
// Test with database.table.column
2973+
sql("COMMENT ON COLUMN test_db.qualified_test.y IS 'test with database.table.column'")
2974+
val comment2 = sql("DESCRIBE qualified_test").filter("col_name = 'y'")
2975+
.select("comment").first().getString(0)
2976+
assert(comment2 === "test with database.table.column",
2977+
"Expected comment to be set using database.table.column format")
2978+
2979+
// Test removing comment with just table.column
2980+
sql("COMMENT ON COLUMN qualified_test.x IS NULL")
2981+
val removedComment = sql("DESCRIBE qualified_test").filter("col_name = 'x'")
2982+
.select("comment").first().getString(0)
2983+
assert(removedComment === null || removedComment.isEmpty,
2984+
"Expected comment to be removed using table.column format")
2985+
}
2986+
}
2987+
}
2988+
2989+
private def checkColumnComment(tableName: String, columnName: String, comment: String): Unit = {
2990+
sql(s"COMMENT ON COLUMN $tableName.$columnName IS " +
2991+
Option(comment).map("'" + _ + "'").getOrElse("NULL"))
2992+
val commentValue = sql(s"DESCRIBE $tableName").filter(s"col_name = '$columnName'")
2993+
.select("comment").first().getString(0)
2994+
if (comment == null) {
2995+
// When comment is NULL, the comment should be removed
2996+
assert(commentValue.isEmpty,
2997+
s"Expected comment to be removed for column $columnName")
2998+
} else {
2999+
assert(commentValue === comment,
3000+
s"Expected comment to be '$comment' for column $columnName, but got '$commentValue'")
3001+
}
3002+
}
3003+
28333004
private def checkTableComment(tableName: String, comment: String): Unit = {
28343005
sql(s"COMMENT ON TABLE $tableName IS " + Option(comment).map("'" + _ + "'").getOrElse("NULL"))
28353006
if (comment == null) {

0 commit comments

Comments
 (0)