
Commit 59759f3

aokolnychyi authored and cloud-fan committed
[SPARK-54157][SQL] Fix refresh of DSv2 tables in Dataset
### What changes were proposed in this pull request?

This PR fixes the refresh of DSv2 tables in Dataset.

### Why are the changes needed?

Prior to this change, Spark would pin the version of DSv2 tables at load/resolution time. Any changes within the session would not be propagated to an analyzed but not yet executed Dataset, breaking parity with DSv1 tables. Changes in this PR are needed for the following reasons:

- Prevent scanning/joining inconsistent versions of the same table in one session (see tests).
- Prevent stale results upon external and session changes.
- Remove workarounds in DSv2 connectors by fixing the problem in Spark.

### Does this PR introduce _any_ user-facing change?

Yes, but this PR makes DSv2 Table behavior match the expected Spark semantics.

### How was this patch tested?

This PR comes with tests.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #52920 from aokolnychyi/spark-54157.

Authored-by: Anton Okolnychyi <aokolnychyi@apache.org>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent be98a29 commit 59759f3
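
For readers unfamiliar with the symptom, here is a minimal sketch of the staleness this patch fixes. The catalog and table names are hypothetical; the snippet assumes a Spark shell with a DSv2 catalog registered as `cat`:

```scala
// Hypothetical DSv2 catalog `cat` and table `cat.db.events`.
val df = spark.table("cat.db.events") // analysis captures table metadata here

// A session change after analysis but before execution:
spark.sql("INSERT INTO cat.db.events VALUES (1, 'click')")

// Before this fix, df could execute against the table version pinned at
// load/resolution time and miss the inserted row; a DSv1 table would pick up
// the change. After this fix, the DSv2 table state is refreshed at execution.
df.show()
```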

File tree

17 files changed: +1234 −17 lines


common/utils/src/main/resources/error/error-conditions.json

Lines changed: 25 additions & 0 deletions
@@ -2210,6 +2210,31 @@
     ],
     "sqlState" : "42000"
   },
+  "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS" : {
+    "message" : [
+      "Detected incompatible changes to table <tableName> after DataFrame/Dataset has been resolved and analyzed, meaning the underlying plan is out of sync. Please, re-create DataFrame/Dataset before attempting to execute the query again."
+    ],
+    "subClass" : {
+      "COLUMNS_MISMATCH" : {
+        "message" : [
+          "Data columns have changed:",
+          "<errors>"
+        ]
+      },
+      "METADATA_COLUMNS_MISMATCH" : {
+        "message" : [
+          "Metadata columns have changed:",
+          "<errors>"
+        ]
+      },
+      "TABLE_ID_MISMATCH" : {
+        "message" : [
+          "Table ID has changed from <capturedTableId> to <currentTableId>."
+        ]
+      }
+    },
+    "sqlState" : "51024"
+  },
   "INCOMPATIBLE_VIEW_SCHEMA_CHANGE" : {
     "message" : [
       "The SQL query of view <viewName> has an incompatible schema change and column <colName> cannot be resolved. Expected <expectedNum> columns named <colName> but got <actualCols>.",

sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/Table.java

Lines changed: 10 additions & 0 deletions
@@ -50,6 +50,16 @@ public interface Table {
    */
   String name();
 
+  /**
+   * An ID of the table that can be used to reliably check if two table objects refer to the same
+   * metastore entity. If a table is dropped and recreated again with the same name, the new table
+   * ID must be different. This method must return null if connectors don't support the notion of
+   * table ID.
+   */
+  default String id() {
+    return null;
+  }
+
   /**
    * Returns the schema of this table. If the table is not readable and doesn't have a schema, an
    * empty schema can be returned here.
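
A minimal sketch of how a connector might satisfy the new `id()` contract; `MyTable` and its UUID scheme are illustrative assumptions, not part of this patch:

```scala
import java.util.UUID

import org.apache.spark.sql.connector.catalog.{Table, TableCapability}
import org.apache.spark.sql.types.StructType

// Hypothetical connector table: a fresh UUID is minted when the metastore
// entity is created, so dropping and recreating a table under the same name
// yields a different id, as the contract requires.
class MyTable(tableName: String, tableSchema: StructType) extends Table {
  private val entityId = UUID.randomUUID().toString

  override def name(): String = tableName
  override def schema(): StructType = tableSchema
  override def id(): String = entityId
  override def capabilities(): java.util.Set[TableCapability] =
    java.util.Collections.emptySet[TableCapability]()
}
```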

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala

Lines changed: 9 additions & 1 deletion
@@ -40,6 +40,7 @@ import org.apache.spark.sql.connector.write.RowLevelOperation.Command.{DELETE, M
 import org.apache.spark.sql.errors.DataTypeErrors.toSQLType
 import org.apache.spark.sql.errors.QueryExecutionErrors
 import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, ExtractV2Table}
+import org.apache.spark.sql.execution.datasources.v2.V2TableRefreshUtil
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.{ArrayType, AtomicType, BooleanType, DataType, IntegerType, MapType, MetadataBuilder, StringType, StructField, StructType}
 import org.apache.spark.util.ArrayImplicits._
@@ -687,7 +688,14 @@ case class ReplaceTableAsSelect(
     isAnalyzed: Boolean = false)
   extends V2CreateTableAsSelectPlan {
 
-  override def markAsAnalyzed(ac: AnalysisContext): LogicalPlan = copy(isAnalyzed = true)
+  override def markAsAnalyzed(ac: AnalysisContext): LogicalPlan = {
+    // RTAS may drop and recreate the table before query execution, breaking self-references;
+    // refresh and pin versions here so the query reads from the original table versions
+    // instead of the newly created empty table that serves as the append/overwrite target
+    val refreshedQuery = V2TableRefreshUtil.refreshVersions(query)
+    val pinnedQuery = V2TableRefreshUtil.pinVersions(refreshedQuery)
+    copy(query = pinnedQuery, isAnalyzed = true)
+  }
 
   override def withPartitioning(rewritten: Seq[Transform]): V2CreateTablePlan = {
     this.copy(partitioning = rewritten)
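
The self-reference hazard that motivates refreshing and then pinning here, sketched with a hypothetical table name:

```scala
// RTAS reading from the very table it replaces (illustrative):
spark.sql("REPLACE TABLE cat.db.t AS SELECT * FROM cat.db.t WHERE id > 10")

// RTAS drops cat.db.t and creates an empty replacement to serve as the
// append/overwrite target before the query runs. If the SELECT were resolved
// against the latest table state at execution, it would read that empty
// table. Pinning at markAsAnalyzed keeps the SELECT on the original version.
```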
sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V2TableUtil.scala

Lines changed: 180 additions & 0 deletions

@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog
+
+import java.util.Locale
+
+import scala.collection.mutable
+
+import org.apache.spark.sql.catalyst.SQLConfHelper
+import org.apache.spark.sql.catalyst.util.{quoteIfNeeded, MetadataColumnHelper}
+import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper
+import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
+import org.apache.spark.sql.types.DataType
+import org.apache.spark.sql.util.SchemaUtils
+import org.apache.spark.util.ArrayImplicits._
+
+private[sql] object V2TableUtil extends SQLConfHelper {
+
+  def toQualifiedName(catalog: CatalogPlugin, ident: Identifier): String = {
+    s"${quoteIfNeeded(catalog.name)}.${ident.quoted}"
+  }
+
+  /**
+   * Validates that captured data columns match the current table schema.
+   *
+   * @param table the current table metadata
+   * @param relation the relation with captured columns
+   * @return validation errors, or empty sequence if valid
+   */
+  def validateCapturedColumns(table: Table, relation: DataSourceV2Relation): Seq[String] = {
+    validateCapturedColumns(table, relation.table.columns.toImmutableArraySeq)
+  }
+
+  /**
+   * Validates that captured data columns match the current table schema.
+   *
+   * Checks for:
+   * - Column type or nullability changes
+   * - Removed columns (missing from the current table schema)
+   * - Added columns (new in the current table schema)
+   *
+   * @param table the current table metadata
+   * @param originCols the originally captured columns
+   * @return validation errors, or empty sequence if valid
+   */
+  def validateCapturedColumns(table: Table, originCols: Seq[Column]): Seq[String] = {
+    val errors = mutable.ArrayBuffer[String]()
+    val colsByNormalizedName = indexColumns(table.columns.toImmutableArraySeq)
+    val originColsByNormalizedName = indexColumns(originCols)
+
+    originColsByNormalizedName.foreach { case (normalizedName, originCol) =>
+      colsByNormalizedName.get(normalizedName) match {
+        case Some(col) =>
+          if (originCol.dataType != col.dataType || originCol.nullable != col.nullable) {
+            val oldType = formatType(originCol.dataType, originCol.nullable)
+            val newType = formatType(col.dataType, col.nullable)
+            errors += s"`${originCol.name}` type has changed from $oldType to $newType"
+          }
+        case None =>
+          errors += s"${formatColumn(originCol)} has been removed"
+      }
+    }
+
+    colsByNormalizedName.foreach { case (normalizedName, col) =>
+      if (!originColsByNormalizedName.contains(normalizedName)) {
+        errors += s"${formatColumn(col)} has been added"
+      }
+    }
+
+    errors.toSeq
+  }
+
+  /**
+   * Validates that captured metadata columns are consistent with the current table metadata.
+   *
+   * @param table the current table metadata
+   * @param relation the relation with captured metadata columns
+   * @return validation errors, or empty sequence if valid
+   */
+  def validateCapturedMetadataColumns(table: Table, relation: DataSourceV2Relation): Seq[String] = {
+    validateCapturedMetadataColumns(table, extractMetadataColumns(relation))
+  }
+
+  // extracts original column info for all metadata attributes in relation
+  private def extractMetadataColumns(relation: DataSourceV2Relation): Seq[MetadataColumn] = {
+    val metaAttrs = relation.output.filter(_.isMetadataCol)
+    if (metaAttrs.nonEmpty) {
+      val metaCols = metadataColumns(relation.table)
+      val normalizedMetaAttrNames = metaAttrs.map(attr => normalize(attr.name)).toSet
+      metaCols.filter(col => normalizedMetaAttrNames.contains(normalize(col.name)))
+    } else {
+      Seq.empty
+    }
+  }
+
+  /**
+   * Validates that captured metadata columns are consistent with the current table metadata.
+   *
+   * Checks for:
+   * - Metadata column type or nullability changes
+   * - Removed metadata columns (missing from current table)
+   *
+   * @param table the current table metadata
+   * @param originMetaCols the originally captured metadata columns
+   * @return validation errors, or empty sequence if valid
+   */
+  def validateCapturedMetadataColumns(
+      table: Table,
+      originMetaCols: Seq[MetadataColumn]): Seq[String] = {
+    val errors = mutable.ArrayBuffer[String]()
+    val metaCols = metadataColumns(table)
+    val metaColsByNormalizedName = indexMetadataColumns(metaCols)
+
+    originMetaCols.foreach { originMetaCol =>
+      val normalizedName = normalize(originMetaCol.name)
+      metaColsByNormalizedName.get(normalizedName) match {
+        case Some(metaCol) =>
+          if (originMetaCol.dataType != metaCol.dataType ||
+              originMetaCol.isNullable != metaCol.isNullable) {
+            val oldType = formatType(originMetaCol.dataType, originMetaCol.isNullable)
+            val newType = formatType(metaCol.dataType, metaCol.isNullable)
+            errors += s"`${originMetaCol.name}` type has changed from $oldType to $newType"
+          }
+        case None =>
+          errors += s"${formatMetadataColumn(originMetaCol)} has been removed"
+      }
+    }
+
+    errors.toSeq
+  }
+
+  private def formatColumn(col: Column): String = {
+    s"`${col.name}` ${formatType(col.dataType, col.nullable)}"
+  }
+
+  private def formatMetadataColumn(col: MetadataColumn): String = {
+    s"`${col.name}` ${formatType(col.dataType, col.isNullable)}"
+  }
+
+  private def formatType(dataType: DataType, nullable: Boolean): String = {
+    if (nullable) dataType.sql else s"${dataType.sql} NOT NULL"
+  }
+
+  private def indexColumns(cols: Seq[Column]): Map[String, Column] = {
+    index(cols)(_.name)
+  }
+
+  private def indexMetadataColumns(cols: Seq[MetadataColumn]): Map[String, MetadataColumn] = {
+    index(cols)(_.name)
+  }
+
+  private def index[C](cols: Seq[C])(extractName: C => String): Map[String, C] = {
+    SchemaUtils.checkColumnNameDuplication(cols.map(extractName), conf.caseSensitiveAnalysis)
+    cols.map(col => normalize(extractName(col)) -> col).toMap
+  }
+
+  private def metadataColumns(table: Table): Seq[MetadataColumn] = table match {
+    case hasMeta: SupportsMetadataColumns => hasMeta.metadataColumns.toImmutableArraySeq
+    case _ => Seq.empty
+  }
+
+  private def normalize(name: String): String = {
+    if (conf.caseSensitiveAnalysis) name else name.toLowerCase(Locale.ROOT)
+  }
+}
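
A sketch of how these validators pair with the new error helpers at refresh time; `catalog`, `identifier`, `currentTable`, and `relation` are assumed bindings at a hypothetical call site, not code from this patch:

```scala
import org.apache.spark.sql.connector.catalog.V2TableUtil
import org.apache.spark.sql.errors.QueryCompilationErrors

// currentTable: freshly loaded from the catalog at execution time
// relation: the DataSourceV2Relation captured during analysis
val name = V2TableUtil.toQualifiedName(catalog, identifier)

val columnErrors = V2TableUtil.validateCapturedColumns(currentTable, relation)
if (columnErrors.nonEmpty) {
  throw QueryCompilationErrors.columnsChangedAfterAnalysis(name, columnErrors)
}

val metaColumnErrors =
  V2TableUtil.validateCapturedMetadataColumns(currentTable, relation)
if (metaColumnErrors.nonEmpty) {
  throw QueryCompilationErrors.metadataColumnsChangedAfterAnalysis(name, metaColumnErrors)
}
```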

sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala

Lines changed: 32 additions & 0 deletions
@@ -2121,6 +2121,38 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat
     }
   }
 
+  def tableIdChangedAfterAnalysis(
+      tableName: String,
+      capturedTableId: String,
+      currentTableId: String): Throwable = {
+    new AnalysisException(
+      errorClass = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.TABLE_ID_MISMATCH",
+      messageParameters = Map(
+        "tableName" -> toSQLId(tableName),
+        "capturedTableId" -> capturedTableId,
+        "currentTableId" -> currentTableId))
+  }
+
+  def columnsChangedAfterAnalysis(
+      tableName: String,
+      errors: Seq[String]): Throwable = {
+    new AnalysisException(
+      errorClass = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMNS_MISMATCH",
+      messageParameters = Map(
+        "tableName" -> toSQLId(tableName),
+        "errors" -> errors.mkString("\n- ", "\n- ", "")))
+  }
+
+  def metadataColumnsChangedAfterAnalysis(
+      tableName: String,
+      errors: Seq[String]): Throwable = {
+    new AnalysisException(
+      errorClass = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.METADATA_COLUMNS_MISMATCH",
+      messageParameters = Map(
+        "tableName" -> toSQLId(tableName),
+        "errors" -> errors.mkString("\n- ", "\n- ", "")))
+  }
+
   def numberOfPartitionsNotAllowedWithUnspecifiedDistributionError(): Throwable = {
     new AnalysisException(
       errorClass = "INVALID_WRITE_DISTRIBUTION.PARTITION_NUM_WITH_UNSPECIFIED_DISTRIBUTION",

sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala

Lines changed: 6 additions & 6 deletions
@@ -23,8 +23,9 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, Attri
 import org.apache.spark.sql.catalyst.plans.QueryPlan
 import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, ExposesMetadataColumns, Histogram, HistogramBin, LeafNode, LogicalPlan, Statistics}
 import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes
-import org.apache.spark.sql.catalyst.util.{quoteIfNeeded, truncatedString, CharVarcharUtils}
-import org.apache.spark.sql.connector.catalog.{CatalogPlugin, FunctionCatalog, Identifier, SupportsMetadataColumns, Table, TableCapability}
+import org.apache.spark.sql.catalyst.util.{truncatedString, CharVarcharUtils}
+import org.apache.spark.sql.connector.catalog.{CatalogPlugin, FunctionCatalog, Identifier, SupportsMetadataColumns, Table, TableCapability, TableCatalog, V2TableUtil}
+import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.CatalogHelper
 import org.apache.spark.sql.connector.read.{Scan, Statistics => V2Statistics, SupportsReportStatistics}
 import org.apache.spark.sql.connector.read.streaming.{Offset, SparkDataStream}
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
@@ -57,9 +58,8 @@ abstract class DataSourceV2RelationBase(
   }
 
   override def name: String = {
-    import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
     (catalog, identifier) match {
-      case (Some(cat), Some(ident)) => s"${quoteIfNeeded(cat.name())}.${ident.quoted}"
+      case (Some(cat), Some(ident)) => V2TableUtil.toQualifiedName(cat, ident)
       case _ => table.name()
     }
   }
@@ -259,10 +259,10 @@ object ExtractV2Table {
 }
 
 object ExtractV2CatalogAndIdentifier {
-  def unapply(relation: DataSourceV2Relation): Option[(CatalogPlugin, Identifier)] = {
+  def unapply(relation: DataSourceV2Relation): Option[(TableCatalog, Identifier)] = {
     relation match {
       case DataSourceV2Relation(_, _, Some(catalog), Some(identifier), _, _) =>
-        Some((catalog, identifier))
+        Some((catalog.asTableCatalog, identifier))
       case _ =>
         None
     }