rdblue commented on code in PR #9332: URL: https://github.com/apache/iceberg/pull/9332#discussion_r1430719038
########## spark/v3.5/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/IcebergSparkSqlExtensionsParser.scala: ########## @@ -122,37 +147,132 @@ class IcebergSparkSqlExtensionsParser(delegate: ParserInterface) extends ParserI if (isIcebergCommand(sqlTextAfterSubstitution)) { parse(sqlTextAfterSubstitution) { parser => astBuilder.visit(parser.singleStatement()) }.asInstanceOf[LogicalPlan] } else { - delegate.parsePlan(sqlText) + ViewSubstitutionExecutor.execute(delegate.parsePlan(sqlText)) } } - object UnresolvedIcebergTable { + private object ViewSubstitutionExecutor extends RuleExecutor[LogicalPlan] { + private val fixedPoint = FixedPoint( + maxIterations, + errorOnExceed = true, + maxIterationsSetting = SQLConf.ANALYZER_MAX_ITERATIONS.key) - def unapply(plan: LogicalPlan): Option[LogicalPlan] = { - EliminateSubqueryAliases(plan) match { - case UnresolvedRelation(multipartIdentifier, _, _) if isIcebergTable(multipartIdentifier) => - Some(plan) - case _ => + override protected def batches: Seq[Batch] = Seq(Batch("pre-substitution", fixedPoint, V2ViewSubstitution)) + } + + private object V2ViewSubstitution extends Rule[LogicalPlan] { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + + // the reason for handling these cases here is because ResolveSessionCatalog exits early for v2 commands + override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { + case u@UnresolvedView(identifier, _, _, _) => + lookupTableOrView(identifier, viewOnly = true).getOrElse(u) + + case u@UnresolvedTableOrView(identifier, _, _) => + lookupTableOrView(identifier).getOrElse(u) + + case CreateView(UnresolvedIdentifier(nameParts, allowTemp), userSpecifiedColumns, + comment, properties, originalText, query, allowExisting, replace) => + CreateIcebergView(UnresolvedIdentifier(nameParts, allowTemp), userSpecifiedColumns, + comment, properties, originalText, query, allowExisting, replace) + + case 
ShowViews(UnresolvedNamespace(multipartIdentifier), pattern, output) => + ShowIcebergViews(UnresolvedNamespace(multipartIdentifier), pattern, output) + + case DropView(UnresolvedIdentifier(nameParts, allowTemp), ifExists) => + DropIcebergView(UnresolvedIdentifier(nameParts, allowTemp), ifExists) + } + + private def expandIdentifier(nameParts: Seq[String]): Seq[String] = { + if (!isResolvingView || isReferredTempViewName(nameParts)) return nameParts + + if (nameParts.length == 1) { + AnalysisContext.get.catalogAndNamespace :+ nameParts.head + } else if (SparkSession.active.sessionState.catalogManager.isCatalogRegistered(nameParts.head)) { + nameParts + } else { + AnalysisContext.get.catalogAndNamespace.head +: nameParts + } + } + + /** + * Resolves relations to `ResolvedTable` or `Resolved[Temp/Persistent]View`. This is + * for resolving DDL and misc commands. Code is copied from Spark's Analyzer, but performs + * a view lookup before performing a table lookup. + */ + private def lookupTableOrView( + identifier: Seq[String], + viewOnly: Boolean = false): Option[LogicalPlan] = { + lookupTempView(identifier).map { tempView => + ResolvedTempView(identifier.asIdentifier, tempView.tableMeta.schema) + }.orElse { + val multipartIdent = expandIdentifier(identifier) + val catalogAndIdentifier = Spark3Util.catalogAndIdentifier(SparkSession.active, multipartIdent.asJava) + if (null != catalogAndIdentifier) { + lookupView(SparkSession.active.sessionState.catalogManager.currentCatalog, + catalogAndIdentifier.identifier()) + .orElse(lookupTable(SparkSession.active.sessionState.catalogManager.currentCatalog, + catalogAndIdentifier.identifier())) + } else { None + } } } - private def isIcebergTable(multipartIdent: Seq[String]): Boolean = { - val catalogAndIdentifier = Spark3Util.catalogAndIdentifier(SparkSession.active, multipartIdent.asJava) - catalogAndIdentifier.catalog match { - case tableCatalog: TableCatalog => - Try(tableCatalog.loadTable(catalogAndIdentifier.identifier)) - 
.map(isIcebergTable) - .getOrElse(false) + private def isResolvingView: Boolean = AnalysisContext.get.catalogAndNamespace.nonEmpty - case _ => - false + private def isReferredTempViewName(nameParts: Seq[String]): Boolean = { + AnalysisContext.get.referredTempViewNames.exists { n => + (n.length == nameParts.length) && n.zip(nameParts).forall { + case (a, b) => resolver(a, b) + } } } - private def isIcebergTable(table: Table): Boolean = table match { - case _: SparkTable => true - case _ => false + private def lookupTempView(identifier: Seq[String]): Option[TemporaryViewRelation] = { + // We are resolving a view and this name is not a temp view when that view was created. We + // return None earlier here. + if (isResolvingView && !isReferredTempViewName(identifier)) return None Review Comment: Because an Iceberg view must not reference a temporary view, this check isn't needed, and I don't think we will need `isResolvingView` either, since that references the `AnalysisContext`. I think that the only time this should resolve temporary views is when the original query (as parsed) references a temporary view. Any time there is a single-part identifier in a view, it should be resolved using the view's default catalog and namespace. In addition, we need to ensure that there is no conflicting temporary view definition when a view is created. We'll need to check that any single-part identifier in the view SQL does not represent a temporary view at creation time. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org