manisin commented on issue #12738: URL: https://github.com/apache/iceberg/issues/12738#issuecomment-3006564988
Thanks for bringing up this issue! I have created a very simple repro with the REST catalog:

```sql
create table replaceTestTable (a int, b string) partitioned by (a) location 's3://test/replaceTestTable';
create or replace table replaceTestTable (c int, d string) using iceberg;
select * from replaceTestTable;
```

This fails with the following stack trace:

```
[INTERNAL_ERROR] The Spark SQL phase analysis failed with an internal error. You hit a bug in Spark or the Spark plugins you use. Please, report this bug to the corresponding communities or vendors, and provide the full stack trace.
org.apache.spark.SparkException: [INTERNAL_ERROR] The Spark SQL phase analysis failed with an internal error. You hit a bug in Spark or the Spark plugins you use. Please, report this bug to the corresponding communities or vendors, and provide the full stack trace.
Caused by: java.lang.NullPointerException: Type cannot be null
	at org.apache.iceberg.relocated.com.google.common.base.Preconditions.checkNotNull(Preconditions.java:921)
	at org.apache.iceberg.types.Types$NestedField.<init>(Types.java:448)
	at org.apache.iceberg.types.Types$NestedField.optional(Types.java:417)
	at org.apache.iceberg.PartitionSpec.partitionType(PartitionSpec.java:132)
	at org.apache.iceberg.Partitioning.buildPartitionProjectionType(Partitioning.java:274)
	at org.apache.iceberg.Partitioning.partitionType(Partitioning.java:242)
	at org.apache.iceberg.spark.source.SparkTable.metadataColumns(SparkTable.java:258)
	at org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation.metadataOutput$lzycompute(DataSourceV2Relation.scala:59)
	at org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation.metadataOutput(DataSourceV2Relation.scala:56)
	at org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias.metadataOutput(basicLogicalOperators.scala:1692)
	at org.apache.spark.sql.catalyst.analysis.Analyzer$AddMetadataColumns$.$anonfun$hasMetadataCol$3(Analyzer.scala:1051)
	at org.apache.spark.sql.catalyst.analysis.Analyzer$AddMetadataColumns$.$anonfun$hasMetadataCol$3$adapted(Analyzer.scala:1051)
	at scala.collection.Iterator.exists(Iterator.scala:969)
	at scala.collection.Iterator.exists$(Iterator.scala:967)
	at scala.collection.AbstractIterator.exists(Iterator.scala:1431)
	at scala.collection.IterableLike.exists(IterableLike.scala:79)
	at scala.collection.IterableLike.exists$(IterableLike.scala:78)
	at scala.collection.AbstractIterable.exists(Iterable.scala:56)
	at org.apache.spark.sql.catalyst.analysis.Analyzer$AddMetadataColumns$.$anonfun$hasMetadataCol$2(Analyzer.scala:1051)
	at org.apache.spark.sql.catalyst.analysis.Analyzer$AddMetadataColumns$.$anonfun$hasMetadataCol$2$adapted(Analyzer.scala:1046)
	at org.apache.spark.sql.catalyst.trees.TreeNode.exists(TreeNode.scala:223)
	at org.apache.spark.sql.catalyst.analysis.Analyzer$AddMetadataColumns$.$anonfun$hasMetadataCol$1(Analyzer.scala:1046)
	at org.apache.spark.sql.catalyst.analysis.Analyzer$AddMetadataColumns$.$anonfun$hasMetadataCol$1$adapted(Analyzer.scala:1046)
	at scala.collection.LinearSeqOptimized.exists(LinearSeqOptimized.scala:95)
	at scala.collection.LinearSeqOptimized.exists$(LinearSeqOptimized.scala:92)
	at scala.collection.immutable.Stream.exists(Stream.scala:204)
	at org.apache.spark.sql.catalyst.analysis.Analyzer$AddMetadataColumns$.org$apache$spark$sql$catalyst$analysis$Analyzer$AddMetadataColumns$$hasMetadataCol(Analyzer.scala:1046)
	at org.apache.spark.sql.catalyst.analysis.Analyzer$AddMetadataColumns$$anonfun$apply$13.applyOrElse(Analyzer.scala:1016)
	at org.apache.spark.sql.catalyst.analysis.Analyzer$AddMetadataColumns$$anonfun$apply$13.applyOrElse(Analyzer.scala:1013)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDownWithPruning$2(AnalysisHelper.scala:170)
	at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDownWithPruning$1(AnalysisHelper.scala:170)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:323)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDownWithPruning(AnalysisHelper.scala:168)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDownWithPruning$(AnalysisHelper.scala:164)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsDownWithPruning(LogicalPlan.scala:32)
	at org.apache.spark.sql.catalyst.analysis.Analyzer$AddMetadataColumns$.apply(Analyzer.scala:1013)
	at org.apache.spark.sql.catalyst.analysis.Analyzer$AddMetadataColumns$.apply(Analyzer.scala:1009)
	at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:222)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:91)
	at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:219)
	at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:211)
	at scala.collection.immutable.List.foreach(List.scala:431)
	at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:211)
	at org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$executeSameContext(Analyzer.scala:240)
	at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$execute$1(Analyzer.scala:236)
	at org.apache.spark.sql.catalyst.analysis.AnalysisContext$.withNewAnalysisContext(Analyzer.scala:187)
	at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:236)
	at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:202)
	at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:182)
	at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:89)
	at org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:182)
	at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:223)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:330)
	at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:222)
	at org.apache.spark.sql.execution.QueryExecution.$anonfun$analyzed$1(QueryExecution.scala:77)
	at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:138)
	at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:219)
	at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:546)
```
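Reading the trace, the NPE comes from `PartitionSpec.partitionType()` resolving a partition source column against the table schema: after the `create or replace`, the table metadata apparently still carries the old spec partitioned on `a`, but the replacement schema `(c, d)` no longer contains that column, so the resolved source type is null and `Types.NestedField.optional(...)` trips the "Type cannot be null" precondition. Below is a minimal, unverified sketch of how I believe the same state can be reproduced directly against the Iceberg API, assuming `InMemoryCatalog` retains old specs across a replace the same way the REST catalog does; the class name and identifiers are illustrative:

```java
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Partitioning;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.inmemory.InMemoryCatalog;
import org.apache.iceberg.types.Types;

import java.util.Collections;

public class ReplaceTableNpeRepro {
  public static void main(String[] args) {
    InMemoryCatalog catalog = new InMemoryCatalog();
    catalog.initialize("test", Collections.emptyMap());
    catalog.createNamespace(Namespace.of("db"));

    TableIdentifier id = TableIdentifier.of("db", "replaceTestTable");

    // Original table, partitioned by column "a" (mirrors the SQL repro above)
    Schema original = new Schema(
        Types.NestedField.required(1, "a", Types.IntegerType.get()),
        Types.NestedField.required(2, "b", Types.StringType.get()));
    PartitionSpec spec = PartitionSpec.builderFor(original).identity("a").build();
    catalog.buildTable(id, original).withPartitionSpec(spec).create();

    // Replace with a schema that no longer contains the partition source column
    Schema replacement = new Schema(
        Types.NestedField.required(1, "c", Types.IntegerType.get()),
        Types.NestedField.required(2, "d", Types.StringType.get()));
    catalog.buildTable(id, replacement).replaceTransaction().commitTransaction();

    // If the old spec (on "a") survives in the metadata's spec list, resolving
    // its source column against the new schema should yield a null type and the
    // same "Type cannot be null" NPE that Spark hits via SparkTable.metadataColumns
    Table table = catalog.loadTable(id);
    Partitioning.partitionType(table);
  }
}
```

If that sketch holds, the fix likely belongs on the Iceberg side (tolerating old specs whose source columns were dropped by a replace) rather than in Spark, since `Partitioning.partitionType` is the common entry point for building the `_partition` metadata column.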