xwmr-max opened a new issue, #8124:
URL: https://github.com/apache/iceberg/issues/8124
### Apache Iceberg version
1.0.0
### Query engine
Spark
### Please describe the bug 🐞
When writing data to Iceberg through the Spark V1 write interface, the
following error is reported:
```java
org.apache.iceberg.BaseMetastoreTableOperations.refreshFromMetadataLocation(BaseMetastoreTableOperations.java:193)
2023-07-21 09:51:21,136 | INFO | main | Table loaded by catalog: spark_catalog.test.iceberg04 | org.apache.iceberg.BaseMetastoreCatalog.loadTable(BaseMetastoreCatalog.java:64)
2023-07-21 09:51:21,198 | ERROR | main | SparkSubmit Exception | org.apache.spark.internal.Logging.logError(Logging.scala:94)
org.apache.spark.sql.AnalysisException: LEGACY store assignment policy is disallowed in Spark data source V2. Please set the configuration spark.sql.storeAssignmentPolicy to other values.
    at org.apache.spark.sql.errors.QueryCompilationErrors$.legacyStoreAssignmentPolicyError(QueryCompilationErrors.scala:114) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$validateStoreAssignmentPolicy(Analyzer.scala:3267) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOutputRelation$$anonfun$apply$32.applyOrElse(Analyzer.scala:3208) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOutputRelation$$anonfun$apply$32.applyOrElse(Analyzer.scala:3205) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDown$2(AnalysisHelper.scala:108) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:73) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDown$1(AnalysisHelper.scala:108) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:221) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDown(AnalysisHelper.scala:106) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDown$(AnalysisHelper.scala:104) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsDown(LogicalPlan.scala:29) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperators(AnalysisHelper.scala:73) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperators$(AnalysisHelper.scala:72) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:29) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOutputRelation$.apply(Analyzer.scala:3205) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOutputRelation$.apply(Analyzer.scala:3204) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:216) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126) ~[scala-library-2.12.10.jar:?]
    at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122) ~[scala-library-2.12.10.jar:?]
    at scala.collection.immutable.List.foldLeft(List.scala:89) ~[scala-library-2.12.10.jar:?]
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:213) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:205) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at scala.collection.immutable.List.foreach(List.scala:392) ~[scala-library-2.12.10.jar:?]
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:205) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$executeSameContext(Analyzer.scala:197) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:191) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:156) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:183) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:88) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:183) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:175) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:228) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:174) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.execution.QueryExecution.$anonfun$analyzed$1(QueryExecution.scala:73) ~[spark-sql_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111) ~[spark-catalyst_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:143) ~[spark-sql_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:774) ~[spark-sql_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:143) ~[spark-sql_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:73) ~[spark-sql_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:71) ~[spark-sql_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:63) ~[spark-sql_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.execution.QueryExecution.$anonfun$withCachedData$1(QueryExecution.scala:77) ~[spark-sql_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:774) ~[spark-sql_2.12-3.1.1.jar:3.1.1]
    at org.apache.spark.sql.execution.QueryExecution.withCachedData$lzycompute(QueryExecution.scala:76) ~[spark-sql_2.12-3.1.1.jar:3.1.1]
```
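The exception comes from Spark's analyzer rather than from Iceberg itself: for data source V2 writes, `ResolveOutputRelation` calls `validateStoreAssignmentPolicy`, which rejects `spark.sql.storeAssignmentPolicy=LEGACY` outright. Since Iceberg tables are V2 relations in Spark 3, a session pinned to the LEGACY policy fails at analysis time, before the write ever starts.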
The code is as follows:
```scala
import com.google.common.collect.ImmutableList

import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.{Row, SaveMode, SparkSession}

object IcebergDataFrame {

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().enableHiveSupport()
      // Setting the LEGACY policy is what triggers the AnalysisException above.
      .config("spark.sql.storeAssignmentPolicy", "LEGACY")
      // .config("spark.sql.sources.useV1SourceList", "iceberg")
      .getOrCreate()

    val rows = ImmutableList.of(Row("1", "n1"))
    val df = spark.createDataFrame(rows, schema)
    df.write.mode(SaveMode.Append).insertInto("test.iceberg04")
  }

  val schema: StructType =
    StructType(
      StructField("id", StringType) ::
      StructField("name", StringType) :: Nil)
}
```
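A minimal sketch of a variant that should avoid the error, assuming the Hive-backed table `test.iceberg04` already exists with the same two-column schema and the job is submitted with the same catalog settings (the object name `IcebergDataFrameAnsi` is just illustrative):

```scala
import java.util.Arrays

import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.{Row, SaveMode, SparkSession}

object IcebergDataFrameAnsi {

  // Same schema as the failing job: two string columns.
  val schema: StructType = StructType(
    StructField("id", StringType) ::
    StructField("name", StringType) :: Nil)

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .enableHiveSupport()
      // ANSI (the Spark 3 default) and STRICT are accepted for V2 writes;
      // only LEGACY is rejected by the analyzer.
      .config("spark.sql.storeAssignmentPolicy", "ANSI")
      .getOrCreate()

    // java.util.Arrays.asList avoids the Guava dependency on ImmutableList.
    val rows = Arrays.asList(Row("1", "n1"))
    val df = spark.createDataFrame(rows, schema)
    df.write.mode(SaveMode.Append).insertInto("test.iceberg04")
  }
}
```

Removing the `storeAssignmentPolicy` setting entirely has the same effect, since ANSI is already the default in Spark 3.x.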
The spark-submit options are as follows:
```shell
--num-executors 1 --executor-cores 1 \
--conf "spark.yarn.maxAppAttempts=1" \
--conf spark.sql.catalog.iceberg=org.apache.iceberg.spark.SparkCatalog \
--conf spark.sql.catalog.iceberg.type=hive \
--conf spark.sql.catalog.iceberg.uri=thrift://xxx,thrift://xxxxx \
--conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \
--class com.api.cluster.IcebergDataFrame \
```