flisboac commented on issue #9406: URL: https://github.com/apache/iceberg/issues/9406#issuecomment-2047837859
I've also come across this error. The difference is that, AFAICT, I don't need to roll back the table when that error happens. ```text Traceback (most recent call last): File "/mnt1/yarn/usercache/hadoop/appcache/application_1712706581772_0133/container_1712706581772_0133_01_000001/REDACTED.py", line 679, in _do_run self.save_cdc_history( File "/mnt1/yarn/usercache/hadoop/appcache/application_1712706581772_0133/container_1712706581772_0133_01_000001/REDACTED.py", line 1010, in save_cdc_history df.writeTo(full_table_name).append() File "/mnt1/yarn/usercache/hadoop/appcache/application_1712706581772_0133/container_1712706581772_0133_01_000001/pyspark.zip/pyspark/sql/readwriter.py", line 2107, in append self._jwriter.append() File "/mnt1/yarn/usercache/hadoop/appcache/application_1712706581772_0133/container_1712706581772_0133_01_000001/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line 1322, in __call__ return_value = get_return_value( File "/mnt1/yarn/usercache/hadoop/appcache/application_1712706581772_0133/container_1712706581772_0133_01_000001/pyspark.zip/pyspark/errors/exceptions/captured.py", line 179, in deco return f(*a, **kw) File "/mnt1/yarn/usercache/hadoop/appcache/application_1712706581772_0133/container_1712706581772_0133_01_000001/py4j-0.10.9.7-src.zip/py4j/protocol.py", line 326, in get_return_value raise Py4JJavaError( py4j.protocol.Py4JJavaError: An error occurred while calling o174.append. 
: org.apache.iceberg.exceptions.CommitFailedException: Cannot commit FULL_TABLE_NAME_REDACTED because base metadata location 's3://REDACTED/metadata/08121-ff854d63-3fff-4d64-b456-aa8d3e8850da.metadata.json' is not same as the current Glue location 's3://REDACTED/metadata/08122-e77b0e4e-aa50-4346-a578-9eb0b917c097.metadata.json' at org.apache.iceberg.aws.glue.GlueTableOperations.checkMetadataLocation(GlueTableOperations.java:272) at org.apache.iceberg.aws.glue.GlueTableOperations.doCommit(GlueTableOperations.java:158) at org.apache.iceberg.BaseMetastoreTableOperations.commit(BaseMetastoreTableOperations.java:135) at org.apache.iceberg.SnapshotProducer.lambda$commit$2(SnapshotProducer.java:400) at org.apache.iceberg.util.Tasks$Builder.runTaskWithRetry(Tasks.java:413) at org.apache.iceberg.util.Tasks$Builder.runSingleThreaded(Tasks.java:219) at org.apache.iceberg.util.Tasks$Builder.run(Tasks.java:203) at org.apache.iceberg.util.Tasks$Builder.run(Tasks.java:196) at org.apache.iceberg.SnapshotProducer.commit(SnapshotProducer.java:374) at org.apache.iceberg.spark.source.SparkWrite.commitOperation(SparkWrite.java:233) at org.apache.iceberg.spark.source.SparkWrite.access$1300(SparkWrite.java:84) at org.apache.iceberg.spark.source.SparkWrite$BatchAppend.commit(SparkWrite.java:296) at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2(WriteToDataSourceV2Exec.scala:399) at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2$(WriteToDataSourceV2Exec.scala:359) at org.apache.spark.sql.execution.datasources.v2.AppendDataExec.writeWithV2(WriteToDataSourceV2Exec.scala:225) at org.apache.spark.sql.execution.datasources.v2.V2ExistingTableWriteExec.run(WriteToDataSourceV2Exec.scala:337) at org.apache.spark.sql.execution.datasources.v2.V2ExistingTableWriteExec.run$(WriteToDataSourceV2Exec.scala:336) at org.apache.spark.sql.execution.datasources.v2.AppendDataExec.run(WriteToDataSourceV2Exec.scala:225) at 
org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result$lzycompute(V2CommandExec.scala:43) at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result(V2CommandExec.scala:43) at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.executeCollect(V2CommandExec.scala:49) at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:113) at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:108) at org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:255) at org.apache.spark.sql.execution.SQLExecution$.executeQuery$1(SQLExecution.scala:129) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$9(SQLExecution.scala:165) at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:108) at org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:255) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$8(SQLExecution.scala:165) at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:276) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:164) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:70) at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110) at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:101) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:503) at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76) at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:503) at 
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:33) at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267) at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263) at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:33) at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:33) at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:479) at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:101) at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:88) at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:86) at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:151) at org.apache.spark.sql.DataFrameWriterV2.runCommand(DataFrameWriterV2.scala:196) at org.apache.spark.sql.DataFrameWriterV2.append(DataFrameWriterV2.scala:150) at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77) at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.base/java.lang.reflect.Method.invoke(Method.java:568) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374) at py4j.Gateway.invoke(Gateway.java:282) at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) at py4j.commands.CallCommand.execute(CallCommand.java:79) at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182) at 
py4j.ClientServerConnection.run(ClientServerConnection.java:106) at java.base/java.lang.Thread.run(Thread.java:840) ``` Perhaps the lock is not being held for enough time to go through the write to the end? This is how my table was created: ```sql CREATE TABLE __REDACTED__ ( -- Many columns here; two array<string>, the rest are strings, booleans, bigints and timestamps target_database_name string, target_table_name string, generated_at string, started_at timestamp ) USING iceberg LOCATION 's3://REDACTED' PARTITIONED BY (month(started_at)) TBLPROPERTIES ( 'table_type'='iceberg', 'format-version'='2', 'write.format.default'='parquet', 'write.parquet.compression-codec'='snappy', 'write.avro.compression-codec'='snappy', 'write.metadata.delete-after-commit.enabled'='true', 'write.delete.mode'='copy-on-write', 'write.update.mode'='merge-on-read', 'write.merge.mode'='merge-on-read', 'write.metadata.metrics.column.started_at'='full', 'write.metadata.metrics.column.target_table_name'='full', 'write.metadata.metrics.column.target_table_name'='full', 'write.metadata.metrics.column.started_at'='full' -- More columns are configured as "full" metrics; 9 "full" in total, including the ones above ); ALTER TABLE inf_datalake_pipelines_pub.cdc_manifest_history WRITE ORDERED BY started_at ASC, target_database_name ASC, target_table_name ASC, generated_at ASC; ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org