Gowthami03B commented on code in PR #614:
URL: https://github.com/apache/iceberg-python/pull/614#discussion_r1573309559
##########
tests/conftest.py:
##########
@@ -2060,7 +2060,7 @@ def spark() -> "SparkSession":
         .config("spark.sql.catalog.hive.warehouse", "s3://warehouse/hive/")
         .config("spark.sql.catalog.hive.s3.endpoint", "http://localhost:9000")
         .config("spark.sql.catalog.hive.s3.path-style-access", "true")
-        .config("spark.sql.execution.arrow.pyspark.enabled", "true")
+        .config("spark.sql.execution.arrow.pyspark.enabled", "false")

Review Comment:
   @kevinjqliu

   tests/integration/test_inspect_table.py::test_inspect_files[1]
   /home/codespace/.cache/pypoetry/virtualenvs/pyiceberg-FsHa-ZgB-py3.10/lib/python3.10/site-packages/pyspark/sql/pandas/conversion.py:198: UserWarning: toPandas attempted Arrow optimization because 'spark.sql.execution.arrow.pyspark.enabled' is set to true, but has reached the error below and can not continue. Note that 'spark.sql.execution.arrow.pyspark.fallback.enabled' does not have an effect on failures in the middle of computation.

   An error occurred while calling o71.getResult.
   : org.apache.spark.SparkException: Exception thrown in awaitResult:
       at org.apache.spark.util.SparkThreadUtils$.awaitResult(SparkThreadUtils.scala:56)
       at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:310)
       at org.apache.spark.security.SocketAuthServer.getResult(SocketAuthServer.scala:98)
       at org.apache.spark.security.SocketAuthServer.getResult(SocketAuthServer.scala:94)
       at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
       at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:75)
       at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:52)
       at java.base/java.lang.reflect.Method.invoke(Method.java:580)
       at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
       at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
       at py4j.Gateway.invoke(Gateway.java:282)
       at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
       at py4j.commands.CallCommand.execute(CallCommand.java:79)
       at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
       at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
       at java.base/java.lang.Thread.run(Thread.java:1583)
   Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0) (85bf1d94-7064-4319-9250-09ddcbd80af7.internal.cloudapp.net executor driver): java.lang.UnsupportedOperationException: sun.misc.Unsafe or java.nio.DirectByteBuffer.<init>(long, int) not available
       at org.apache.arrow.memory.util.MemoryUtil.directBuffer(MemoryUtil.java:174)
       at org.apache.arrow.memory.ArrowBuf.getDirectBuffer(ArrowBuf.java:229)
       at org.apache.arrow.memory.ArrowBuf.nioBuffer(ArrowBuf.java:224)
       at org.apache.arrow.vector.ipc.WriteChannel.write(WriteChannel.java:133)
       at org.apache.arrow.vector.ipc.message.MessageSerializer.writeBatchBuffers(MessageSerializer.java:303)
       at org.apache.arrow.vector.ipc.message.MessageSerializer.serialize(MessageSerializer.java:276)
       at org.apache.arrow.vector.ipc.message.MessageSerializer.serialize(MessageSerializer.java:237)
       at org.apache.spark.sql.execution.arrow.ArrowConverters$ArrowBatchIterator.$anonfun$next$1(ArrowConverters.scala:119)
       at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
       at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
       at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
       at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
       at org.apache.spark.sql.execution.arrow.ArrowConverters$ArrowBatchIterator.next(ArrowConverters.scala:122)
       at org.apache.spark.sql.execution.arrow.ArrowConverters$ArrowBatchIterator.next(ArrowConverters.scala:77)
       at scala.collection.Iterator.foreach(Iterator.scala:943)
       at scala.collection.Iterator.foreach$(Iterator.scala:943)
       at ...
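   The failing frame is Arrow's off-heap allocator (org.apache.arrow.memory.util.MemoryUtil), which cannot reach sun.misc.Unsafe / java.nio.DirectByteBuffer on recent JDKs unless the JVM is started with extra flags. As a possible alternative to disabling the Arrow optimization in the fixture, the session could keep it enabled and pass the commonly documented JVM options instead. This is only a sketch, not tested against this conftest; the appName and option values below are assumptions and may need adjusting for the Spark/Arrow versions pinned by the repo:

       # Sketch only (untested): keep the Arrow path enabled and give the JVM the
       # flags that Arrow's memory utilities need on JDK 16+, instead of turning
       # spark.sql.execution.arrow.pyspark.enabled off.
       from pyspark.sql import SparkSession

       jvm_opts = (
           "--add-opens=java.base/java.nio=ALL-UNNAMED "
           "-Dio.netty.tryReflectionSetAccessible=true"
       )

       spark = (
           SparkSession.builder
           .appName("pyiceberg-integration")  # hypothetical name, not from the diff
           .config("spark.driver.extraJavaOptions", jvm_opts)
           .config("spark.executor.extraJavaOptions", jvm_opts)
           .config("spark.sql.execution.arrow.pyspark.enabled", "true")
           .getOrCreate()
       )

   If the extra options cannot be injected reliably in this environment (they only take effect when set before the driver JVM starts), falling back to "false" as in the diff is the simpler fix.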
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org