This is an automated email from the ASF dual-hosted git repository.
chengpan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 3b06a6580a34 [SPARK-55696][SQL] Add explicit error to Encoders.bean
for interface class
3b06a6580a34 is described below
commit 3b06a6580a3484c71444b2229c1292c2b800eb20
Author: Szehon Ho <[email protected]>
AuthorDate: Fri Feb 27 16:49:33 2026 +0800
[SPARK-55696][SQL] Add explicit error to Encoders.bean for interface class
### What changes were proposed in this pull request?
Add explicit error message to Encoders.bean against interfaces
### Why are the changes needed?
For bean Encoders, the de-serializer uses the constructor inferred from the
bean Encoder. I.e., when we get the bytes back, Dataset deserialization will
use the Encoder's interface class to try to construct an instance, and fail.
Code ref:
https://github.com/apache/spark/blob/b3703755d80585297367d539de9fa8c5783b1c6b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/DeserializerBuildHelper.scala#L482
### Does this PR introduce _any_ user-facing change?
No, it already fails today with another message:
```
java.lang.IllegalStateException: found an unhandled type: null
at
org.apache.commons.lang3.reflect.TypeUtils.getTypeArguments(TypeUtils.java:915)
at
org.apache.commons.lang3.reflect.TypeUtils.getTypeArguments(TypeUtils.java:791)
at
org.apache.commons.lang3.reflect.TypeUtils.getTypeArguments(TypeUtils.java:886)
at
org.apache.commons.lang3.reflect.TypeUtils.getTypeArguments(TypeUtils.java:873)
at
org.apache.spark.sql.catalyst.JavaTypeInference$.encoderFor(JavaTypeInference.scala:159)
at
org.apache.spark.sql.catalyst.JavaTypeInference$.encoderFor(JavaTypeInference.scala:63)
at
org.apache.spark.sql.catalyst.JavaTypeInference$.encoderFor(JavaTypeInference.scala:56)
at org.apache.spark.sql.Encoders$.bean(Encoders.scala:211)
at org.apache.spark.sql.Encoders.bean(Encoders.scala)
```
It will now fail with a more explicit error message.
### How was this patch tested?
Added unit test in JavaDatasetSuite
### Was this patch authored or co-authored using generative AI tooling?
Yes, cursor
Closes #54494 from szehon-ho/encoder_interface.
Authored-by: Szehon Ho <[email protected]>
Signed-off-by: Cheng Pan <[email protected]>
---
.../src/main/resources/error/error-conditions.json | 6 ++
.../main/scala/org/apache/spark/sql/Encoders.scala | 2 +-
.../spark/sql/catalyst/JavaTypeInference.scala | 8 ++
.../org/apache/spark/sql/JavaDatasetSuite.java | 115 +++++++++++++++++++++
4 files changed, 130 insertions(+), 1 deletion(-)
diff --git a/common/utils/src/main/resources/error/error-conditions.json
b/common/utils/src/main/resources/error/error-conditions.json
index 94111b8e9ee0..52a21e43ea9e 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -206,6 +206,12 @@
],
"sqlState" : "42K03"
},
+ "BEAN_ENCODER_INTERFACE_NOT_SUPPORTED" : {
+ "message" : [
+ "Bean encoder does not support interface type <className>."
+ ],
+ "sqlState" : "0A000"
+ },
"BINARY_ARITHMETIC_OVERFLOW" : {
"message" : [
"<value1> <symbol> <value2> caused overflow. Use <functionName> to
ignore overflow problem and return NULL."
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/Encoders.scala
b/sql/api/src/main/scala/org/apache/spark/sql/Encoders.scala
index 7e698e58321e..72cd1190ba40 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/Encoders.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/Encoders.scala
@@ -195,7 +195,7 @@ object Encoders {
/**
* Creates an encoder for Java Bean of type T.
*
- * T must be publicly accessible.
+ * T must be a concrete class (not an interface), and must be publicly
accessible.
*
* supported types for java bean field:
* - primitive types: boolean, int, double, etc.
diff --git
a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
index 91947cf416fb..c7d3e4a47c7f 100644
---
a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
+++
b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
@@ -26,6 +26,7 @@ import scala.reflect.ClassTag
import org.apache.commons.lang3.reflect.{TypeUtils => JavaTypeUtils}
+import org.apache.spark.SparkUnsupportedOperationException
import org.apache.spark.sql.catalyst.encoders.AgnosticEncoder
import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{ArrayEncoder,
BinaryEncoder, BoxedBooleanEncoder, BoxedByteEncoder, BoxedDoubleEncoder,
BoxedFloatEncoder, BoxedIntEncoder, BoxedLongEncoder, BoxedShortEncoder,
DayTimeIntervalEncoder, DEFAULT_GEOGRAPHY_ENCODER, DEFAULT_GEOMETRY_ENCODER,
DEFAULT_JAVA_DECIMAL_ENCODER, EncoderField, IterableEncoder, JavaBeanEncoder,
JavaBigIntEncoder, JavaEnumEncoder, LocalDateTimeEncoder, LocalTimeEncoder,
MapEncoder, PrimitiveBooleanEncoder, [...]
import org.apache.spark.sql.errors.ExecutionErrors
@@ -151,6 +152,13 @@ object JavaTypeInference {
if (seenTypeSet.contains(c)) {
throw ExecutionErrors.cannotHaveCircularReferencesInBeanClassError(c)
}
+ // Encoders for interfaces are not supported because de-serialization
uses its
+ // Deserializer to instantiate the class, which will not work for
interfaces.
+ if (c.isInterface) {
+ throw new SparkUnsupportedOperationException(
+ errorClass = "BEAN_ENCODER_INTERFACE_NOT_SUPPORTED",
+ messageParameters = Map("className" -> c.getName))
+ }
// TODO: we should only collect properties that have getter and setter.
However, some tests
// pass in scala case class as java bean class which doesn't have
getter and setter.
diff --git
a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
index 5b1f98475d51..62d44e0af8b0 100644
--- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
+++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
@@ -38,6 +38,7 @@ import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
+import org.apache.spark.SparkUnsupportedOperationException;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.*;
@@ -1919,6 +1920,120 @@ public class JavaDatasetSuite implements Serializable {
}
+ /**
+ * Interface with JavaBean-style getters/setters for testing encoder with
interface type.
+ */
+ public interface BeanInterface extends Serializable {
+ String getValue();
+ void setValue(String value);
+ int getId();
+ void setId(int id);
+ }
+
+ public static class BeanImplA implements BeanInterface {
+ private String value;
+ private int id;
+
+ @Override
+ public String getValue() { return value; }
+ @Override
+ public void setValue(String value) { this.value = value; }
+ @Override
+ public int getId() { return id; }
+ @Override
+ public void setId(int id) { this.id = id; }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (!(o instanceof BeanImplA)) return false;
+ BeanImplA that = (BeanImplA) o;
+ return id == that.id && Objects.equals(value, that.value);
+ }
+ @Override
+ public int hashCode() { return Objects.hash(value, id); }
+ }
+
+ public static class BeanImplB implements BeanInterface {
+ private String value;
+ private int id;
+
+ @Override
+ public String getValue() { return value; }
+ @Override
+ public void setValue(String value) { this.value = value; }
+ @Override
+ public int getId() { return id; }
+ @Override
+ public void setId(int id) { this.id = id; }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (!(o instanceof BeanImplB)) return false;
+ BeanImplB that = (BeanImplB) o;
+ return id == that.id && Objects.equals(value, that.value);
+ }
+ @Override
+ public int hashCode() { return Objects.hash(value, id); }
+ }
+
+ @Test
+ public void testBeanEncoderRejectsInterface() {
+ SparkUnsupportedOperationException e = Assertions.assertThrows(
+ SparkUnsupportedOperationException.class,
+ () -> Encoders.bean(BeanInterface.class));
+ Assertions.assertEquals("BEAN_ENCODER_INTERFACE_NOT_SUPPORTED",
e.getCondition());
+ Assertions.assertEquals("0A000", e.getSqlState());
+ Assertions.assertEquals(
+ Collections.singletonMap("className", BeanInterface.class.getName()),
+ e.getMessageParameters());
+ }
+
+ @Test
+ public void testKryoEncoderWithInterface() {
+ BeanImplA a = new BeanImplA();
+ a.setValue("a");
+ a.setId(1);
+ BeanImplB b = new BeanImplB();
+ b.setValue("b");
+ b.setId(2);
+ List<BeanInterface> data = Arrays.asList(a, b);
+
+ Encoder<BeanInterface> kryoEncoder = Encoders.kryo(BeanInterface.class);
+ Dataset<BeanInterface> ds = spark.createDataset(data, kryoEncoder);
+ List<BeanInterface> collected = ds.collectAsList();
+ Assertions.assertEquals(2, collected.size());
+ Assertions.assertEquals("a", collected.get(0).getValue());
+ Assertions.assertEquals(1, collected.get(0).getId());
+ Assertions.assertEquals("b", collected.get(1).getValue());
+ Assertions.assertEquals(2, collected.get(1).getId());
+ Assertions.assertInstanceOf(BeanImplA.class, collected.get(0));
+ Assertions.assertInstanceOf(BeanImplB.class, collected.get(1));
+ }
+
+ @Test
+ public void testJavaSerializationEncoderWithInterface() {
+ BeanImplA a = new BeanImplA();
+ a.setValue("a");
+ a.setId(1);
+ BeanImplB b = new BeanImplB();
+ b.setValue("b");
+ b.setId(2);
+ List<BeanInterface> data = Arrays.asList(a, b);
+
+ Encoder<BeanInterface> javaEncoder =
Encoders.javaSerialization(BeanInterface.class);
+ Dataset<BeanInterface> ds = spark.createDataset(data, javaEncoder);
+ List<BeanInterface> collected = ds.collectAsList();
+ Assertions.assertEquals(2, collected.size());
+ Assertions.assertEquals("a", collected.get(0).getValue());
+ Assertions.assertEquals(1, collected.get(0).getId());
+ Assertions.assertEquals("b", collected.get(1).getValue());
+ Assertions.assertEquals(2, collected.get(1).getId());
+ Assertions.assertInstanceOf(BeanImplA.class, collected.get(0));
+ Assertions.assertInstanceOf(BeanImplB.class, collected.get(1));
+ }
+
public class CircularReference1Bean implements Serializable {
private CircularReference2Bean child;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]