This is an automated email from the ASF dual-hosted git repository.
mgrigorov pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new d9ed85fd1 fix: Support on all-literal RLIKE expression (#3647)
d9ed85fd1 is described below
commit d9ed85fd186a7aab7273a61bce660c0910127e3a
Author: ChenChen Lai <[email protected]>
AuthorDate: Thu Mar 12 19:21:11 2026 +0800
fix: Support on all-literal RLIKE expression (#3647)
* Native engine crashes on all-literal RLIKE expression
* add test
* address comment
* Update native/spark-expr/src/predicate_funcs/rlike.rs
Co-authored-by: Martin Grigorov <[email protected]>
* address comment test
* fix fmt
---------
Co-authored-by: Martin Grigorov <[email protected]>
---
native/spark-expr/src/predicate_funcs/rlike.rs | 72 +++++++++++++++++++++-
.../sql-tests/expressions/string/rlike_enabled.sql | 2 +-
2 files changed, 70 insertions(+), 4 deletions(-)
diff --git a/native/spark-expr/src/predicate_funcs/rlike.rs b/native/spark-expr/src/predicate_funcs/rlike.rs
index 099e9852c..ed5970a6a 100644
--- a/native/spark-expr/src/predicate_funcs/rlike.rs
+++ b/native/spark-expr/src/predicate_funcs/rlike.rs
@@ -21,7 +21,7 @@ use arrow::array::types::Int32Type;
use arrow::array::{Array, BooleanArray, DictionaryArray, RecordBatch, StringArray};
use arrow::compute::take;
use arrow::datatypes::{DataType, Schema};
-use datafusion::common::{internal_err, Result};
+use datafusion::common::{internal_err, Result, ScalarValue};
use datafusion::physical_expr::PhysicalExpr;
use datafusion::physical_plan::ColumnarValue;
use regex::Regex;
@@ -140,8 +140,24 @@ impl PhysicalExpr for RLike {
let array = self.is_match(inputs);
Ok(ColumnarValue::Array(Arc::new(array)))
}
- ColumnarValue::Scalar(_) => {
- internal_err!("non scalar regexp patterns are not supported")
+ ColumnarValue::Scalar(scalar) => {
+ if scalar.is_null() {
+ return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None)));
+ }
+
+ let is_match = match scalar {
+ ScalarValue::Utf8(Some(s))
+ | ScalarValue::LargeUtf8(Some(s))
+ | ScalarValue::Utf8View(Some(s)) => self.pattern.is_match(&s),
+ _ => {
+ return internal_err!(
+ "RLike requires string type for input, got {:?}",
+ scalar.data_type()
+ );
+ }
+ };
+
+ Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(is_match))))
}
}
}
@@ -165,3 +181,53 @@ impl PhysicalExpr for RLike {
Display::fmt(self, f)
}
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use datafusion::physical_expr::expressions::Literal;
+
+ #[test]
+ fn test_rlike_scalar_string_variants() {
+ let pattern = "R[a-z]+";
+ let scalars = [
+ ScalarValue::Utf8(Some("Rose".to_string())),
+ ScalarValue::LargeUtf8(Some("Rose".to_string())),
+ ScalarValue::Utf8View(Some("Rose".to_string())),
+ ];
+
+ for scalar in scalars {
+ let expr = RLike::try_new(Arc::new(Literal::new(scalar.clone())), pattern).unwrap();
+ let result = expr
+ .evaluate(&RecordBatch::new_empty(Arc::new(Schema::empty())))
+ .unwrap();
+ let ColumnarValue::Scalar(result) = result else {
+ panic!("expected scalar result");
+ };
+ assert_eq!(result, ScalarValue::Boolean(Some(true)));
+ }
+
+ // Null input should produce a null boolean result
+ let expr =
+ RLike::try_new(Arc::new(Literal::new(ScalarValue::Utf8(None))), pattern).unwrap();
+ let result = expr
+ .evaluate(&RecordBatch::new_empty(Arc::new(Schema::empty())))
+ .unwrap();
+ let ColumnarValue::Scalar(result) = result else {
+ panic!("expected scalar result");
+ };
+ assert_eq!(result, ScalarValue::Boolean(None));
+ }
+
+ #[test]
+ fn test_rlike_scalar_non_string_error() {
+ let expr = RLike::try_new(
+ Arc::new(Literal::new(ScalarValue::Boolean(Some(true)))),
+ "R[a-z]+",
+ )
+ .unwrap();
+
+ let result = expr.evaluate(&RecordBatch::new_empty(Arc::new(Schema::empty())));
+ assert!(result.is_err());
+ }
+}
diff --git a/spark/src/test/resources/sql-tests/expressions/string/rlike_enabled.sql b/spark/src/test/resources/sql-tests/expressions/string/rlike_enabled.sql
index 822fb3ddb..1de215a77 100644
--- a/spark/src/test/resources/sql-tests/expressions/string/rlike_enabled.sql
+++ b/spark/src/test/resources/sql-tests/expressions/string/rlike_enabled.sql
@@ -35,5 +35,5 @@ query
SELECT s RLIKE '' FROM test_rlike_enabled
-- literal arguments
-query ignore(https://github.com/apache/datafusion-comet/issues/3343)
+query
SELECT 'hello' RLIKE '^[a-z]+$', '12345' RLIKE '^[a-z]+$', '' RLIKE '', NULL RLIKE 'a'
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]