This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 0ac494cb981 branch-4.0: [feature](udf) support varbinary type in
java-udf #56181 (#56468)
0ac494cb981 is described below
commit 0ac494cb981db88ec110efdcec1235b324d99a9e
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Sat Sep 27 09:53:51 2025 +0800
branch-4.0: [feature](udf) support varbinary type in java-udf #56181
(#56468)
Cherry-picked from #56181
Co-authored-by: zhangstar333 <[email protected]>
---
be/src/util/jni-util.cpp | 28 ++---
be/src/util/jni_native_method.cpp | 85 ++++++++++++--
be/src/util/jni_native_method.h | 11 +-
be/src/vec/columns/column_varbinary.cpp | 15 ---
be/src/vec/columns/column_varbinary.h | 4 -
be/src/vec/exec/jni_connector.cpp | 29 ++---
be/test/vec/columns/column_varbinary_test.cpp | 20 ----
.../docker-compose/mysql/init/03-create-table.sql | 4 +
.../docker-compose/mysql/init/04-insert.sql | 1 +
.../doris/common/jni/utils/JNINativeMethod.java | 17 ++-
.../doris/common/jni/utils/JavaUdfDataType.java | 5 +
.../org/apache/doris/common/jni/utils/OffHeap.java | 42 +++++++
.../apache/doris/common/jni/vec/ColumnType.java | 9 +-
.../apache/doris/common/jni/vec/VectorColumn.java | 129 +++++++++++++++++++--
.../java/org/apache/doris/udf/BaseExecutor.java | 44 +++++++
.../org/apache/doris/jdbc/BaseJdbcExecutor.java | 6 +-
.../java/org/apache/doris/catalog/ScalarType.java | 4 +-
.../main/java/org/apache/doris/catalog/Type.java | 1 +
.../nereids/rules/expression/check/CheckCast.java | 6 +
.../trees/plans/commands/info/BaseViewInfo.java | 4 +
.../trees/plans/commands/info/CreateMTMVInfo.java | 3 +
.../trees/plans/commands/info/CreateTableInfo.java | 3 +
.../org/apache/doris/nereids/types/DataType.java | 5 +-
.../apache/doris/statistics/ColumnStatistic.java | 4 +-
.../org/apache/doris/catalog/CreateViewTest.java | 8 ++
.../nereids/parser/VarBinaryLiteralParserTest.java | 92 +++++++++------
.../trees/plans/CreateTableCommandTest.java | 37 ++++++
.../select/test_mysql_all_types_select.out | 8 ++
.../select/test_mysql_varbinary_with_udf.out | 26 +++++
.../java/org/apache/doris/udf/VarBinaryTest.java | 31 ++---
.../java/org/apache/doris/udf/VarBinaryTest2.java | 31 ++---
.../select/test_mysql_all_types_select.groovy | 4 +-
.../select/test_mysql_varbinary_with_udf.groovy | 70 +++++++++++
33 files changed, 604 insertions(+), 182 deletions(-)
diff --git a/be/src/util/jni-util.cpp b/be/src/util/jni-util.cpp
index 55e95cb431c..369f4dd80c4 100644
--- a/be/src/util/jni-util.cpp
+++ b/be/src/util/jni-util.cpp
@@ -591,20 +591,22 @@ Status JniUtil::Init() {
if (env->ExceptionOccurred()) {
return Status::JniError("Failed to delete local reference to
JNINativeMethod class.");
}
- std::string resize_column_name = "resizeStringColumn";
- std::string resize_column_sign = "(JI)J";
- std::string memory_alloc_name = "memoryTrackerMalloc";
- std::string memory_alloc_sign = "(J)J";
- std::string memory_free_name = "memoryTrackerFree";
- std::string memory_free_sign = "(J)V";
+
+ static char memory_alloc_name[] = "memoryTrackerMalloc";
+ static char memory_alloc_sign[] = "(J)J";
+ static char memory_free_name[] = "memoryTrackerFree";
+ static char memory_free_sign[] = "(J)V";
+ static char memory_alloc_batch_name[] = "memoryTrackerMallocBatch";
+ static char memory_alloc_batch_sign[] = "([I)[J";
+ static char memory_free_batch_name[] = "memoryTrackerFreeBatch";
+ static char memory_free_batch_sign[] = "([J)V";
static JNINativeMethod java_native_methods[] = {
- {const_cast<char*>(resize_column_name.c_str()),
- const_cast<char*>(resize_column_sign.c_str()),
- (void*)&JavaNativeMethods::resizeStringColumn},
- {const_cast<char*>(memory_alloc_name.c_str()),
- const_cast<char*>(memory_alloc_sign.c_str()),
(void*)&JavaNativeMethods::memoryMalloc},
- {const_cast<char*>(memory_free_name.c_str()),
- const_cast<char*>(memory_free_sign.c_str()),
(void*)&JavaNativeMethods::memoryFree},
+ {memory_alloc_name, memory_alloc_sign,
(void*)&JavaNativeMethods::memoryMalloc},
+ {memory_free_name, memory_free_sign,
(void*)&JavaNativeMethods::memoryFree},
+ {memory_alloc_batch_name, memory_alloc_batch_sign,
+ (void*)&JavaNativeMethods::memoryMallocBatch},
+ {memory_free_batch_name, memory_free_batch_sign,
+ (void*)&JavaNativeMethods::memoryFreeBatch},
};
int res = env->RegisterNatives(jni_native_method_exc_cl_,
java_native_methods,
diff --git a/be/src/util/jni_native_method.cpp
b/be/src/util/jni_native_method.cpp
index 50f43c4d9ab..ce01b521b60 100644
--- a/be/src/util/jni_native_method.cpp
+++ b/be/src/util/jni_native_method.cpp
@@ -17,20 +17,16 @@
#include "jni_native_method.h"
-#include <stdlib.h>
+#include <glog/logging.h>
+
+#include <cstdlib>
+#include <vector>
#include "jni.h"
-#include "vec/columns/column_string.h"
+#include "util/defer_op.h"
namespace doris {
-jlong JavaNativeMethods::resizeStringColumn(JNIEnv* env, jclass clazz, jlong
columnAddr,
- jint length) {
- auto column =
reinterpret_cast<vectorized::ColumnString::Chars*>(columnAddr);
- column->resize(length);
- return reinterpret_cast<jlong>(column->data());
-}
-
jlong JavaNativeMethods::memoryMalloc(JNIEnv* env, jclass clazz, jlong bytes) {
return reinterpret_cast<long>(malloc(bytes));
}
@@ -39,4 +35,75 @@ void JavaNativeMethods::memoryFree(JNIEnv* env, jclass
clazz, jlong address) {
free(reinterpret_cast<void*>(address));
}
+// Allocates one native block per entry of `sizes` and returns the addresses as a
+// long[] of the same length. Returns nullptr when `sizes` is null, when any
+// requested size is negative, when a JNI array operation fails, or when any
+// malloc fails; on failure every block obtained so far is released, so the
+// caller never owns a partially allocated batch.
+jlongArray JavaNativeMethods::memoryMallocBatch(JNIEnv* env, jclass clazz, jintArray sizes) {
+    DCHECK(sizes != nullptr);
+    // DCHECK is compiled out of release builds, so guard explicitly as well
+    // (memoryFreeBatch handles a null array the same way).
+    if (sizes == nullptr) {
+        return nullptr;
+    }
+    jsize n = env->GetArrayLength(sizes);
+    DCHECK(n > 0);
+    jint* elems = env->GetIntArrayElements(sizes, nullptr);
+    if (elems == nullptr) {
+        return nullptr;
+    }
+    // JNI_ABORT: the sizes are only read, so nothing has to be copied back.
+    DEFER({ env->ReleaseIntArrayElements(sizes, elems, JNI_ABORT); });
+
+    jlongArray result = env->NewLongArray(n);
+    if (result == nullptr) {
+        return nullptr;
+    }
+
+    std::vector<jlong> addrs;
+    addrs.reserve(n);
+
+    // The Java side is expected to pass only positive sizes; a negative value
+    // would wrap to an enormous size_t, so it is treated as an allocation failure.
+    for (jsize i = 0; i < n; ++i) {
+        void* p = elems[i] < 0 ? nullptr : malloc(static_cast<size_t>(elems[i]));
+        if (p == nullptr) {
+            // Roll back everything allocated for this batch before reporting failure.
+            for (jlong addr : addrs) {
+                free(reinterpret_cast<void*>(addr));
+            }
+            return nullptr;
+        }
+        addrs.push_back(reinterpret_cast<jlong>(p));
+    }
+
+    env->SetLongArrayRegion(result, 0, n, addrs.data());
+    return result;
+}
+
+// Releases every non-zero address contained in `addrs` with free(). A null or
+// empty array, or a failure to obtain the array elements, makes this a no-op.
+void JavaNativeMethods::memoryFreeBatch(JNIEnv* env, jclass clazz, jlongArray addrs) {
+    const jsize count = (addrs == nullptr) ? 0 : env->GetArrayLength(addrs);
+    if (count <= 0) {
+        return;
+    }
+    jlong* raw = env->GetLongArrayElements(addrs, nullptr);
+    if (raw == nullptr) {
+        return;
+    }
+    for (jlong* it = raw; it != raw + count; ++it) {
+        if (*it == 0) {
+            continue;
+        }
+        free(reinterpret_cast<void*>(*it));
+    }
+    // JNI_ABORT: the address array was only read, nothing to write back.
+    env->ReleaseLongArrayElements(addrs, raw, JNI_ABORT);
+}
+
} // namespace doris
diff --git a/be/src/util/jni_native_method.h b/be/src/util/jni_native_method.h
index e3dc3fb3dd9..48c74d91d67 100644
--- a/be/src/util/jni_native_method.h
+++ b/be/src/util/jni_native_method.h
@@ -27,11 +27,6 @@ namespace doris {
* Java native methods for org.apache.doris.common.jni.utils.JNINativeMethod.
*/
struct JavaNativeMethods {
- /**
- * Resize string column and return the new column address.
- */
- static jlong resizeStringColumn(JNIEnv* env, jclass clazz, jlong
columnAddr, jint length);
-
/**
* Allocate memory, which will be tracked by memory tracker.
*/
@@ -41,6 +36,12 @@ struct JavaNativeMethods {
* Free memory, which will be tracked by memory tracker.
*/
static void memoryFree(JNIEnv* env, jclass clazz, jlong address);
+
+ // Batch allocate multiple blocks; sizes is an int[]; returns long[] of
addresses.
+ static jlongArray memoryMallocBatch(JNIEnv* env, jclass clazz, jintArray
sizes);
+
+ // Batch free multiple addresses; addrs is a long[]
+ static void memoryFreeBatch(JNIEnv* env, jclass clazz, jlongArray addrs);
};
} // namespace doris
diff --git a/be/src/vec/columns/column_varbinary.cpp
b/be/src/vec/columns/column_varbinary.cpp
index 173c6876d5f..00698921e60 100644
--- a/be/src/vec/columns/column_varbinary.cpp
+++ b/be/src/vec/columns/column_varbinary.cpp
@@ -162,20 +162,5 @@ void ColumnVarbinary::replace_column_data(const IColumn&
rhs, size_t row, size_t
_data[self_row] = doris::StringView(dst, val.size());
}
-ColumnPtr ColumnVarbinary::convert_to_string_column() const {
- auto string_column = ColumnString::create();
- auto& res_data = assert_cast<ColumnString&>(*string_column).get_chars();
- auto& res_offsets =
assert_cast<ColumnString&>(*string_column).get_offsets();
- res_data.reserve(res_data.size() + byte_size());
- size_t current_offset = 0;
- for (const auto& value : _data) {
- res_data.insert(value.data(), value.data() + value.size());
- current_offset += value.size();
- res_offsets.push_back(current_offset);
- }
- _converted_string_column = string_column->get_ptr();
- return _converted_string_column;
-}
-
#include "common/compile_check_end.h"
} // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_varbinary.h
b/be/src/vec/columns/column_varbinary.h
index a20012c3f5a..20417ba2d66 100644
--- a/be/src/vec/columns/column_varbinary.h
+++ b/be/src/vec/columns/column_varbinary.h
@@ -159,13 +159,9 @@ public:
return _data[row].size() + sizeof(uint32_t);
}
- ColumnPtr convert_to_string_column() const;
-
private:
Container _data;
Arena _arena;
- // used in convert_to_string_column, maybe need a better way to deal with
it
- mutable ColumnPtr _converted_string_column;
};
#include "common/compile_check_end.h"
} // namespace doris::vectorized
diff --git a/be/src/vec/exec/jni_connector.cpp
b/be/src/vec/exec/jni_connector.cpp
index 21e08ec0927..91457cb7718 100644
--- a/be/src/vec/exec/jni_connector.cpp
+++ b/be/src/vec/exec/jni_connector.cpp
@@ -388,16 +388,22 @@ Status JniConnector::_fill_column(TableMetaAddress&
address, ColumnPtr& doris_co
Status JniConnector::_fill_varbinary_column(TableMetaAddress& address,
MutableColumnPtr& doris_column,
size_t num_rows) {
+ auto* meta_base = reinterpret_cast<char*>(address.next_meta_as_ptr());
auto& varbinary_col = assert_cast<ColumnVarbinary&>(*doris_column);
- int* offsets = reinterpret_cast<int*>(address.next_meta_as_ptr());
- char* chars = reinterpret_cast<char*>(address.next_meta_as_ptr());
- if (num_rows == 0) {
- return Status::OK();
- }
+ // Java side writes per-row metadata as 16 bytes: [len: long][addr: long]
for (size_t i = 0; i < num_rows; ++i) {
- int start_offset = (i == 0) ? 0 : offsets[i - 1];
- int end_offset = offsets[i];
- varbinary_col.insert_data(chars + start_offset, end_offset -
start_offset);
+ // Read length (first 8 bytes)
+ int64_t len = 0;
+ memcpy(&len, meta_base + 16 * i, sizeof(len));
+ if (len <= 0) {
+ varbinary_col.insert_default();
+ } else {
+ // Read address (next 8 bytes)
+ uint64_t addr_u = 0;
+ memcpy(&addr_u, meta_base + 16 * i + 8, sizeof(addr_u));
+ const char* src = reinterpret_cast<const char*>(addr_u);
+ varbinary_col.insert_data(src, static_cast<size_t>(len));
+ }
}
return Status::OK();
}
@@ -784,12 +790,9 @@ Status JniConnector::_fill_column_meta(const ColumnPtr&
doris_column, const Data
break;
}
case PrimitiveType::TYPE_VARBINARY: {
- // TODO, here is maybe not efficient, need optimize later
const auto& varbinary_col = assert_cast<const
ColumnVarbinary&>(*data_column);
- auto string_column_ptr = varbinary_col.convert_to_string_column();
- const auto& string_col = assert_cast<const
ColumnString&>(*string_column_ptr);
- meta_data.emplace_back((long)string_col.get_offsets().data());
- meta_data.emplace_back((long)string_col.get_chars().data());
+ meta_data.emplace_back(
+ (long)assert_cast<const
ColumnVarbinary&>(varbinary_col).get_data().data());
break;
}
default:
diff --git a/be/test/vec/columns/column_varbinary_test.cpp
b/be/test/vec/columns/column_varbinary_test.cpp
index b2ab4df735f..f593c1d2001 100644
--- a/be/test/vec/columns/column_varbinary_test.cpp
+++ b/be/test/vec/columns/column_varbinary_test.cpp
@@ -335,26 +335,6 @@ TEST_F(ColumnVarbinaryTest,
SerializeSizeAtShouldIncludeLengthHeader) {
EXPECT_EQ(sz, v.size() + sizeof(uint32_t));
}
-TEST_F(ColumnVarbinaryTest, ConvertToStringColumn) {
- auto col = ColumnVarbinary::create();
- std::vector<std::string> vals = {make_bytes(0, 0x00), make_bytes(3, 0x80),
- make_bytes(doris::StringView::kInlineSize
+ 2, 0x81)};
- for (auto& v : vals) {
- col->insert_data(v.data(), v.size());
- }
-
- auto str_mut =
assert_cast<ColumnVarbinary&>(*col).convert_to_string_column();
- auto str_col_ptr = std::move(str_mut);
- const auto& str_col = assert_cast<const ColumnString&>(*str_col_ptr);
-
- ASSERT_EQ(str_col.size(), vals.size());
- for (size_t i = 0; i < vals.size(); ++i) {
- auto r = str_col.get_data_at(i);
- ASSERT_EQ(r.size, vals[i].size());
- ASSERT_EQ(memcmp(r.data, vals[i].data(), r.size), 0);
- }
-}
-
TEST_F(ColumnVarbinaryTest, FieldAccessOperatorAndGet) {
auto col = ColumnVarbinary::create();
std::vector<std::string> vals = {
diff --git a/docker/thirdparties/docker-compose/mysql/init/03-create-table.sql
b/docker/thirdparties/docker-compose/mysql/init/03-create-table.sql
index 9acb2bec271..789dcc2080a 100644
--- a/docker/thirdparties/docker-compose/mysql/init/03-create-table.sql
+++ b/docker/thirdparties/docker-compose/mysql/init/03-create-table.sql
@@ -523,4 +523,8 @@ CREATE TABLE test_varbinary_db.`test_varbinary` (
`id` int(11) DEFAULT NULL,
`varbinary_c` varbinary(100)
);
+CREATE TABLE test_varbinary_db.`test_varbinary_udf` (
+ `id` int(11) DEFAULT NULL,
+ `varbinary_c` varbinary(100)
+);
diff --git a/docker/thirdparties/docker-compose/mysql/init/04-insert.sql
b/docker/thirdparties/docker-compose/mysql/init/04-insert.sql
index e5a7710f8ff..0677c6e9096 100644
--- a/docker/thirdparties/docker-compose/mysql/init/04-insert.sql
+++ b/docker/thirdparties/docker-compose/mysql/init/04-insert.sql
@@ -1210,6 +1210,7 @@ INSERT INTO doris_test.`test_cast` VALUES (1, '1',
'2022-01-01', '2022-01-01 00:
INSERT INTO doris_test.`test_cast` VALUES (2, '2', '2022-01-02', '2022-01-02
00:00:01');
INSERT INTO test_varbinary_db.`test_varbinary` VALUES (1,
X'48656C6C6F20576F726C64'), (2, X'48656C6C6F20576F726C6421');
+INSERT INTO test_varbinary_db.`test_varbinary_udf` VALUES (1,
X'48656C6C6F20576F726C64'), (2, X'48656C6C6F20576F726C6421'), (3, NULL), (4,
X'AB'), (5, X'ABCDEF');
ANALYZE TABLE Doris.doris;
ANALYZE TABLE Doris.DORIS;
diff --git
a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JNINativeMethod.java
b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JNINativeMethod.java
index 50820c2ecb9..9ba171e5c53 100644
---
a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JNINativeMethod.java
+++
b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JNINativeMethod.java
@@ -21,11 +21,6 @@ package org.apache.doris.common.jni.utils;
* Native method in doris::JavaNativeMethods.
*/
public class JNINativeMethod {
- /**
- * Resize string column and return the new column address in off heap.
- */
- public static native long resizeStringColumn(long columnAddr, int
byteSize);
-
/**
* Allocate memory in off heap, which will be tracked by memory tracker.
*/
@@ -35,4 +30,16 @@ public class JNINativeMethod {
* Free memory in off heap, which will be tracked by memory tracker.
*/
public static native void memoryTrackerFree(long address);
+
+ /**
+ * Allocate multiple off-heap memory blocks in a single JNI call. Each
element in sizes
+ * corresponds to one allocation. Returns an array of addresses with the
same length.
+ */
+ public static native long[] memoryTrackerMallocBatch(int[] sizes);
+
+ /**
+ * Free multiple off-heap memory blocks in a single JNI call. Each element
in addrs
+ * corresponds to one deallocation.
+ */
+ public static native void memoryTrackerFreeBatch(long[] addrs);
}
diff --git
a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JavaUdfDataType.java
b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JavaUdfDataType.java
index 2114ff7c4a4..7df4ca36119 100644
---
a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JavaUdfDataType.java
+++
b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JavaUdfDataType.java
@@ -60,6 +60,8 @@ public class JavaUdfDataType {
public static final JavaUdfDataType ARRAY_TYPE = new
JavaUdfArrayType("ARRAY_TYPE", TPrimitiveType.ARRAY, 0);
public static final JavaUdfDataType MAP_TYPE = new
JavaUdfMapType("MAP_TYPE", TPrimitiveType.MAP, 0);
public static final JavaUdfDataType STRUCT_TYPE = new
JavaUdfStructType("STRUCT_TYPE", TPrimitiveType.STRUCT, 0);
+ public static final JavaUdfDataType VARBINARY_TYPE = new
JavaUdfDataType("VARBINARY_TYPE", TPrimitiveType.VARBINARY,
+ 0);
private static final Map<TPrimitiveType, JavaUdfDataType>
javaUdfDataTypeMap = new HashMap<>();
@@ -91,6 +93,7 @@ public class JavaUdfDataType {
addJavaUdfDataType(STRUCT_TYPE);
addJavaUdfDataType(IPV4);
addJavaUdfDataType(IPV6);
+ addJavaUdfDataType(VARBINARY_TYPE);
}
private final String description;
@@ -160,6 +163,8 @@ public class JavaUdfDataType {
return Sets.newHashSet(JavaUdfDataType.MAP_TYPE);
} else if (c == InetAddress.class) {
return Sets.newHashSet(JavaUdfDataType.IPV4, JavaUdfDataType.IPV6);
+ } else if (c == Byte[].class || c == byte[].class) {
+ return Sets.newHashSet(JavaUdfDataType.VARBINARY_TYPE);
}
return Sets.newHashSet(JavaUdfDataType.INVALID_TYPE);
}
diff --git
a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/OffHeap.java
b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/OffHeap.java
index b84bfbe1e61..d6822881ecc 100644
---
a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/OffHeap.java
+++
b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/OffHeap.java
@@ -199,6 +199,48 @@ public class OffHeap {
return newMemory;
}
+    /**
+     * Allocates one off-heap block per entry of {@code sizes}.
+     * Outside testing mode the whole batch goes through a single
+     * {@code JNINativeMethod.memoryTrackerMallocBatch} call; in testing mode each
+     * block is obtained with {@code allocateMemory}.
+     *
+     * @param sizes requested block sizes in bytes
+     * @return the block addresses, in the same order as {@code sizes};
+     *         an empty array when {@code sizes} is null or empty
+     * @throws OutOfMemoryError if the native batch call returns null
+     */
+    public static long[] allocateMemoryBatch(int[] sizes) {
+        final int blockCount = (sizes == null) ? 0 : sizes.length;
+        if (blockCount == 0) {
+            return new long[0];
+        }
+        if (!IS_TESTING) {
+            long[] nativeAddrs = JNINativeMethod.memoryTrackerMallocBatch(sizes);
+            if (nativeAddrs != null) {
+                return nativeAddrs;
+            }
+            throw new OutOfMemoryError("memoryTrackerMallocBatch failed for " + blockCount + " blocks");
+        }
+        long[] result = new long[blockCount];
+        int idx = 0;
+        for (int size : sizes) {
+            result[idx++] = allocateMemory(size);
+        }
+        return result;
+    }
+
+    /**
+     * Frees a batch of off-heap blocks.
+     * Outside testing mode the whole array is handed to a single
+     * {@code JNINativeMethod.memoryTrackerFreeBatch} call; in testing mode every
+     * non-zero address is released with {@code freeMemory}.
+     *
+     * @param addrs addresses to free; a null or empty array is ignored
+     */
+    public static void freeMemoryBatch(long[] addrs) {
+        if (addrs == null || addrs.length == 0) {
+            return;
+        }
+        if (!IS_TESTING) {
+            JNINativeMethod.memoryTrackerFreeBatch(addrs);
+            return;
+        }
+        for (int i = 0; i < addrs.length; i++) {
+            if (addrs[i] == 0) {
+                continue;
+            }
+            freeMemory(addrs[i]);
+        }
+    }
+
public static void copyMemory(Object src, long srcOffset, Object dst, long
dstOffset, long length) {
// Check if dstOffset is before or after srcOffset to determine if we
should copy
// forward or backwards. This is necessary in case src and dst overlap.
diff --git
a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/ColumnType.java
b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/ColumnType.java
index bda08dd83e5..c39d2aa1a9f 100644
---
a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/ColumnType.java
+++
b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/ColumnType.java
@@ -147,8 +147,11 @@ public class ColumnType {
}
public boolean isStringType() {
- return type == Type.STRING || type == Type.BINARY || type ==
Type.VARBINARY || type == Type.CHAR
- || type == Type.VARCHAR;
+ return type == Type.STRING || type == Type.CHAR || type ==
Type.VARCHAR;
+ }
+
+    /**
+     * @return true when this column's type is {@code Type.BINARY} or {@code Type.VARBINARY}
+     */
+    public boolean isVarbinaryType() {
+        return type == Type.BINARY || type == Type.VARBINARY;
     }
public boolean isComplexType() {
@@ -239,10 +242,8 @@ public class ColumnType {
}
return size;
case STRING:
- case BINARY:
case CHAR:
case VARCHAR:
- case VARBINARY:
// [const | nullMap | offsets | data ]
return 4;
default:
diff --git
a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorColumn.java
b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorColumn.java
index 89b44c3403e..594033a096f 100644
---
a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorColumn.java
+++
b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorColumn.java
@@ -211,10 +211,12 @@ public class VectorColumn {
return new VectorColumn(columnType, capacity);
}
+ // create readable column
public static VectorColumn createReadableColumn(ColumnType columnType, int
numRows, long columnMetaAddress) {
return new VectorColumn(columnType, numRows, columnMetaAddress);
}
+ // create this.meta column
public static VectorColumn createReadableColumn(long address, int
capacity, ColumnType columnType) {
return new VectorColumn(address, capacity, columnType);
}
@@ -276,8 +278,13 @@ public class VectorColumn {
OffHeap.freeMemory(nullMap);
}
if (data != 0) {
- OffHeap.freeMemory(data);
+ if (columnType.isVarbinaryType()) {
+ freeVarbinaryData();
+ } else {
+ OffHeap.freeMemory(data);
+ }
}
+
if (offsets != 0) {
OffHeap.freeMemory(offsets);
}
@@ -290,6 +297,24 @@ public class VectorColumn {
isConst = false;
}
+ private void freeVarbinaryData() {
+ long[] addrs = new long[appendIndex];
+ int count = 0;
+ for (int i = 0; i < appendIndex; i++) {
+ long entry = data + 16L * i;
+ long len = OffHeap.getLong(null, entry);
+ if (len > 0) { // 0 indicates null or empty
+ long addr = OffHeap.getLong(null, entry + 8);
+ if (addr != 0) {
+ addrs[count++] = addr;
+ }
+ }
+ }
+ if (count > 0) {
+ OffHeap.freeMemoryBatch(java.util.Arrays.copyOf(addrs, count));
+ }
+ }
+
private void throwReserveException(int requiredCapacity, Throwable cause) {
String message = "Cannot reserve enough bytes in off heap memory ("
+ (requiredCapacity >= 0 ? "requested " + requiredCapacity + "
bytes" : "integer overflow).");
@@ -327,8 +352,10 @@ public class VectorColumn {
this.data = OffHeap.reallocateMemory(data, oldCapacity * typeSize,
newCapacity * typeSize);
} else if (columnType.isStringType() || columnType.isArray() ||
columnType.isMap()) {
this.offsets = OffHeap.reallocateMemory(offsets, oldOffsetSize,
newOffsetSize);
+ } else if (columnType.isVarbinaryType()) {
+ this.data = OffHeap.reallocateMemory(data, oldCapacity * 16L,
newCapacity * 16L);
} else if (!columnType.isStruct()) {
- throw new RuntimeException("Unhandled type: " + columnType);
+ throw new RuntimeException("Unhandled type: " +
columnType.getName());
}
if (!"#stringBytes".equals(columnType.getName())) {
this.nullMap = OffHeap.reallocateMemory(nullMap, oldCapacity,
newCapacity);
@@ -423,7 +450,6 @@ public class VectorColumn {
case CHAR:
case VARCHAR:
case STRING:
- case BINARY:
return appendBytesAndOffset(new byte[0]);
case ARRAY:
return appendArray(Collections.emptyList());
@@ -431,8 +457,9 @@ public class VectorColumn {
return appendMap(Collections.emptyList(),
Collections.emptyList());
case STRUCT:
return appendStruct(structFieldIndex, null);
+ case BINARY:
case VARBINARY:
- return appendBytesAndOffset(new byte[0]);
+ return appendVarbinary(new byte[0]);
default:
throw new RuntimeException("Unknown type value: " + typeValue);
}
@@ -1448,6 +1475,89 @@ public class VectorColumn {
return result;
}
+    /**
+     * Appends one varbinary value as a 16-byte entry {@code [len: long][addr: long]} at
+     * {@code data + 16 * appendIndex}.
+     * An empty value is stored as length 0 with address 0 and allocates nothing, matching
+     * the batch overload; freeVarbinaryData() skips zero-length entries, so a buffer
+     * allocated for an empty value would never be released.
+     *
+     * @param src the bytes to append; must not be null
+     * @return the row index the value was written to
+     */
+    public int appendVarbinary(byte[] src) {
+        // Reserve first: if growing the column fails, no payload buffer has been allocated yet.
+        reserve(appendIndex + 1);
+        long addr = 0L;
+        if (src.length > 0) {
+            addr = OffHeap.allocateMemory(src.length);
+            OffHeap.copyMemory(src, OffHeap.BYTE_ARRAY_OFFSET, null, addr, src.length);
+        }
+        // data may have been reallocated by reserve(), so compute the entry address afterwards.
+        long entry = data + 16L * appendIndex;
+        OffHeap.putLong(null, entry, src.length);
+        OffHeap.putLong(null, entry + 8, addr);
+        return appendIndex++;
+    }
+
+    /**
+     * Appends a batch of varbinary values, one 16-byte entry {@code [len: long][addr: long]}
+     * per row. Payload buffers for all non-empty rows are obtained with a single
+     * {@code OffHeap.allocateMemoryBatch} call. Null rows are marked with putNull and, like
+     * empty rows, are stored as length 0 with address 0.
+     *
+     * @param batch the values to append; entries may be null
+     * @param isNullable when false, the batch is first passed to checkNullable
+     */
+    public void appendVarbinary(byte[][] batch, boolean isNullable) {
+        final int rows = batch.length;
+        if (!isNullable) {
+            checkNullable(batch, rows);
+        }
+        reserve(appendIndex + rows);
+
+        // First pass: collect the sizes of the rows that need a payload buffer.
+        int[] payloadSizes = new int[rows];
+        int payloadCount = 0;
+        for (byte[] value : batch) {
+            if (value != null && value.length > 0) {
+                payloadSizes[payloadCount++] = value.length;
+            }
+        }
+
+        long[] payloadAddrs;
+        if (payloadCount == 0) {
+            payloadAddrs = new long[0];
+        } else {
+            payloadAddrs = OffHeap.allocateMemoryBatch(java.util.Arrays.copyOf(payloadSizes, payloadCount));
+            if (payloadAddrs == null) {
+                throw new OutOfMemoryError("allocateMemoryBatch returned null");
+            }
+        }
+
+        // Second pass: copy the payloads and write one entry per row.
+        int next = 0;
+        for (byte[] value : batch) {
+            long length = 0L;
+            long addr = 0L;
+            if (value == null) {
+                putNull(appendIndex);
+            } else if (value.length > 0) {
+                addr = payloadAddrs[next++];
+                OffHeap.copyMemory(value, OffHeap.BYTE_ARRAY_OFFSET, null, addr, value.length);
+                length = value.length;
+            }
+            long entry = data + 16L * appendIndex;
+            OffHeap.putLong(null, entry, length);
+            OffHeap.putLong(null, entry + 8, addr);
+            appendIndex++;
+        }
+    }
+
+    /**
+     * Reads rows {@code [start, end)} as byte arrays.
+     *
+     * @return an array of length {@code end - start}; the slot of a null row is left null
+     */
+    public byte[][] getVarBinaryColumn(int start, int end) {
+        byte[][] rows = new byte[end - start][];
+        for (int offset = 0; offset < rows.length; offset++) {
+            int rowId = start + offset;
+            if (isNullAt(rowId)) {
+                continue;
+            }
+            rows[offset] = getBytesVarbinary(rowId);
+        }
+        return rows;
+    }
+
+    /**
+     * Reads the bytes of one row from its 16-byte entry at {@code data + 16 * rowId}.
+     * The entry is decoded as:
+     *   [0..3]  int size
+     *   [4..15] the value bytes themselves when size <= 12, otherwise
+     *   [8..15] the address of the value bytes
+     *
+     * @return a new array holding the row's bytes; an empty array when size is 0
+     */
+    public byte[] getBytesVarbinary(int rowId) {
+        final long entry = data + 16L * rowId;
+        final int size = OffHeap.getInt(null, entry);
+        if (size == 0) {
+            return new byte[0];
+        }
+        if (size > 12) {
+            // Too large to be stored inline: follow the address kept in the last 8 bytes.
+            return OffHeap.getByte(null, OffHeap.getLong(null, entry + 8), size);
+        }
+        // Inline case: the bytes sit contiguously right after the 4-byte size.
+        byte[] inline = new byte[size];
+        OffHeap.copyMemory(null, entry + 4, inline, OffHeap.BYTE_ARRAY_OFFSET, size);
+        return inline;
+    }
+
public void updateMeta(VectorColumn meta) {
if (columnType.isUnsupported()) {
meta.appendLong(0);
@@ -1606,8 +1716,9 @@ public class VectorColumn {
case STRUCT:
appendStruct((Map<String, Object>[]) batch, isNullable);
break;
+ case BINARY:
case VARBINARY:
- appendBinaryAndOffset((byte[][]) batch, isNullable);
+ appendVarbinary((byte[][]) batch, isNullable);
break;
default:
throw new RuntimeException("Unknown type value: " +
columnType.getType());
@@ -1661,8 +1772,9 @@ public class VectorColumn {
return getMapColumn(start, end);
case STRUCT:
return getStructColumn(start, end);
+ case BINARY:
case VARBINARY:
- return getBinaryColumn(start, end);
+ return getVarBinaryColumn(start, end);
default:
throw new RuntimeException("Unknown type value: " +
columnType.getType());
}
@@ -1730,7 +1842,8 @@ public class VectorColumn {
}
break;
case BINARY:
- appendBytesAndOffset(o.getBytes());
+ case VARBINARY:
+ appendVarbinary(o.getBytes());
break;
case ARRAY: {
List<ColumnValue> values = new ArrayList<>();
@@ -1811,7 +1924,7 @@ public class VectorColumn {
break;
case BINARY:
case VARBINARY: {
- byte[] bytes = getBytesWithOffset(i);
+ byte[] bytes = getBytesVarbinary(i);
sb.append(dumpHex(bytes));
sb.append(" ASCII:(");
boolean printable = true;
diff --git
a/fe/be-java-extensions/java-udf/src/main/java/org/apache/doris/udf/BaseExecutor.java
b/fe/be-java-extensions/java-udf/src/main/java/org/apache/doris/udf/BaseExecutor.java
index 6aa23b32c1e..cd66b074fc9 100644
---
a/fe/be-java-extensions/java-udf/src/main/java/org/apache/doris/udf/BaseExecutor.java
+++
b/fe/be-java-extensions/java-udf/src/main/java/org/apache/doris/udf/BaseExecutor.java
@@ -274,6 +274,28 @@ public abstract class BaseExecutor {
return result;
};
}
+ case VARBINARY: {
+ // Support Byte[] parameter for VARBINARY by boxing from byte[]
+ if (Byte[].class.equals(clz)) {
+ return (Object[] columnData) -> {
+ Byte[][] result = new Byte[columnData.length][];
+ for (int i = 0; i < columnData.length; ++i) {
+ if (columnData[i] != null) {
+ byte[] v = (byte[]) columnData[i];
+ Byte[] boxed = new Byte[v.length];
+ for (int k = 0; k < v.length; ++k) {
+ boxed[k] = v[k];
+ }
+ result[i] = boxed;
+ }
+ }
+ return result;
+ };
+ } else if (!byte[].class.equals(clz)) {
+ throw new UdfRuntimeException("Unsupported varbinary type:
" + clz.getCanonicalName());
+ }
+ break;
+ }
default:
break;
}
@@ -362,6 +384,28 @@ public abstract class BaseExecutor {
return result;
};
}
+ case VARBINARY: {
+ // Support Byte[] return for VARBINARY by unboxing to byte[]
for storage
+ if (Byte[].class.equals(clz)) {
+ return (Object[] columnData) -> {
+ byte[][] result = new byte[columnData.length][];
+ for (int i = 0; i < columnData.length; ++i) {
+ if (columnData[i] != null) {
+ Byte[] v = (Byte[]) columnData[i];
+ byte[] unboxed = new byte[v.length];
+ for (int k = 0; k < v.length; ++k) {
+ unboxed[k] = v[k];
+ }
+ result[i] = unboxed;
+ }
+ }
+ return result;
+ };
+ } else if (!byte[].class.equals(clz)) {
+ throw new UdfRuntimeException("Unsupported varbinary type:
" + clz.getCanonicalName());
+ }
+ break;
+ }
default:
break;
}
diff --git
a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/BaseJdbcExecutor.java
b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/BaseJdbcExecutor.java
index b6c8561ffd1..0ef1f646d93 100644
---
a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/BaseJdbcExecutor.java
+++
b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/BaseJdbcExecutor.java
@@ -614,11 +614,11 @@ public abstract class BaseJdbcExecutor implements
JdbcExecutor {
case CHAR:
case VARCHAR:
case STRING:
- case BINARY:
preparedStatement.setString(parameterIndex,
column.getStringWithOffset(rowIdx));
break;
+ case BINARY:
case VARBINARY:
- preparedStatement.setBytes(parameterIndex,
column.getBytesWithOffset(rowIdx));
+ preparedStatement.setBytes(parameterIndex,
column.getBytesVarbinary(rowIdx));
break;
default:
throw new RuntimeException("Unknown type value: " + dorisType);
@@ -667,9 +667,9 @@ public abstract class BaseJdbcExecutor implements
JdbcExecutor {
case CHAR:
case VARCHAR:
case STRING:
- case BINARY:
preparedStatement.setNull(parameterIndex, Types.VARCHAR);
break;
+ case BINARY:
case VARBINARY:
preparedStatement.setNull(parameterIndex, Types.VARBINARY);
break;
diff --git
a/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java
b/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java
index 5784953d2fb..c9b3f441a3a 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java
@@ -220,11 +220,13 @@ public class ScalarType extends Type {
return IPV4;
case IPV6:
return IPV6;
+ case VARBINARY:
+ return VARBINARY;
case ALL:
return ALL;
default:
LOG.warn("type={}", type);
- Preconditions.checkState(false);
+ Preconditions.checkState(false, "type.name()=" + type.name());
return NULL;
}
}
diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
index 00d2bf0d155..8f17f85e089 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
@@ -304,6 +304,7 @@ public abstract class Type {
.put(PrimitiveType.IPV4,
Sets.newHashSet(InetAddress.class))
.put(PrimitiveType.IPV6,
Sets.newHashSet(InetAddress.class))
.put(PrimitiveType.STRING, Sets.newHashSet(String.class))
+ .put(PrimitiveType.VARBINARY,
Sets.newHashSet(Byte[].class, byte[].class))
.put(PrimitiveType.DATE, DATE_SUPPORTED_JAVA_TYPE)
.put(PrimitiveType.DATEV2, DATE_SUPPORTED_JAVA_TYPE)
.put(PrimitiveType.DATETIME, DATETIME_SUPPORTED_JAVA_TYPE)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/check/CheckCast.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/check/CheckCast.java
index 0ce4811b74b..73ea7d93fdd 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/check/CheckCast.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/check/CheckCast.java
@@ -52,6 +52,7 @@ import org.apache.doris.nereids.types.StructField;
import org.apache.doris.nereids.types.StructType;
import org.apache.doris.nereids.types.TimeV2Type;
import org.apache.doris.nereids.types.TinyIntType;
+import org.apache.doris.nereids.types.VarBinaryType;
import org.apache.doris.nereids.types.VarcharType;
import org.apache.doris.nereids.types.VariantType;
import org.apache.doris.nereids.types.coercion.CharacterType;
@@ -193,6 +194,11 @@ public class CheckCast implements
ExpressionPatternRuleFactory {
allowedTypes.add(QuantileStateType.class);
strictCastWhiteList.put(QuantileStateType.class, allowedTypes);
+ //varbinary
+ allowedTypes = Sets.newHashSet();
+ allowedTypes.add(VarBinaryType.class);
+ strictCastWhiteList.put(VarBinaryType.class, allowedTypes);
+
// array
allowedTypes = Sets.newHashSet();
allowToStringLikeType(allowedTypes);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BaseViewInfo.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BaseViewInfo.java
index c575bed5cc7..e56e59acb37 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BaseViewInfo.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BaseViewInfo.java
@@ -198,6 +198,10 @@ public class BaseViewInfo {
if (!colSets.add(col.getName())) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_DUP_FIELDNAME, col.getName());
}
+ if (col.getType().isVarbinaryType()) {
+ throw new org.apache.doris.common.AnalysisException(
+ "View does not support VARBINARY type: " + col.getName());
+ }
try {
FeNameFormat.checkColumnName(col.getName());
} catch (org.apache.doris.common.AnalysisException e) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java
index 36f174964b5..11947d2da84 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java
@@ -193,6 +193,9 @@ public class CreateMTMVInfo extends CreateTableInfo {
if (!colSets.add(col.getName())) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_DUP_FIELDNAME, col.getName());
}
+ if (col.getType().isVarBinaryType()) {
+ throw new AnalysisException("MTMV do not support varbinary type : " + col.getName());
+ }
col.validate(true, keysSet, Sets.newHashSet(),
finalEnableMergeOnWrite, KeysType.DUP_KEYS);
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java
index b76f0d52a04..e129bcb897c 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java
@@ -445,6 +445,9 @@ public class CreateTableInfo {
throw new AnalysisException(
"Disable to create table column with name start with
__DORIS_: " + columnNameUpperCase);
}
+ if (columnDef.getType().isVarBinaryType()) {
+ throw new AnalysisException("doris do not support varbinary create table, could use it by catalog");
+ }
if (columnDef.getType().isVariantType()) {
if (columnNameUpperCase.indexOf('.') != -1) {
throw new AnalysisException(
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java
index d1f919a951f..50bc32b910c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java
@@ -372,9 +372,8 @@ public abstract class DataType {
break;
case "varbinary":
// NOTICE, Maybe. not supported create table, and varbinary do not have len now
- // dataType = VarBinaryType.INSTANCE;
- // break;
- throw new AnalysisException("doris do not support varbinary
create table, could use it by catalog");
+ dataType = VarBinaryType.INSTANCE;
+ break;
default:
throw new AnalysisException("Nereids do not support type: " +
type);
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
index 58b340ec912..e5e0ce14182 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
@@ -59,8 +59,8 @@ public class ColumnStatistic {
.build();
public static final Set<Type> UNSUPPORTED_TYPE = Sets.newHashSet(Type.HLL,
Type.BITMAP, Type.ARRAY, Type.STRUCT,
- Type.MAP, Type.QUANTILE_STATE, Type.JSONB, Type.VARIANT,
Type.TIMEV2, Type.LAMBDA_FUNCTION);
-
+ Type.MAP, Type.QUANTILE_STATE, Type.JSONB, Type.VARIANT,
Type.TIMEV2, Type.LAMBDA_FUNCTION,
+ Type.VARBINARY);
// ATTENTION: Stats deriving WILL NOT use 'count' field any longer.
// Use 'rowCount' field in Statistics if needed.
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateViewTest.java
b/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateViewTest.java
index 5e005a7cba8..8a70831c38b 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateViewTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateViewTest.java
@@ -211,6 +211,14 @@ public class CreateViewTest {
alter1.getInlineViewDef());
}
+ @Test
+ public void testViewRejectVarbinary() throws Exception {
+ ExceptionChecker.expectThrowsWithMsg(
+ org.apache.doris.common.AnalysisException.class,
+ "View does not support VARBINARY type: vb",
+ () -> createView("create view test.vb_view as select X'AB' as vb;"));
+ }
+
@Test
public void testResetViewDefForRestore() {
View view = new View();
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/VarBinaryLiteralParserTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/VarBinaryLiteralParserTest.java
index 55bd45fe1e8..4fb1249907b 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/VarBinaryLiteralParserTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/VarBinaryLiteralParserTest.java
@@ -17,33 +17,22 @@
package org.apache.doris.nereids.parser;
+import org.apache.doris.common.ExceptionChecker;
import org.apache.doris.nereids.analyzer.UnboundOneRowRelation;
import org.apache.doris.nereids.analyzer.UnboundSlot;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.literal.VarBinaryLiteral;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
-import org.apache.doris.qe.ConnectContext;
+import org.apache.doris.utframe.TestWithFeService;
-import mockit.Mock;
-import mockit.MockUp;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
/**
* Dedicated tests for parsing VARBINARY literals (X'...').
*/
-public class VarBinaryLiteralParserTest {
-
- static {
- ConnectContext ctx = new ConnectContext();
- new MockUp<ConnectContext>() {
- @Mock
- public ConnectContext get() {
- return ctx;
- }
- };
- }
+public class VarBinaryLiteralParserTest extends TestWithFeService {
private VarBinaryLiteral extract(String sql) {
NereidsParser parser = new NereidsParser();
@@ -123,53 +112,80 @@ public class VarBinaryLiteralParserTest {
}
@Test
- public void testCreateTableVarbinaryDirect() {
- // expect parse or analysis failure (depending on where VARBINARY is
rejected)
- Assertions.assertThrows(Throwable.class, () -> new
NereidsParser().parseSingle(
- "CREATE TABLE t_vb (k1 INT, vb VARBINARY) DISTRIBUTED BY
HASH(k1) BUCKETS 1"));
+ public void testCreateTableVarbinaryDirect() throws Exception {
+ String db = "exists_db";
+ String createDbStmtStr = "CREATE DATABASE IF NOT EXISTS " + db;
+ createDatabaseWithSql(createDbStmtStr);
+ useDatabase(db);
+ ExceptionChecker.expectThrowsWithMsg(org.apache.doris.nereids.exceptions.AnalysisException.class,
+ "doris do not support varbinary create table, could use it by catalog",
+ () -> createTable("create table exists_db.test_varbinary\n"
+ + "(k1 int, k2 VARBINARY)\n"
+ + "duplicate key(k1)\n"
+ + "distributed by hash(k1) buckets 1\n", true));
}
@Test
public void testCreateTableVarbinaryWithLength() {
- Assertions.assertThrows(Throwable.class, () -> new
NereidsParser().parseSingle(
- "CREATE TABLE t_vb2 (k1 INT, vb VARBINARY(10)) DISTRIBUTED BY
HASH(k1) BUCKETS 1"));
+ Plan plan = new NereidsParser().parseSingle(
+ "CREATE TABLE t_vb2 (k1 INT, vb VARBINARY(10)) DISTRIBUTED BY
HASH(k1) BUCKETS 1");
+ Assertions.assertTrue(plan instanceof
org.apache.doris.nereids.trees.plans.commands.CreateTableCommand);
+ org.apache.doris.nereids.trees.plans.commands.CreateTableCommand cmd =
+
(org.apache.doris.nereids.trees.plans.commands.CreateTableCommand) plan;
+
Assertions.assertThrows(org.apache.doris.nereids.exceptions.AnalysisException.class,
+ () -> cmd.getCreateTableInfo().validate(connectContext));
}
@Test
public void testCreateTableVarbinaryAsKey() {
- Assertions.assertThrows(Throwable.class, () -> new
NereidsParser().parseSingle(
- "CREATE TABLE t_vb3 (vb VARBINARY, v2 INT) DISTRIBUTED BY
HASH(vb) BUCKETS 1"));
+ Plan plan = new NereidsParser().parseSingle(
+ "CREATE TABLE t_vb3 (vb VARBINARY, v2 INT) DISTRIBUTED BY
HASH(vb) BUCKETS 1");
+ Assertions.assertTrue(plan instanceof
org.apache.doris.nereids.trees.plans.commands.CreateTableCommand);
+ org.apache.doris.nereids.trees.plans.commands.CreateTableCommand cmd =
+
(org.apache.doris.nereids.trees.plans.commands.CreateTableCommand) plan;
+
Assertions.assertThrows(org.apache.doris.nereids.exceptions.AnalysisException.class,
+ () -> cmd.getCreateTableInfo().validate(connectContext));
}
@Test
public void testCreateTableVarbinaryInComplex() {
- Assertions.assertThrows(Throwable.class, () -> new
NereidsParser().parseSingle(
- "CREATE TABLE t_vb4 (id INT, arr ARRAY<VARBINARY>) DISTRIBUTED
BY HASH(id) BUCKETS 1"));
+ Plan plan = new NereidsParser().parseSingle(
+ "CREATE TABLE t_vb4 (id INT, arr ARRAY<VARBINARY>) DISTRIBUTED
BY HASH(id) BUCKETS 1");
+ Assertions.assertTrue(plan instanceof
org.apache.doris.nereids.trees.plans.commands.CreateTableCommand);
+ org.apache.doris.nereids.trees.plans.commands.CreateTableCommand cmd =
+
(org.apache.doris.nereids.trees.plans.commands.CreateTableCommand) plan;
+
Assertions.assertThrows(org.apache.doris.nereids.exceptions.AnalysisException.class,
+ () -> cmd.getCreateTableInfo().validate(connectContext));
}
@Test
public void testCreateTableVarbinaryPartition() {
- Assertions.assertThrows(Throwable.class, () -> new
NereidsParser().parseSingle(
- "CREATE TABLE t_vb5 (k1 INT, vb VARBINARY) PARTITION BY
RANGE(k1)() DISTRIBUTED BY HASH(k1) BUCKETS 1"));
+ Plan plan = new NereidsParser().parseSingle(
+ "CREATE TABLE t_vb5 (k1 INT, vb VARBINARY) PARTITION BY
RANGE(k1)() DISTRIBUTED BY HASH(k1) BUCKETS 1");
+ Assertions.assertTrue(plan instanceof
org.apache.doris.nereids.trees.plans.commands.CreateTableCommand);
+ org.apache.doris.nereids.trees.plans.commands.CreateTableCommand cmd =
+
(org.apache.doris.nereids.trees.plans.commands.CreateTableCommand) plan;
+
Assertions.assertThrows(org.apache.doris.nereids.exceptions.AnalysisException.class,
+ () -> cmd.getCreateTableInfo().validate(connectContext));
}
@Test
public void testAlterAddVarbinary() {
- // Even adding via ALTER should fail
- Assertions.assertThrows(Throwable.class, () -> new
NereidsParser().parseSingle(
- "ALTER TABLE some_tbl ADD COLUMN vb VARBINARY"));
+ Plan plan = new NereidsParser().parseSingle(
+ "ALTER TABLE some_tbl ADD COLUMN vb VARBINARY");
+ Assertions.assertTrue(plan instanceof
org.apache.doris.nereids.trees.plans.commands.AlterTableCommand);
+ org.apache.doris.nereids.trees.plans.commands.AlterTableCommand cmd =
+
(org.apache.doris.nereids.trees.plans.commands.AlterTableCommand) plan;
+ Assertions.assertThrows(Throwable.class, () -> cmd.run(connectContext,
null));
}
@Test
public void testAlterModifyToVarbinary() {
- Assertions.assertThrows(Throwable.class, () -> new
NereidsParser().parseSingle(
- "ALTER TABLE some_tbl MODIFY COLUMN c1 VARBINARY"));
- }
-
- @Test
- public void testCreateTableLikeWithVarbinary() {
- // Statement referencing VARBINARY in a like/replace clause
- Assertions.assertThrows(Throwable.class, () -> new
NereidsParser().parseSingle(
- "CREATE TABLE t_vb6 LIKE base_tbl
PROPERTIES('replace_columns'='vb VARBINARY')"));
+ Plan plan = new NereidsParser().parseSingle(
+ "ALTER TABLE some_tbl MODIFY COLUMN c1 VARBINARY");
+ Assertions.assertTrue(plan instanceof
org.apache.doris.nereids.trees.plans.commands.AlterTableCommand);
+ org.apache.doris.nereids.trees.plans.commands.AlterTableCommand cmd =
+
(org.apache.doris.nereids.trees.plans.commands.AlterTableCommand) plan;
+ Assertions.assertThrows(Throwable.class, () -> cmd.run(connectContext,
null));
}
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/CreateTableCommandTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/CreateTableCommandTest.java
index 64d7118d7d9..9a83840474a 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/CreateTableCommandTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/CreateTableCommandTest.java
@@ -1086,4 +1086,41 @@ public class CreateTableCommandTest extends
TestWithFeService {
return command.getCreateMTMVInfo();
}
+
+ @Test
+ public void testMTMVRejectVarbinary() throws Exception {
+ String mv = "CREATE MATERIALIZED VIEW mv_vb\n"
+ + " BUILD DEFERRED REFRESH AUTO ON MANUAL\n"
+ + " DISTRIBUTED BY RANDOM BUCKETS 2\n"
+ + " PROPERTIES ('replication_num' = '1')\n"
+ + " AS SELECT X'AB' as vb;";
+
+ LogicalPlan plan = new NereidsParser().parseSingle(mv);
+ Assertions.assertTrue(plan instanceof CreateMTMVCommand);
+ CreateMTMVCommand cmd = (CreateMTMVCommand) plan;
+
+ org.apache.doris.nereids.exceptions.AnalysisException ex =
Assertions.assertThrows(
+ org.apache.doris.nereids.exceptions.AnalysisException.class,
+ () -> cmd.getCreateMTMVInfo().analyze(connectContext));
+ System.out.println(ex.getMessage());
+ Assertions.assertTrue(ex.getMessage().contains("MTMV do not support varbinary type"));
+ Assertions.assertTrue(ex.getMessage().contains("vb"));
+ }
+
+ @Test
+ public void testVarBinaryModifyColumnRejected() throws Exception {
+ createTable("create table test.vb_alt (k1 int, v1 int)\n"
+ + "duplicate key(k1)\n"
+ + "distributed by hash(k1) buckets 1\n"
+ + "properties('replication_num' = '1');");
+
+ org.apache.doris.nereids.trees.plans.logical.LogicalPlan plan =
+ new org.apache.doris.nereids.parser.NereidsParser()
+ .parseSingle("alter table test.vb_alt modify column v1
VARBINARY");
+ Assertions.assertTrue(
+ plan instanceof
org.apache.doris.nereids.trees.plans.commands.AlterTableCommand);
+ org.apache.doris.nereids.trees.plans.commands.AlterTableCommand cmd2 =
+
(org.apache.doris.nereids.trees.plans.commands.AlterTableCommand) plan;
+ Assertions.assertThrows(Throwable.class, () ->
cmd2.run(connectContext, null));
+ }
}
diff --git
a/regression-test/data/external_table_p0/jdbc/type_test/select/test_mysql_all_types_select.out
b/regression-test/data/external_table_p0/jdbc/type_test/select/test_mysql_all_types_select.out
index 117f274ca81..755fb31d80a 100644
---
a/regression-test/data/external_table_p0/jdbc/type_test/select/test_mysql_all_types_select.out
+++
b/regression-test/data/external_table_p0/jdbc/type_test/select/test_mysql_all_types_select.out
@@ -127,7 +127,15 @@ varbinary_c varbinary(100) Yes true \N
0
-- !select_varbinary_type3 --
+0
+
+-- !select_varbinary_type4 --
+0
+
+-- !select_varbinary_type5 --
1 0x48656C6C6F20576F726C64
2 0x48656C6C6F20576F726C6421
3 0x48656C6C6F20576F726C6421
+4 \N
+5 0xAB
diff --git
a/regression-test/data/external_table_p0/jdbc/type_test/select/test_mysql_varbinary_with_udf.out
b/regression-test/data/external_table_p0/jdbc/type_test/select/test_mysql_varbinary_with_udf.out
new file mode 100644
index 00000000000..8a8da656c69
--- /dev/null
+++
b/regression-test/data/external_table_p0/jdbc/type_test/select/test_mysql_varbinary_with_udf.out
@@ -0,0 +1,26 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !desc_varbinary_type --
+id int Yes true \N
+varbinary_c varbinary(100) Yes true \N
+
+-- !select_varbinary_type --
+1 0x48656C6C6F20576F726C64
+2 0x48656C6C6F20576F726C6421
+3 \N
+4 0xAB
+5 0xABCDEF
+
+-- !select_varbinary_type4 --
+1 0x48656C6C6F20576F726C64 0x646C726F57206F6C6C6548
+2 0x48656C6C6F20576F726C6421 0x21646C726F57206F6C6C6548
+3 \N \N
+4 0xAB 0xAB
+5 0xABCDEF 0xEFCDAB
+
+-- !select_varbinary_type5 --
+1 0x48656C6C6F20576F726C64 0x646C726F57206F6C6C6548
+2 0x48656C6C6F20576F726C6421 0x21646C726F57206F6C6C6548
+3 \N \N
+4 0xAB 0xAB
+5 0xABCDEF 0xEFCDAB
+
diff --git
a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JNINativeMethod.java
b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/VarBinaryTest.java
similarity index 56%
copy from
fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JNINativeMethod.java
copy to
regression-test/java-udf-src/src/main/java/org/apache/doris/udf/VarBinaryTest.java
index 50820c2ecb9..34717a6c8b4 100644
---
a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JNINativeMethod.java
+++
b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/VarBinaryTest.java
@@ -15,24 +15,17 @@
// specific language governing permissions and limitations
// under the License.
-package org.apache.doris.common.jni.utils;
+package org.apache.doris.udf;
-/**
- * Native method in doris::JavaNativeMethods.
- */
-public class JNINativeMethod {
- /**
- * Resize string column and return the new column address in off heap.
- */
- public static native long resizeStringColumn(long columnAddr, int
byteSize);
-
- /**
- * Allocate memory in off heap, which will be tracked by memory tracker.
- */
- public static native long memoryTrackerMalloc(long size);
-
- /**
- * Free memory in off heap, which will be tracked by memory tracker.
- */
- public static native void memoryTrackerFree(long address);
+public class VarBinaryTest {
+ public byte[] evaluate(byte[] input) {
+ if (input == null) {
+ return null;
+ }
+ byte[] out = new byte[input.length];
+ for (int i = 0, j = input.length - 1; i < input.length; i++, j--) {
+ out[i] = input[j];
+ }
+ return out;
+ }
}
diff --git
a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JNINativeMethod.java
b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/VarBinaryTest2.java
similarity index 56%
copy from
fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JNINativeMethod.java
copy to
regression-test/java-udf-src/src/main/java/org/apache/doris/udf/VarBinaryTest2.java
index 50820c2ecb9..55b429ae3a4 100644
---
a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JNINativeMethod.java
+++
b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/VarBinaryTest2.java
@@ -15,24 +15,17 @@
// specific language governing permissions and limitations
// under the License.
-package org.apache.doris.common.jni.utils;
+package org.apache.doris.udf;
-/**
- * Native method in doris::JavaNativeMethods.
- */
-public class JNINativeMethod {
- /**
- * Resize string column and return the new column address in off heap.
- */
- public static native long resizeStringColumn(long columnAddr, int
byteSize);
-
- /**
- * Allocate memory in off heap, which will be tracked by memory tracker.
- */
- public static native long memoryTrackerMalloc(long size);
-
- /**
- * Free memory in off heap, which will be tracked by memory tracker.
- */
- public static native void memoryTrackerFree(long address);
+public class VarBinaryTest2 {
+ public Byte[] evaluate(Byte[] input) {
+ if (input == null) {
+ return null;
+ }
+ Byte[] out = new Byte[input.length];
+ for (int i = 0, j = input.length - 1; i < input.length; i++, j--) {
+ out[i] = input[j];
+ }
+ return out;
+ }
}
diff --git
a/regression-test/suites/external_table_p0/jdbc/type_test/select/test_mysql_all_types_select.groovy
b/regression-test/suites/external_table_p0/jdbc/type_test/select/test_mysql_all_types_select.groovy
index 14dd8347ddd..45ea26af514 100644
---
a/regression-test/suites/external_table_p0/jdbc/type_test/select/test_mysql_all_types_select.groovy
+++
b/regression-test/suites/external_table_p0/jdbc/type_test/select/test_mysql_all_types_select.groovy
@@ -52,7 +52,9 @@ suite("test_mysql_all_types_select",
"p0,external,mysql,external_docker,external
qt_desc_varbinary_type """desc test_varbinary;"""
qt_select_varbinary_type """select * from test_varbinary order by
id;"""
qt_select_varbinary_type2 """insert into test_varbinary values(3,
X'48656C6C6F20576F726C6421');"""
- qt_select_varbinary_type3 """select * from test_varbinary order by
id;"""
+ qt_select_varbinary_type3 """insert into test_varbinary values(4,
NULL);"""
+ qt_select_varbinary_type4 """insert into test_varbinary values(5,
X'AB');"""
+ qt_select_varbinary_type5 """select * from test_varbinary order by
id;"""
sql """drop catalog if exists mysql_all_type_test """
}
diff --git
a/regression-test/suites/external_table_p0/jdbc/type_test/select/test_mysql_varbinary_with_udf.groovy
b/regression-test/suites/external_table_p0/jdbc/type_test/select/test_mysql_varbinary_with_udf.groovy
new file mode 100644
index 00000000000..d63a0aff1a9
--- /dev/null
+++
b/regression-test/suites/external_table_p0/jdbc/type_test/select/test_mysql_varbinary_with_udf.groovy
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_mysql_varbinary_with_udf",
"p0,external,mysql,external_docker,external_docker_mysql") {
+ String enabled = context.config.otherConfigs.get("enableJdbcTest")
+ String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+ String s3_endpoint = getS3Endpoint()
+ String bucket = getS3BucketName()
+ String driver_url =
"https://${bucket}.${s3_endpoint}/regression/jdbc_driver/mysql-connector-j-8.3.0.jar"
+ if (enabled != null && enabled.equalsIgnoreCase("true")) {
+ String mysql_port = context.config.otherConfigs.get("mysql_57_port");
+
+ sql """drop catalog if exists mysql_varbinary_udf_catalog """
+ sql """create catalog if not exists mysql_varbinary_udf_catalog
properties(
+ "type"="jdbc",
+ "user"="root",
+ "password"="123456",
+ "jdbc_url" =
"jdbc:mysql://${externalEnvIp}:${mysql_port}/doris_test?useSSL=false",
+ "driver_url" = "${driver_url}",
+ "driver_class" = "com.mysql.cj.jdbc.Driver"
+ );"""
+
+ sql """use mysql_varbinary_udf_catalog.test_varbinary_db"""
+ qt_desc_varbinary_type """desc test_varbinary_udf;"""
+ qt_select_varbinary_type """select * from test_varbinary_udf order by
id;"""
+
+ sql """switch internal"""
+ sql """create database if not exists test_mysql_udf;"""
+ sql """use internal.test_mysql_udf;"""
+
+ def jarPath =
"""${context.file.parent}/../../../../javaudf_p0/jars/java-udf-case-jar-with-dependencies.jar"""
+ scp_udf_file_to_all_be(jarPath)
+ log.info("Jar path: ${jarPath}".toString())
+
+ try_sql("DROP FUNCTION IF EXISTS udf_test_varbinary(varbinary);")
+ sql """ CREATE FUNCTION udf_test_varbinary(varbinary) RETURNS
varbinary PROPERTIES (
+ "file"="file://${jarPath}",
+ "symbol"="org.apache.doris.udf.VarBinaryTest",
+ "always_nullable"="true",
+ "type"="JAVA_UDF"
+ ); """
+
+ try_sql("DROP FUNCTION IF EXISTS udf_test_varbinary2(varbinary);")
+ sql """ CREATE FUNCTION udf_test_varbinary2(varbinary) RETURNS
varbinary PROPERTIES (
+ "file"="file://${jarPath}",
+ "symbol"="org.apache.doris.udf.VarBinaryTest2",
+ "always_nullable"="true",
+ "type"="JAVA_UDF"
+ ); """
+
+ qt_select_varbinary_type4 """select id, varbinary_c,
udf_test_varbinary(varbinary_c) as col_varbinary_reversed from
mysql_varbinary_udf_catalog.test_varbinary_db.test_varbinary_udf order by id;"""
+ qt_select_varbinary_type5 """select id, varbinary_c,
udf_test_varbinary2(varbinary_c) as col_varbinary_reversed from
mysql_varbinary_udf_catalog.test_varbinary_db.test_varbinary_udf order by id;"""
+
+ sql """drop catalog if exists mysql_varbinary_udf_catalog """
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org