This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 267abae917f branch40: [improve](varbinary) support varbinary type with 
topn runtime filter (#58721) (#59479)
267abae917f is described below

commit 267abae917fd731585a11c8a9d098d9c21e3e8b4
Author: zhangstar333 <[email protected]>
AuthorDate: Wed Dec 31 11:48:02 2025 +0800

    branch40: [improve](varbinary) support varbinary type with topn runtime 
filter (#58721) (#59479)
    
    Problem Summary:
    pick from master (#58721)
    Support the varbinary type with the TopN runtime filter, e.g.
    `ORDER BY binary_col LIMIT n`, and temporarily forbid the varbinary
    type in GROUP BY keys, join keys, and comparison predicates on the
    FE side.
    
    ### What problem does this PR solve?
    
    Issue Number: close #xxx
    
    Related PR: #xxx
    
    Problem Summary:
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [ ] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
            - [ ] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [ ] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [ ] No.
        - [ ] Yes. <!-- Add document PR link here. eg:
          https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
---
 be/src/runtime/primitive_type.h                    |   3 +-
 be/src/runtime/runtime_predicate.cpp               |   6 ++
 be/src/vec/columns/column_varbinary.cpp            |  10 +-
 be/src/vec/columns/column_varbinary.h              |   5 +-
 be/src/vec/common/string_view.h                    |   7 +-
 be/src/vec/core/field.cpp                          |   4 +
 be/src/vec/core/field.h                            |  96 +++++++++++++++--
 be/src/vec/core/sort_block.h                       |   6 ++
 be/src/vec/data_types/convert_field_to_type.cpp    |   5 +
 be/src/vec/data_types/data_type_varbinary.h        |   2 +-
 .../data_types/serde/data_type_varbinary_serde.cpp |  13 ++-
 be/src/vec/exprs/vexpr.cpp                         |   6 ++
 be/src/vec/exprs/vexpr.h                           |   7 ++
 be/test/vec/columns/column_varbinary_test.cpp      |   4 +-
 be/test/vec/common/string_view_test.cpp            | 104 +++++++++++++++++-
 .../vec/data_types/data_type_varbinary_test.cpp    | 113 +++++++++++++++++--
 .../exec/format/parquet/parquet_reader_test.cpp    |  24 ++---
 .../doris/iceberg/IcebergSysTableColumnValue.java  |   7 ++
 .../org/apache/doris/analysis/OutFileClause.java   |   2 +
 .../nereids/rules/analysis/CheckAfterRewrite.java  |   8 +-
 .../apache/doris/nereids/types/VarBinaryType.java  |   4 +
 .../doris/nereids/util/TypeCoercionUtils.java      |   6 ++
 .../export/test_hive_export_varbinary.out          |  29 +++++
 .../data/external_table_p0/hive/test_hive_orc.out  |  96 +++++++++++++++++
 .../iceberg/test_iceberg_sys_table.out             |  35 ++++++
 .../export/test_hive_export_varbinary.groovy       | 120 +++++++++++++++++++++
 .../external_table_p0/hive/test_hive_orc.groovy    |  55 ++++++++++
 .../iceberg/test_iceberg_sys_table.groovy          |  19 ++++
 28 files changed, 752 insertions(+), 44 deletions(-)

diff --git a/be/src/runtime/primitive_type.h b/be/src/runtime/primitive_type.h
index 4673160bc52..4686bdbab7d 100644
--- a/be/src/runtime/primitive_type.h
+++ b/be/src/runtime/primitive_type.h
@@ -603,7 +603,8 @@ struct PrimitiveTypeTraits<TYPE_VARBINARY> {
     using ColumnItemType = doris::StringView;
     using DataType = vectorized::DataTypeVarbinary;
     using ColumnType = vectorized::ColumnVarbinary;
-    using NearestFieldType = doris::StringView;
+    // StringView is non-owning, but StringViewField wraps it with String for 
ownership
+    using NearestFieldType = vectorized::StringViewField;
     static constexpr PrimitiveType NearestPrimitiveType = TYPE_VARBINARY;
     static constexpr PrimitiveType AvgNearestPrimitiveType = TYPE_VARBINARY;
 };
diff --git a/be/src/runtime/runtime_predicate.cpp 
b/be/src/runtime/runtime_predicate.cpp
index 8889c5955b1..1269d6b5666 100644
--- a/be/src/runtime/runtime_predicate.cpp
+++ b/be/src/runtime/runtime_predicate.cpp
@@ -197,6 +197,12 @@ bool RuntimePredicate::_init(PrimitiveType type) {
         _get_value_fn = get_normal_value<TYPE_IPV6>;
         break;
     }
+    case PrimitiveType::TYPE_VARBINARY: {
+        _get_value_fn = [](const Field& field) {
+            return field.get<StringViewField>().get_string();
+        };
+        break;
+    }
     default:
         return false;
     }
diff --git a/be/src/vec/columns/column_varbinary.cpp 
b/be/src/vec/columns/column_varbinary.cpp
index 0203e151b6b..037f96fd5c6 100644
--- a/be/src/vec/columns/column_varbinary.cpp
+++ b/be/src/vec/columns/column_varbinary.cpp
@@ -28,6 +28,7 @@
 #include "vec/columns/columns_common.h"
 #include "vec/common/arena.h"
 #include "vec/common/assert_cast.h"
+#include "vec/core/sort_block.h"
 
 namespace doris::vectorized {
 #include "common/compile_check_begin.h"
@@ -144,7 +145,7 @@ MutableColumnPtr ColumnVarbinary::permute(const 
IColumn::Permutation& perm, size
             res_data[i] = val;
             continue;
         }
-        const auto* dst = const_cast<Arena&>(_arena).insert(val.data(), 
val.size());
+        const auto* dst = res->_arena.insert(val.data(), val.size());
         res_data[i] = doris::StringView(dst, val.size());
     }
 
@@ -222,5 +223,12 @@ void ColumnVarbinary::insert_many_strings_overflow(const 
StringRef* strings, siz
     insert_many_strings(strings, num);
 }
 
+void ColumnVarbinary::sort_column(const ColumnSorter* sorter, EqualFlags& 
flags,
+                                  IColumn::Permutation& perms, EqualRange& 
range,
+                                  bool last_column) const {
+    sorter->sort_column(assert_cast<const ColumnVarbinary&>(*this), flags, 
perms, range,
+                        last_column);
+}
+
 #include "common/compile_check_end.h"
 } // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_varbinary.h 
b/be/src/vec/columns/column_varbinary.h
index e9e900954c6..50fe660cfd8 100644
--- a/be/src/vec/columns/column_varbinary.h
+++ b/be/src/vec/columns/column_varbinary.h
@@ -77,7 +77,7 @@ public:
     char* alloc(size_t length) { return _arena.alloc(length); }
 
     void insert(const Field& x) override {
-        auto value = vectorized::get<const doris::StringView&>(x);
+        const auto& value = vectorized::get<const StringViewField&>(x);
         insert_data(value.data(), value.size());
     }
 
@@ -185,6 +185,9 @@ public:
     void insert_many_strings_overflow(const StringRef* strings, size_t num,
                                       size_t max_length) override;
 
+    void sort_column(const ColumnSorter* sorter, EqualFlags& flags, 
IColumn::Permutation& perms,
+                     EqualRange& range, bool last_column) const override;
+
 private:
     Container _data;
     Arena _arena;
diff --git a/be/src/vec/common/string_view.h b/be/src/vec/common/string_view.h
index 5cd560aad4a..218104ff750 100644
--- a/be/src/vec/common/string_view.h
+++ b/be/src/vec/common/string_view.h
@@ -126,16 +126,15 @@ public:
     std::string dump_hex() const {
         static const char* kHex = "0123456789ABCDEF";
         std::string out;
-        out.reserve(size_ * 2 + 3);
-        out.push_back('X');
-        out.push_back('\'');
+        out.reserve(size_ * 2 + 2);
+        out.push_back('0');
+        out.push_back('x');
         const char* ptr = data();
         for (uint32_t i = 0; i < size_; ++i) {
             auto c = static_cast<unsigned char>(ptr[i]);
             out.push_back(kHex[c >> 4]);
             out.push_back(kHex[c & 0x0F]);
         }
-        out.push_back('\'');
         return out;
     }
 
diff --git a/be/src/vec/core/field.cpp b/be/src/vec/core/field.cpp
index ffefb8ffd20..c0b03df7c3c 100644
--- a/be/src/vec/core/field.cpp
+++ b/be/src/vec/core/field.cpp
@@ -739,6 +739,10 @@ std::string_view Field::as_string_view() const {
         const auto& s = get<String>();
         return {s.data(), s.size()};
     }
+    if (type == PrimitiveType::TYPE_VARBINARY) {
+        const auto& svf = get<StringViewField>();
+        return {svf.data(), svf.size()};
+    }
     // MATCH_PRIMITIVE_TYPE(INVALID_TYPE);
     // MATCH_PRIMITIVE_TYPE(TYPE_NULL);
     MATCH_PRIMITIVE_TYPE(TYPE_BOOLEAN);
diff --git a/be/src/vec/core/field.h b/be/src/vec/core/field.h
index 2c611a59a28..3ef504dc0d6 100644
--- a/be/src/vec/core/field.h
+++ b/be/src/vec/core/field.h
@@ -246,6 +246,83 @@ private:
     UInt32 scale;
 };
 
+// StringViewField wraps a StringView and provides deep copy semantics.
+// Since StringView is a non-owning view (only contains pointer and length),
+// we need to store the actual data in a String to ensure the Field owns the 
data.
+// This prevents dangling pointer issues when Field objects are copied or 
moved.
+class StringViewField {
+public:
+    StringViewField() = default;
+    ~StringViewField() = default;
+
+    // Construct from raw data - performs deep copy
+    StringViewField(const char* data, size_t len) : _storage(data, len) {}
+
+    // Construct from StringView - performs deep copy
+    StringViewField(const StringView& sv) : _storage(sv.data(), sv.size()) {}
+
+    // Copy constructor - deep copy
+    StringViewField(const StringViewField& x) = default;
+
+    // Move constructor
+    StringViewField(StringViewField&& x) noexcept = default;
+
+    // Copy assignment - deep copy
+    StringViewField& operator=(const StringViewField& x) = default;
+
+    // Move assignment
+    StringViewField& operator=(StringViewField&& x) noexcept = default;
+
+    // Access methods
+    const char* data() const { return _storage.data(); }
+    size_t size() const { return _storage.size(); }
+    const String& get_string() const { return _storage; }
+
+    // Convert to StringView for compatibility
+    StringView to_string_view() const { return {data(), 
static_cast<uint32_t>(size())}; }
+
+    // Comparison operators - using binary comparison (memcmp) for VARBINARY 
semantics
+    bool operator<(const StringViewField& r) const {
+        int cmp = memcmp(_storage.data(), r._storage.data(),
+                         std::min(_storage.size(), r._storage.size()));
+        return cmp < 0 || (cmp == 0 && _storage.size() < r._storage.size());
+    }
+    bool operator<=(const StringViewField& r) const { return !(r < *this); }
+    bool operator==(const StringViewField& r) const {
+        return _storage.size() == r._storage.size() &&
+               memcmp(_storage.data(), r._storage.data(), _storage.size()) == 
0;
+    }
+    bool operator>(const StringViewField& r) const { return r < *this; }
+    bool operator>=(const StringViewField& r) const { return !(*this < r); }
+    bool operator!=(const StringViewField& r) const { return !(*this == r); }
+
+    std::strong_ordering operator<=>(const StringViewField& r) const {
+        size_t min_size = std::min(_storage.size(), r._storage.size());
+        int cmp = memcmp(_storage.data(), r._storage.data(), min_size);
+        if (cmp < 0) {
+            return std::strong_ordering::less;
+        }
+        if (cmp > 0) {
+            return std::strong_ordering::greater;
+        }
+        // Prefixes are equal, compare lengths
+        return _storage.size() <=> r._storage.size();
+    }
+
+    // Arithmetic operators (not commonly used but required by Field)
+    const StringViewField& operator+=(const StringViewField& r) {
+        _storage += r._storage;
+        return *this;
+    }
+
+    const StringViewField& operator-=(const StringViewField& r) {
+        throw Exception(Status::FatalError("Not support minus operation on 
StringViewField"));
+    }
+
+private:
+    String _storage; // Use String for deep copy and ownership
+};
+
 /** 32 is enough. Round number is used for alignment and for better arithmetic 
inside std::vector.
   * NOTE: Actually, sizeof(std::string) is 32 when using libc++, so Field is 
40 bytes.
   */
@@ -390,7 +467,7 @@ public:
         case PrimitiveType::TYPE_VARCHAR:
             return get<String>() <=> rhs.get<String>();
         case PrimitiveType::TYPE_VARBINARY:
-            return get<doris::StringView>() <=> rhs.get<doris::StringView>();
+            return get<StringViewField>() <=> rhs.get<StringViewField>();
         case PrimitiveType::TYPE_DECIMAL32:
             return get<Decimal32>() <=> rhs.get<Decimal32>();
         case PrimitiveType::TYPE_DECIMAL64:
@@ -439,7 +516,7 @@ public:
             f(field.template get<String>());
             return;
         case PrimitiveType::TYPE_VARBINARY:
-            f(field.template get<doris::StringView>());
+            f(field.template get<StringViewField>());
             return;
         case PrimitiveType::TYPE_JSONB:
             f(field.template get<JsonbField>());
@@ -489,11 +566,11 @@ public:
     std::string_view as_string_view() const;
 
 private:
-    std::aligned_union_t<
-            DBMS_MIN_FIELD_SIZE - sizeof(PrimitiveType), Null, UInt64, 
UInt128, Int64, Int128, IPv6,
-            Float64, String, JsonbField, Array, Tuple, Map, VariantMap, 
DecimalField<Decimal32>,
-            DecimalField<Decimal64>, DecimalField<Decimal128V2>, 
DecimalField<Decimal128V3>,
-            DecimalField<Decimal256>, BitmapValue, HyperLogLog, QuantileState, 
doris::StringView>
+    std::aligned_union_t<DBMS_MIN_FIELD_SIZE - sizeof(PrimitiveType), Null, 
UInt64, UInt128, Int64,
+                         Int128, IPv6, Float64, String, JsonbField, 
StringViewField, Array, Tuple,
+                         Map, VariantMap, DecimalField<Decimal32>, 
DecimalField<Decimal64>,
+                         DecimalField<Decimal128V2>, 
DecimalField<Decimal128V3>,
+                         DecimalField<Decimal256>, BitmapValue, HyperLogLog, 
QuantileState>
             storage;
 
     PrimitiveType type;
@@ -647,6 +724,11 @@ struct NearestFieldTypeImpl<PackedInt128> {
     using Type = Int128;
 };
 
+template <>
+struct NearestFieldTypeImpl<doris::StringView> {
+    using Type = StringViewField;
+};
+
 template <typename T>
 decltype(auto) cast_to_nearest_field_type(T&& x) {
     using U = NearestFieldType<std::decay_t<T>>;
diff --git a/be/src/vec/core/sort_block.h b/be/src/vec/core/sort_block.h
index bc25129b4a2..b65f5b715df 100644
--- a/be/src/vec/core/sort_block.h
+++ b/be/src/vec/core/sort_block.h
@@ -38,6 +38,7 @@
 #include "vec/columns/column_nullable.h"
 #include "vec/columns/column_string.h"
 #include "vec/columns/column_struct.h"
+#include "vec/columns/column_varbinary.h"
 #include "vec/common/memcmp_small.h"
 #include "vec/common/string_ref.h"
 #include "vec/core/block.h"
@@ -249,6 +250,10 @@ public:
                      EqualRange& range, bool last_column) const {
         _sort_by_default(column, flags, perms, range, last_column);
     }
+    void sort_column(const ColumnVarbinary& column, EqualFlags& flags, 
IColumn::Permutation& perms,
+                     EqualRange& range, bool last_column) const {
+        _sort_by_default(column, flags, perms, range, last_column);
+    }
 
     void sort_column(const ColumnString64& column, EqualFlags& flags, 
IColumn::Permutation& perms,
                      EqualRange& range, bool last_column) const {
@@ -378,6 +383,7 @@ private:
             if constexpr (!std::is_same_v<ColumnType, ColumnString> &&
                           !std::is_same_v<ColumnType, ColumnString64> &&
                           !std::is_same_v<ColumnType, ColumnArray> &&
+                          !std::is_same_v<ColumnType, ColumnVarbinary> &&
                           !std::is_same_v<ColumnType, ColumnMap> &&
                           !std::is_same_v<ColumnType, ColumnStruct>) {
                 auto value_a = column.get_data()[a];
diff --git a/be/src/vec/data_types/convert_field_to_type.cpp 
b/be/src/vec/data_types/convert_field_to_type.cpp
index bdd3a7922ba..28947232d5b 100644
--- a/be/src/vec/data_types/convert_field_to_type.cpp
+++ b/be/src/vec/data_types/convert_field_to_type.cpp
@@ -93,6 +93,11 @@ public:
         writer->writeString(x);
         writer->writeEndString();
     }
+    void operator()(const StringViewField& x, JsonbWriter* writer) const {
+        writer->writeStartString();
+        writer->writeString(x.data(), x.size());
+        writer->writeEndString();
+    }
     void operator()(const JsonbField& x, JsonbWriter* writer) const {
         const JsonbDocument* doc;
         THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(x.get_value(), 
x.get_size(), &doc));
diff --git a/be/src/vec/data_types/data_type_varbinary.h 
b/be/src/vec/data_types/data_type_varbinary.h
index fa13d19287d..f84884d8e1b 100644
--- a/be/src/vec/data_types/data_type_varbinary.h
+++ b/be/src/vec/data_types/data_type_varbinary.h
@@ -40,7 +40,7 @@ class IColumn;
 class DataTypeVarbinary : public IDataType {
 public:
     using ColumnType = ColumnVarbinary;
-    using FieldType = doris::StringView;
+    using FieldType = StringViewField;
 
     static constexpr PrimitiveType PType = TYPE_VARBINARY;
 
diff --git a/be/src/vec/data_types/serde/data_type_varbinary_serde.cpp 
b/be/src/vec/data_types/serde/data_type_varbinary_serde.cpp
index 12e8a7c1924..b60ab825332 100644
--- a/be/src/vec/data_types/serde/data_type_varbinary_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_varbinary_serde.cpp
@@ -41,7 +41,7 @@ Status 
DataTypeVarbinarySerDe::write_column_to_mysql_binary(const IColumn& colum
                                                             int64_t row_idx, 
bool col_const,
                                                             const 
FormatOptions& options) const {
     auto col_index = index_check_const(row_idx, col_const);
-    auto data = assert_cast<const 
ColumnVarbinary&>(column).get_data()[col_index];
+    const auto& data = assert_cast<const 
ColumnVarbinary&>(column).get_data()[col_index];
 
     if (0 != result.push_string(data.data(), data.size())) {
         return Status::InternalError("pack mysql buffer failed.");
@@ -62,7 +62,7 @@ Status DataTypeVarbinarySerDe::write_column_to_arrow(const 
IColumn& column, cons
                                                  builder.type()->name()));
                 continue;
             }
-            auto string_view = varbinary_column_data[i];
+            const auto& string_view = varbinary_column_data[i];
             
RETURN_IF_ERROR(checkArrowStatus(builder.Append(string_view.data(), 
string_view.size()),
                                              column.get_name(), 
builder.type()->name()));
         }
@@ -118,8 +118,13 @@ Status 
DataTypeVarbinarySerDe::deserialize_one_cell_from_json(IColumn& column, S
 
 void DataTypeVarbinarySerDe::to_string(const IColumn& column, size_t row_num, 
BufferWritable& bw,
                                        const FormatOptions& options) const {
-    const auto value = assert_cast<const 
ColumnVarbinary&>(column).get_data_at(row_num);
-    bw.write(value.data, value.size);
+    const auto& value = assert_cast<const 
ColumnVarbinary&>(column).get_data()[row_num];
+    if (_nesting_level >= 2) { // in complex type, need to dump as hex string 
by hand
+        const auto& hex_str = value.dump_hex();
+        bw.write(hex_str.data(), hex_str.size());
+    } else { // mysql protocol will be handle as hex binary data directly
+        bw.write(value.data(), value.size());
+    }
 }
 
 } // namespace doris::vectorized
diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp
index 58707907f68..e7b945edee7 100644
--- a/be/src/vec/exprs/vexpr.cpp
+++ b/be/src/vec/exprs/vexpr.cpp
@@ -349,6 +349,12 @@ TExprNode create_texpr_node_from(const vectorized::Field& 
field, const Primitive
         THROW_IF_ERROR(create_texpr_literal_node<TYPE_TIMEV2>(&storage, 
&node));
         break;
     }
+    case TYPE_VARBINARY: {
+        const auto& svf = field.get<vectorized::StringViewField>();
+        const std::string& storage = svf.get_string();
+        THROW_IF_ERROR(create_texpr_literal_node<TYPE_VARBINARY>(&storage, 
&node));
+        break;
+    }
     default:
         throw Exception(ErrorCode::INTERNAL_ERROR, "runtime filter meet 
invalid type {}",
                         int(type));
diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h
index f86035ddaec..03694652bf0 100644
--- a/be/src/vec/exprs/vexpr.h
+++ b/be/src/vec/exprs/vexpr.h
@@ -595,6 +595,13 @@ Status create_texpr_literal_node(const void* data, 
TExprNode* node, int precisio
         (*node).__set_timev2_literal(timev2_literal);
         (*node).__set_node_type(TExprNodeType::TIMEV2_LITERAL);
         (*node).__set_type(create_type_desc(PrimitiveType::TYPE_TIMEV2, 
precision, scale));
+    } else if constexpr (T == TYPE_VARBINARY) {
+        const auto* origin_value = reinterpret_cast<const std::string*>(data);
+        (*node).__set_node_type(TExprNodeType::VARBINARY_LITERAL);
+        TVarBinaryLiteral varbinary_literal;
+        varbinary_literal.__set_value(*origin_value);
+        (*node).__set_varbinary_literal(varbinary_literal);
+        (*node).__set_type(create_type_desc(PrimitiveType::TYPE_VARBINARY));
     } else {
         return Status::InvalidArgument("Invalid argument type!");
     }
diff --git a/be/test/vec/columns/column_varbinary_test.cpp 
b/be/test/vec/columns/column_varbinary_test.cpp
index 7799422b23d..4eeb637e64e 100644
--- a/be/test/vec/columns/column_varbinary_test.cpp
+++ b/be/test/vec/columns/column_varbinary_test.cpp
@@ -347,13 +347,13 @@ TEST_F(ColumnVarbinaryTest, FieldAccessOperatorAndGet) {
     for (size_t i = 0; i < vals.size(); ++i) {
         // operator[]
         Field f = (*col)[i];
-        auto sv = vectorized::get<const doris::StringView&>(f);
+        const auto& sv = vectorized::get<const StringViewField&>(f);
         ASSERT_EQ(sv.size(), vals[i].size());
         ASSERT_EQ(memcmp(sv.data(), vals[i].data(), sv.size()), 0);
         // get(size_t, Field&)
         Field f2;
         col->get(i, f2);
-        auto sv2 = vectorized::get<const doris::StringView&>(f2);
+        const auto& sv2 = vectorized::get<const StringViewField&>(f2);
         ASSERT_EQ(sv2.size(), vals[i].size());
         ASSERT_EQ(memcmp(sv2.data(), vals[i].data(), sv2.size()), 0);
     }
diff --git a/be/test/vec/common/string_view_test.cpp 
b/be/test/vec/common/string_view_test.cpp
index 4bfd8c25ea6..63bf3edb845 100644
--- a/be/test/vec/common/string_view_test.cpp
+++ b/be/test/vec/common/string_view_test.cpp
@@ -223,13 +223,13 @@ TEST_F(StringViewTest, ThreeWayComparisonOrdering) {
 TEST_F(StringViewTest, DumpHex) {
     // Empty
     StringView empty;
-    EXPECT_EQ(empty.dump_hex(), "X''");
+    EXPECT_EQ(empty.dump_hex(), "0x");
 
     // Inline with known bytes
     const unsigned char bytes_inline[] = {0x00, 0x01, 0x0A, 0x1F, 0x7F};
     StringView svi(reinterpret_cast<const char*>(bytes_inline), 
sizeof(bytes_inline));
     EXPECT_TRUE(svi.isInline());
-    EXPECT_EQ(svi.dump_hex(), "X'00010A1F7F'");
+    EXPECT_EQ(svi.dump_hex(), "0x00010A1F7F");
 
     // Non-inline, length > 12
     std::string big = make_bytes(16, 0x20); // bytes 0x20,0x21,...
@@ -237,13 +237,109 @@ TEST_F(StringViewTest, DumpHex) {
     EXPECT_FALSE(svb.isInline());
     // Build expected
     std::ostringstream oss;
-    oss << "X'";
+    oss << "0x";
     for (unsigned char c : big) {
         static const char* kHex = "0123456789ABCDEF";
         oss << kHex[c >> 4] << kHex[c & 0x0F];
     }
-    oss << "'";
     EXPECT_EQ(svb.dump_hex(), oss.str());
 }
 
+// Verify inline strings with length > 4 correctly store and compare tail bytes
+TEST_F(StringViewTest, InlineTailBytesAndEquality) {
+    std::string s1 = "abcdEFGHIJ"; // len=10, inline
+    std::string s2 = "abcdEFGHIQ"; // same prefix, differ at last byte
+    StringView v1(s1);
+    StringView v2(s2);
+    ASSERT_TRUE(v1.isInline());
+    ASSERT_TRUE(v2.isInline());
+
+    // Full content preserved
+    EXPECT_EQ(static_cast<std::string>(v1), s1);
+    // operator== must detect tail difference
+    EXPECT_FALSE(v1 == v2);
+    EXPECT_NE(v1.compare(v2), 0);
+}
+
+// Cover constructors from std::string_view and unsigned char*, and high-bytes 
dump
+TEST_F(StringViewTest, StringViewAndUnsignedCtorAndHighHex) {
+    // std::string_view ctor (inline boundary)
+    std::string inl = std::string(12, '\xAB');
+    std::string_view svw(inl);
+    StringView v_inl(svw);
+    EXPECT_TRUE(v_inl.isInline());
+    EXPECT_EQ(::memcmp(v_inl.data(), inl.data(), inl.size()), 0);
+
+    // unsigned char* ctor with >0x7F bytes to check sign issues in dump_hex
+    std::vector<uint8_t> bytes = {0x80, 0xFF, 0x00, 0x7F};
+    StringView v_unsigned(reinterpret_cast<unsigned char*>(bytes.data()),
+                          static_cast<uint32_t>(bytes.size()));
+    EXPECT_TRUE(v_unsigned.isInline());
+    EXPECT_EQ(v_unsigned.dump_hex(), "0x80FF007F");
+}
+
+// Construct from nullptr with zero length should be a valid empty inline view
+TEST_F(StringViewTest, NullPtrZeroLenCtor) {
+    StringView v(static_cast<const char*>(nullptr), 0);
+    EXPECT_TRUE(v.empty());
+    EXPECT_TRUE(v.isInline());
+    EXPECT_EQ(v.size(), 0U);
+    // stream and string conversions should yield empty
+    std::ostringstream oss;
+    oss << v;
+    EXPECT_TRUE(oss.str().empty());
+    EXPECT_TRUE(static_cast<std::string>(v).empty());
+}
+
+// Compare where both sides share prefix but decision comes from length after 
prefix (inline)
+TEST_F(StringViewTest, CompareAfterPrefixInlineLength) {
+    std::string a = "abcdEF";   // len=6
+    std::string b = "abcdEFGH"; // len=8, starts with a
+    StringView va(a), vb(b);
+    ASSERT_TRUE(va.isInline());
+    ASSERT_TRUE(vb.isInline());
+    EXPECT_LT(va.compare(vb), 0);
+    EXPECT_TRUE((va <=> vb) == std::strong_ordering::less);
+}
+
+// Same as above but with non-inline strings
+TEST_F(StringViewTest, CompareAfterPrefixNonInlineLength) {
+    std::string base = make_bytes(24, 0x41); // >=13 => non-inline
+    std::string short_s = base.substr(0, 20);
+    std::string long_s = short_s + "ZZ"; // same prefix, longer
+    StringView vs(short_s), vl(long_s);
+    ASSERT_FALSE(vs.isInline());
+    ASSERT_FALSE(vl.isInline());
+    EXPECT_LT(vs.compare(vl), 0);
+    EXPECT_TRUE((vs <=> vl) == std::strong_ordering::less);
+}
+
+// Non-inline copy semantics: copying should keep pointer identity and equality
+TEST_F(StringViewTest, NonInlineCopySemanticsAndIteration) {
+    std::string big = make_bytes(32, 0x21);
+    StringView a(big);
+    StringView b = a; // copy
+    ASSERT_FALSE(a.isInline());
+    ASSERT_FALSE(b.isInline());
+    EXPECT_EQ(a.data(), b.data());
+    EXPECT_TRUE(a == b);
+
+    // Iteration reconstructs the same bytes
+    std::string via_iter(a.begin(), a.end());
+    EXPECT_EQ(via_iter.size(), big.size());
+    EXPECT_EQ(::memcmp(via_iter.data(), big.data(), big.size()), 0);
+}
+
+// operator== should also detect non-inline tail differences (not only 
compare())
+TEST_F(StringViewTest, NonInlineEqualityDetectsTailDiff) {
+    std::string s1 = make_bytes(20, 0x30);
+    std::string s2 = s1;
+    s2[10] ^= 0x1; // differ after prefix
+    StringView v1(s1), v2(s2);
+    ASSERT_FALSE(v1.isInline());
+    ASSERT_FALSE(v2.isInline());
+    EXPECT_FALSE(v1 == v2);
+    EXPECT_NE(v1.compare(v2), 0);
+}
+
 } // namespace doris
diff --git a/be/test/vec/data_types/data_type_varbinary_test.cpp 
b/be/test/vec/data_types/data_type_varbinary_test.cpp
index 33571fe4074..6465758c623 100644
--- a/be/test/vec/data_types/data_type_varbinary_test.cpp
+++ b/be/test/vec/data_types/data_type_varbinary_test.cpp
@@ -87,7 +87,7 @@ TEST_F(DataTypeVarbinaryTest, CreateColumnAndCheckColumn) {
 TEST_F(DataTypeVarbinaryTest, GetDefaultField) {
     DataTypeVarbinary dt;
     Field def = dt.get_default();
-    const auto& sv = get<const doris::StringView&>(def);
+    const auto& sv = get<const StringViewField&>(def);
     EXPECT_EQ(sv.size(), 0U);
 }
 
@@ -176,7 +176,7 @@ TEST_F(DataTypeVarbinaryTest, GetFieldWithDataType) {
 
     auto fwd = dt.get_field_with_data_type(*col, 0);
     EXPECT_EQ(fwd.base_scalar_type_id, PrimitiveType::TYPE_VARBINARY);
-    const auto& sv = get<const doris::StringView&>(fwd.field);
+    const auto& sv = get<const StringViewField&>(fwd.field);
     ASSERT_EQ(sv.size(), v.size());
     ASSERT_EQ(memcmp(sv.data(), v.data(), sv.size()), 0);
 }
@@ -189,18 +189,119 @@ TEST_F(DataTypeVarbinaryTest, GetFieldFromTExprNode) {
     node.__isset.varbinary_literal = true;
 
     Field f = dt.get_field(node);
-    const auto& sv = get<const doris::StringView&>(f);
+    const auto& sv = get<const StringViewField&>(f);
     ASSERT_EQ(sv.size(), 5U);
     ASSERT_EQ(memcmp(sv.data(), "hello", 5), 0);
 }
 
-TEST_F(DataTypeVarbinaryTest, ToProtobufLen) {
-    DataTypeVarbinary dt(123);
+TEST_F(DataTypeVarbinaryTest, CheckColumnOnConstColumn) {
+    DataTypeVarbinary dt;
+    auto col = dt.create_column();
+    auto* vb = assert_cast<ColumnVarbinary*>(col.get());
+    std::string v = make_bytes(4, 0x12);
+    vb->insert_data(v.data(), v.size());
+
+    // Wrap as const column
+    auto cconst = ColumnConst::create(col->get_ptr(), /*size=*/5);
+    EXPECT_TRUE(dt.check_column(*cconst).ok());
+}
+
+TEST_F(DataTypeVarbinaryTest, SerializeDeserializeConstColumn) {
+    DataTypeVarbinary dt;
+    auto base = dt.create_column();
+    auto* vb = assert_cast<ColumnVarbinary*>(base.get());
+    std::string val = make_bytes(3, 0x7A);
+    vb->insert_data(val.data(), val.size());
+
+    // Make it const with logical row_num=5
+    ColumnPtr const_col = ColumnConst::create(base->get_ptr(), /*size=*/5);
+
+    int ver = BeExecVersionManager::get_newest_version();
+    // Expect: bool + size_t(row_num) + size_t(real_need_copy_num=1) + one 
size + payload
+    size_t expected = sizeof(bool) + sizeof(size_t) + sizeof(size_t) + 
sizeof(size_t) + val.size();
+    auto sz = dt.get_uncompressed_serialized_bytes(*const_col, ver);
+    EXPECT_EQ(static_cast<size_t>(sz), expected);
+
+    std::string buf;
+    buf.resize(expected);
+    char* p = buf.data();
+    char* end = dt.serialize(*const_col, p, ver);
+    ASSERT_EQ(static_cast<size_t>(end - p), expected);
+
+    MutableColumnPtr deser = dt.create_column();
+    const char* p2 = buf.data();
+    const char* end2 = dt.deserialize(p2, &deser, ver);
+    ASSERT_EQ(static_cast<size_t>(end2 - p2), expected);
+
+    // After deserialize, the output is a ColumnConst wrapping the data column.
+    ColumnPtr out = deser->get_ptr();
+    ASSERT_TRUE(is_column_const(*out));
+    const auto& cconst = assert_cast<const ColumnConst&>(*out);
+    EXPECT_EQ(cconst.size(), 5U); // logical row num retained
+    const auto& inner = assert_cast<const 
ColumnVarbinary&>(*cconst.get_data_column_ptr());
+    ASSERT_EQ(inner.size(), 1U);
+    auto r = inner.get_data_at(0);
+    ASSERT_EQ(r.size, val.size());
+    ASSERT_EQ(memcmp(r.data, val.data(), r.size), 0);
+}
+
+TEST_F(DataTypeVarbinaryTest, SerDeWriteColumnToMysql) {
+    DataTypeVarbinary dt;
+    auto col = dt.create_column();
+    auto* vb = assert_cast<ColumnVarbinary*>(col.get());
+    std::string v1 = make_bytes(2, 0x10);
+    vb->insert_data(v1.data(), v1.size());
+
+    auto serde = dt.get_serde();
+    // binary protocol
+    doris::MysqlRowBinaryBuffer rb_bin;
+    auto format_options = DataTypeSerDe::FormatOptions();
+    auto st2 = serde->write_column_to_mysql_binary(*col, rb_bin, 
/*row_idx=*/0, /*col_const=*/false,
+                                                   format_options);
+    EXPECT_TRUE(st2.ok());
+    EXPECT_GT(rb_bin.length(), 0);
+}
+
+TEST_F(DataTypeVarbinaryTest, GetStorageFieldTypeThrows) {
+    DataTypeVarbinary dt;
+    EXPECT_THROW({ (void)dt.get_storage_field_type(); }, doris::Exception);
+}
+
+TEST_F(DataTypeVarbinaryTest, GetFieldFromTExprNodeWithEmbeddedNull) {
+    DataTypeVarbinary dt;
+    TExprNode node;
+    node.node_type = TExprNodeType::VARBINARY_LITERAL;
+    std::string raw = std::string("a\0b", 3);
+    node.varbinary_literal.value = raw;
+    node.__isset.varbinary_literal = true;
+
+    Field f = dt.get_field(node);
+    const auto& sv = get<const StringViewField&>(f);
+    ASSERT_EQ(sv.size(), raw.size());
+    ASSERT_EQ(memcmp(sv.data(), raw.data(), sv.size()), 0);
+}
+
+TEST_F(DataTypeVarbinaryTest, ToProtobufDefaultLen) {
+    DataTypeVarbinary dt; // default len = -1
     PTypeDesc ptype;
     PTypeNode pnode;
     PScalarType scalar;
     dt.to_protobuf(&ptype, &pnode, &scalar);
-    EXPECT_EQ(scalar.len(), 123);
+    EXPECT_EQ(scalar.len(), -1);
+}
+
+TEST_F(DataTypeVarbinaryTest, GetFieldWithDataTypeNonInline) {
+    DataTypeVarbinary dt;
+    auto col = dt.create_column();
+    auto* vb = assert_cast<ColumnVarbinary*>(col.get());
+    std::string big = make_bytes(doris::StringView::kInlineSize + 6, 0x55);
+    vb->insert_data(big.data(), big.size());
+
+    auto fwd = dt.get_field_with_data_type(*col, 0);
+    EXPECT_EQ(fwd.base_scalar_type_id, PrimitiveType::TYPE_VARBINARY);
+    const auto& sv = get<const StringViewField&>(fwd.field);
+    ASSERT_EQ(sv.size(), big.size());
+    ASSERT_EQ(memcmp(sv.data(), big.data(), sv.size()), 0);
 }
 
 } // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/test/vec/exec/format/parquet/parquet_reader_test.cpp 
b/be/test/vec/exec/format/parquet/parquet_reader_test.cpp
index 341526e0926..659f1211e24 100644
--- a/be/test/vec/exec/format/parquet/parquet_reader_test.cpp
+++ b/be/test/vec/exec/format/parquet/parquet_reader_test.cpp
@@ -243,9 +243,9 @@ TEST_F(ParquetReaderTest, uuid_varbinary) {
     auto varbinary_column =
             assert_cast<const 
ColumnVarbinary*>(nullable_column->get_nested_column_ptr().get());
     auto& data = varbinary_column->get_data();
-    EXPECT_EQ(data[0].dump_hex(), "X'550E8400E29B41D4A716446655440000'");
-    EXPECT_EQ(data[1].dump_hex(), "X'123E4567E89B12D3A456426614174000'");
-    EXPECT_EQ(data[2].dump_hex(), "X'00000000000000000000000000000000'");
+    EXPECT_EQ(data[0].dump_hex(), "0x550E8400E29B41D4A716446655440000");
+    EXPECT_EQ(data[1].dump_hex(), "0x123E4567E89B12D3A456426614174000");
+    EXPECT_EQ(data[2].dump_hex(), "0x00000000000000000000000000000000");
 }
 
 TEST_F(ParquetReaderTest, varbinary_varbinary) {
@@ -316,9 +316,9 @@ TEST_F(ParquetReaderTest, varbinary_varbinary) {
     auto varbinary_column =
             assert_cast<const 
ColumnVarbinary*>(nullable_column->get_nested_column_ptr().get());
     auto& data = varbinary_column->get_data();
-    EXPECT_EQ(data[0].dump_hex(), "X'0123456789ABCDEF'");
-    EXPECT_EQ(data[1].dump_hex(), "X'FEDCBA9876543210'");
-    EXPECT_EQ(data[2].dump_hex(), "X'00'");
+    EXPECT_EQ(data[0].dump_hex(), "0x0123456789ABCDEF");
+    EXPECT_EQ(data[1].dump_hex(), "0xFEDCBA9876543210");
+    EXPECT_EQ(data[2].dump_hex(), "0x00");
 }
 
 TEST_F(ParquetReaderTest, varbinary_string) {
@@ -391,9 +391,9 @@ TEST_F(ParquetReaderTest, varbinary_string) {
     auto varbinary_column =
             assert_cast<const 
ColumnVarbinary*>(nullable_column->get_nested_column_ptr().get());
     auto& data = varbinary_column->get_data();
-    EXPECT_EQ(data[0].dump_hex(), "X'0123456789ABCDEF'");
-    EXPECT_EQ(data[1].dump_hex(), "X'FEDCBA9876543210'");
-    EXPECT_EQ(data[2].dump_hex(), "X'00'");
+    EXPECT_EQ(data[0].dump_hex(), "0x0123456789ABCDEF");
+    EXPECT_EQ(data[1].dump_hex(), "0xFEDCBA9876543210");
+    EXPECT_EQ(data[2].dump_hex(), "0x00");
 }
 
 TEST_F(ParquetReaderTest, varbinary_string2) {
@@ -465,9 +465,9 @@ TEST_F(ParquetReaderTest, varbinary_string2) {
     auto nullable_column = assert_cast<const ColumnNullable*>(col.get());
     auto string_column =
             assert_cast<const 
ColumnString*>(nullable_column->get_nested_column_ptr().get());
-    EXPECT_EQ(StringView(string_column->get_data_at(0)).dump_hex(), 
"X'0123456789ABCDEF'");
-    EXPECT_EQ(StringView(string_column->get_data_at(1)).dump_hex(), 
"X'FEDCBA9876543210'");
-    EXPECT_EQ(StringView(string_column->get_data_at(2)).dump_hex(), "X'00'");
+    EXPECT_EQ(StringView(string_column->get_data_at(0)).dump_hex(), 
"0x0123456789ABCDEF");
+    EXPECT_EQ(StringView(string_column->get_data_at(1)).dump_hex(), 
"0xFEDCBA9876543210");
+    EXPECT_EQ(StringView(string_column->get_data_at(2)).dump_hex(), "0x00");
 }
 
 static ParquetReader* create_parquet_reader(TFileScanRangeParams& scan_params,
diff --git 
a/fe/be-java-extensions/iceberg-metadata-scanner/src/main/java/org/apache/doris/iceberg/IcebergSysTableColumnValue.java
 
b/fe/be-java-extensions/iceberg-metadata-scanner/src/main/java/org/apache/doris/iceberg/IcebergSysTableColumnValue.java
index b1caff8ae8f..70814f27f75 100644
--- 
a/fe/be-java-extensions/iceberg-metadata-scanner/src/main/java/org/apache/doris/iceberg/IcebergSysTableColumnValue.java
+++ 
b/fe/be-java-extensions/iceberg-metadata-scanner/src/main/java/org/apache/doris/iceberg/IcebergSysTableColumnValue.java
@@ -142,6 +142,13 @@ public class IcebergSysTableColumnValue implements 
ColumnValue {
 
     @Override
     public byte[] getBytes() {
+        // https://github.com/apache/iceberg/blob/8626ef5137024c1a69daaff97a832af6b0ae37ea/api/src/main/java/org/apache/iceberg/types/Type.java#L45C5-L45C30
+        if (fieldData instanceof ByteBuffer) {
+            ByteBuffer buffer = (ByteBuffer) fieldData;
+            byte[] bytes = new byte[buffer.remaining()];
+            buffer.get(bytes);
+            return bytes;
+        }
         return (byte[]) fieldData;
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
index 78bc458b76f..2d3cd457130 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
@@ -255,6 +255,7 @@ public class OutFileClause {
             case HLL:
             case BITMAP:
             case QUANTILE_STATE:
+            case VARBINARY:
                 orcType = "binary";
                 break;
             case DATEV2:
@@ -412,6 +413,7 @@ public class OutFileClause {
                 case HLL:
                 case BITMAP:
                 case QUANTILE_STATE:
+                case VARBINARY:
                     checkOrcType(schema.second, "binary", true, 
resultType.getPrimitiveType().toString());
                     break;
                 case STRUCT:
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java
index cbec9deb0eb..29f93153738 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java
@@ -143,7 +143,7 @@ public class CheckAfterRewrite extends 
OneAnalysisRuleFactory {
         if (plan instanceof LogicalAggregate) {
             LogicalAggregate<?> agg = (LogicalAggregate<?>) plan;
             for (Expression groupBy : agg.getGroupByExpressions()) {
-                if (groupBy.getDataType().isObjectOrVariantType()) {
+                if (groupBy.getDataType().isObjectOrVariantType() || 
groupBy.getDataType().isVarBinaryType()) {
                     throw new AnalysisException(Type.OnlyMetricTypeErrorMsg);
                 }
             }
@@ -181,11 +181,17 @@ public class CheckAfterRewrite extends 
OneAnalysisRuleFactory {
             for (Expression conjunct : join.getHashJoinConjuncts()) {
                 if (conjunct.anyMatch(e -> ((Expression) 
e).getDataType().isVariantType())) {
                     throw new AnalysisException("variant type could not in 
join equal conditions: " + conjunct.toSql());
+                } else if (conjunct.anyMatch(e -> ((Expression) 
e).getDataType().isVarBinaryType())) {
+                    throw new AnalysisException(
+                            "varbinary type could not in join equal 
conditions: " + conjunct.toSql());
                 }
             }
             for (Expression conjunct : join.getMarkJoinConjuncts()) {
                 if (conjunct.anyMatch(e -> ((Expression) 
e).getDataType().isVariantType())) {
                     throw new AnalysisException("variant type could not in 
join equal conditions: " + conjunct.toSql());
+                } else if (conjunct.anyMatch(e -> ((Expression) 
e).getDataType().isVarBinaryType())) {
+                    throw new AnalysisException(
+                            "varbinary type could not in join equal 
conditions: " + conjunct.toSql());
                 }
             }
         }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VarBinaryType.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VarBinaryType.java
index de5dfcbdfe2..a06b7c20053 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VarBinaryType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VarBinaryType.java
@@ -21,6 +21,8 @@ import org.apache.doris.catalog.ScalarType;
 import org.apache.doris.catalog.Type;
 import org.apache.doris.nereids.types.coercion.PrimitiveType;
 
+import com.google.common.base.Preconditions;
+
 import java.util.Objects;
 
 /**
@@ -40,6 +42,8 @@ public class VarBinaryType extends PrimitiveType {
     }
 
     public VarBinaryType(int len) {
+        Preconditions.checkArgument(0 <= len && len <= MAX_VARBINARY_LENGTH,
+                "VarBinary length must be between 0 and " + 
MAX_VARBINARY_LENGTH + ", but got: " + len);
         this.len = len;
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java
index e02a2925674..0e1689caf72 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java
@@ -1321,6 +1321,12 @@ public class TypeCoercionUtils {
         Expression left = comparisonPredicate.left();
         Expression right = comparisonPredicate.right();
 
+        // TODO: remove this restriction after supporting varbinary comparison in BE
+        if (left.getDataType().isVarBinaryType() || 
right.getDataType().isVarBinaryType()) {
+            throw new AnalysisException("data type varbinary "
+                    + " could not used in ComparisonPredicate now " + 
comparisonPredicate.toSql());
+        }
+
         // same type
         if (left.getDataType().equals(right.getDataType())) {
             if (!supportCompare(left.getDataType(), false)) {
diff --git 
a/regression-test/data/external_table_p0/export/test_hive_export_varbinary.out 
b/regression-test/data/external_table_p0/export/test_hive_export_varbinary.out
new file mode 100644
index 00000000000..f50f72bd68a
--- /dev/null
+++ 
b/regression-test/data/external_table_p0/export/test_hive_export_varbinary.out
@@ -0,0 +1,29 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_tvf0 --
+1      0x550E8400E29B41D4A716446655440000      0x0123456789ABCDEF
+2      0x123E4567E89B12D3A456426614174000      0xFEDCBA9876543210
+3      0x00000000000000000000000000000000      0x00
+4      \N      \N
+5      0xABCDEF1234567890      0xFFFF
+
+-- !select_tvf1 --
+1      0x550E8400E29B41D4A716446655440000      0x0123456789ABCDEF
+2      0x123E4567E89B12D3A456426614174000      0xFEDCBA9876543210
+3      0x00000000000000000000000000000000      0x00
+4      \N      \N
+5      0xABCDEF1234567890      0xFFFF
+
+-- !select_tvf2 --
+1      0x550E8400E29B41D4A716446655440000      0x0123456789ABCDEF
+2      0x123E4567E89B12D3A456426614174000      0xFEDCBA9876543210
+3      0x00000000000000000000000000000000      0x00
+4      \N      \N
+5      0xABCDEF1234567890      0xFFFF
+
+-- !select_tvf3 --
+1      0x550E8400E29B41D4A716446655440000      0x0123456789ABCDEF
+2      0x123E4567E89B12D3A456426614174000      0xFEDCBA9876543210
+3      0x00000000000000000000000000000000      0x00
+4      \N      \N
+5      0xABCDEF1234567890      0xFFFF
+
diff --git a/regression-test/data/external_table_p0/hive/test_hive_orc.out 
b/regression-test/data/external_table_p0/hive/test_hive_orc.out
index 42f49602d9a..aa105acc1cd 100644
--- a/regression-test/data/external_table_p0/hive/test_hive_orc.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_orc.out
@@ -1005,6 +1005,54 @@ tablets  tinyint_col     179     182     182     187     
183     181     177     183     177     187     183     202     202     186     
528
 4      71      986333570       88457348565826264       true    3.851428E7      
5.177242499015833e+17   Football stumble result taste pleased midst. Mirror 
loyal divide. Ultimately injury chip lawyer. Leadership teacher belong.     \N  
    2022-08-26T19:19:31     140717.2626     phones  smallint_col    2022-08-22  
    []      ["PWCCGPfT"]    phones  smallint_col
 3      21      986131998       683875991736989008      \N      6.0774349E8     
\N      Weird period none. Assertion coincide college. Subscriber fridge craft. 
Poisonous donation ordinary. Explode village debt.      split terrify   
2022-08-27T01:05:20     390407.1015     tablets tinyint_col     2015-11-04      
[6.630172173849485e+17] [null, "lSQFYzUG", "vMVMwfZzpl", "QRFiYUUefBc", 
"VdtTHy", "YrPtPPzynqXCCzm", "LfIgQvGimBBzlgn"] tablets tinyint_col
 
+-- !sql_topn_binary_col1 --
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+
+-- !sql_topn_binary_col2 --
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+
+-- !sql_topn_binary_col3 --
+0x79656C6C6F772070756E6368     yellow punch
+0x79657374657264617920736574746C656D656E7420756E636F6E7363696F75732062696F6C6F6779
     yesterday settlement unconscious biology
+0x796574207265696E666F726365   yet reinforce
+0x7965742072656C6178   yet relax
+0x796F756E67206372656174697665207072657365727665       young creative preserve
+0x796F757220617070726F7072696174656C7920736574746C657220646F6D696E616E6365     
your appropriately settler dominance
+0x796F75727320696E766164652070726976616379     yours invade privacy
+0x796F7572732073657373696F6E20636F756E63696C   yours session council
+0x796F75746820626563617573652065787065646974696F6E     youth because expedition
+0x7A65726F207765617220736561736F6E     zero wear season
+
+-- !sql_topn_binary_col4 --
+0x79656C6C6F772070756E6368     yellow punch
+0x79657374657264617920736574746C656D656E7420756E636F6E7363696F75732062696F6C6F6779
     yesterday settlement unconscious biology
+0x796574207265696E666F726365   yet reinforce
+0x7965742072656C6178   yet relax
+0x796F756E67206372656174697665207072657365727665       young creative preserve
+0x796F757220617070726F7072696174656C7920736574746C657220646F6D696E616E6365     
your appropriately settler dominance
+0x796F75727320696E766164652070726976616379     yours invade privacy
+0x796F7572732073657373696F6E20636F756E63696C   yours session council
+0x796F75746820626563617573652065787065646974696F6E     youth because expedition
+0x7A65726F207765617220736561736F6E     zero wear season
+
 -- !select_top50 --
 4      55      999742610       400899305488827731      false   6.5976813E8     
7.87233046169374e+17    \N      base tennis pit vertical friday 
2022-08-19T07:29:58     \N      tablets smallint_col    2019-02-07      
[7.53124931825377e+17]  ["NbSSBtwzpxNSkkwga"]   tablets smallint_col
 2      49      999613702       105493714032727452      \N      6.3322381E8     
9.864232441024018e+17   Unveil bright recruit participate. Suspect impression 
camera mathematical revelation. Fault live2 elbow debt west hydrogen current.   
  how literary    2022-09-03T17:20:21     481707.1065     tablets boolean_col   
  2020-01-12      []      ["HoMrAnn", "wteEFvIwoZsVpVQdscMb", null, "zcGFmv", 
"kGEBBckbMtX", "hrEtCGFdPWZK"]      tablets boolean_col
@@ -2011,3 +2059,51 @@ tablets  tinyint_col     179     182     182     187     
183     181     177     183     177     187     183     202     202     186     
528
 4      71      986333570       88457348565826264       true    3.851428E7      
5.177242499015833e+17   Football stumble result taste pleased midst. Mirror 
loyal divide. Ultimately injury chip lawyer. Leadership teacher belong.     \N  
    2022-08-26T19:19:31     140717.2626     phones  smallint_col    2022-08-22  
    []      ["PWCCGPfT"]    phones  smallint_col
 3      21      986131998       683875991736989008      \N      6.0774349E8     
\N      Weird period none. Assertion coincide college. Subscriber fridge craft. 
Poisonous donation ordinary. Explode village debt.      split terrify   
2022-08-27T01:05:20     390407.1015     tablets tinyint_col     2015-11-04      
[6.630172173849485e+17] [null, "lSQFYzUG", "vMVMwfZzpl", "QRFiYUUefBc", 
"VdtTHy", "YrPtPPzynqXCCzm", "LfIgQvGimBBzlgn"] tablets tinyint_col
 
+-- !sql_topn_binary_col1 --
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+
+-- !sql_topn_binary_col2 --
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+\N     \N
+
+-- !sql_topn_binary_col3 --
+0x79656C6C6F772070756E6368     yellow punch
+0x79657374657264617920736574746C656D656E7420756E636F6E7363696F75732062696F6C6F6779
     yesterday settlement unconscious biology
+0x796574207265696E666F726365   yet reinforce
+0x7965742072656C6178   yet relax
+0x796F756E67206372656174697665207072657365727665       young creative preserve
+0x796F757220617070726F7072696174656C7920736574746C657220646F6D696E616E6365     
your appropriately settler dominance
+0x796F75727320696E766164652070726976616379     yours invade privacy
+0x796F7572732073657373696F6E20636F756E63696C   yours session council
+0x796F75746820626563617573652065787065646974696F6E     youth because expedition
+0x7A65726F207765617220736561736F6E     zero wear season
+
+-- !sql_topn_binary_col4 --
+0x79656C6C6F772070756E6368     yellow punch
+0x79657374657264617920736574746C656D656E7420756E636F6E7363696F75732062696F6C6F6779
     yesterday settlement unconscious biology
+0x796574207265696E666F726365   yet reinforce
+0x7965742072656C6178   yet relax
+0x796F756E67206372656174697665207072657365727665       young creative preserve
+0x796F757220617070726F7072696174656C7920736574746C657220646F6D696E616E6365     
your appropriately settler dominance
+0x796F75727320696E766164652070726976616379     yours invade privacy
+0x796F7572732073657373696F6E20636F756E63696C   yours session council
+0x796F75746820626563617573652065787065646974696F6E     youth because expedition
+0x7A65726F207765617220736561736F6E     zero wear season
+
diff --git 
a/regression-test/data/external_table_p0/iceberg/test_iceberg_sys_table.out 
b/regression-test/data/external_table_p0/iceberg/test_iceberg_sys_table.out
index f6b7072cc54..030a379d20b 100644
--- a/regression-test/data/external_table_p0/iceberg/test_iceberg_sys_table.out
+++ b/regression-test/data/external_table_p0/iceberg/test_iceberg_sys_table.out
@@ -975,3 +975,38 @@ total_data_file_size_in_bytes      bigint  Yes     true    
\N      NONE
 -- !select_partitions_count --
 9
 
+-- !varbinary_sys_table_desc --
+column_sizes   map<int,bigint> Yes     true    \N      NONE
+content        int     Yes     true    \N      NONE
+content_offset bigint  Yes     true    \N      NONE
+content_size_in_bytes  bigint  Yes     true    \N      NONE
+equality_ids   array<int>      Yes     true    \N      NONE
+file_format    text    Yes     true    \N      NONE
+file_path      text    Yes     true    \N      NONE
+file_size_in_bytes     bigint  Yes     true    \N      NONE
+first_row_id   bigint  Yes     true    \N      NONE
+key_metadata   varbinary(2147483647)   Yes     true    \N      NONE
+lower_bounds   map<int,varbinary(2147483647)>  Yes     true    \N      NONE
+nan_value_counts       map<int,bigint> Yes     true    \N      NONE
+null_value_counts      map<int,bigint> Yes     true    \N      NONE
+readable_metrics       
struct<id:struct<column_size:bigint,value_count:bigint,null_value_count:bigint,nan_value_count:bigint,lower_bound:int,upper_bound:int>,name:struct<column_size:bigint,value_count:bigint,null_value_count:bigint,nan_value_count:bigint,lower_bound:text,upper_bound:text>>
     Yes     true    \N      NONE
+record_count   bigint  Yes     true    \N      NONE
+referenced_data_file   text    Yes     true    \N      NONE
+sort_order_id  int     Yes     true    \N      NONE
+spec_id        int     Yes     true    \N      NONE
+split_offsets  array<bigint>   Yes     true    \N      NONE
+upper_bounds   map<int,varbinary(2147483647)>  Yes     true    \N      NONE
+value_counts   map<int,bigint> Yes     true    \N      NONE
+
+-- !varbinary_sys_table_select --
+0      PARQUET 1       {1:0x01000000, 2:0x416C696365}  {1:0x01000000, 
2:0x416C696365}
+0      PARQUET 1       {1:0x02000000, 2:0x426F622055706461746564}      
{1:0x02000000, 2:0x426F622055706461746564}
+0      PARQUET 1       {1:0x02000000, 2:0x426F62}      {1:0x02000000, 
2:0x426F62}
+0      PARQUET 1       {1:0x04000000, 2:0x44617665}    {1:0x04000000, 
2:0x44617665}
+0      PARQUET 1       {1:0x05000000, 2:0x457665}      {1:0x05000000, 
2:0x457665}
+0      PARQUET 1       {1:0x06000000, 2:0x4672616E6B}  {1:0x06000000, 
2:0x4672616E6B}
+0      PARQUET 1       {1:0x07000000, 2:0x4772616365}  {1:0x07000000, 
2:0x4772616365}
+0      PARQUET 1       {1:0x08000000, 2:0x4865696469}  {1:0x08000000, 
2:0x4865696469}
+0      PARQUET 1       {1:0x09000000, 2:0x4976616E}    {1:0x09000000, 
2:0x4976616E}
+0      PARQUET 1       {1:0x0A000000, 2:0x4A756479}    {1:0x0A000000, 
2:0x4A756479}
+
diff --git 
a/regression-test/suites/external_table_p0/export/test_hive_export_varbinary.groovy
 
b/regression-test/suites/external_table_p0/export/test_hive_export_varbinary.groovy
new file mode 100644
index 00000000000..dff673c6b61
--- /dev/null
+++ 
b/regression-test/suites/external_table_p0/export/test_hive_export_varbinary.groovy
@@ -0,0 +1,120 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+import java.nio.charset.StandardCharsets
+import java.nio.file.Files
+import java.nio.file.Paths
+
+suite("test_hive_export_varbinary", "external,hive,external_docker") {
+
+    String enabled = context.config.otherConfigs.get("enableHiveTest")
+    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+        logger.info("diable Hive test.")
+        return;
+    }
+
+    for (String hivePrefix : ["hive2"]) {
+        setHivePrefix(hivePrefix)
+        String hms_port = context.config.otherConfigs.get(hivePrefix + 
"HmsPort")
+        String hdfs_port = context.config.otherConfigs.get(hivePrefix + 
"HdfsPort")
+        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+
+        // It's okay to use random `hdfsUser`, but can not be empty.
+        def hdfsUserName = "doris"
+        def defaultFS = "hdfs://${externalEnvIp}:${hdfs_port}"
+        def outfile_path = "/user/doris/tmp_data"
+        def uri = "${defaultFS}" + "${outfile_path}/exp_"
+
+        def outfile_to_HDFS = {format,export_table_name ->
+            // select ... into outfile ...
+            def uuid = UUID.randomUUID().toString()
+            outfile_path = "/user/doris/tmp_data/${uuid}"
+            uri = "${defaultFS}" + "${outfile_path}/exp_"
+
+            def res = sql """
+                SELECT * FROM ${export_table_name} t ORDER BY id
+                INTO OUTFILE "${uri}"
+                FORMAT AS ${format}
+                PROPERTIES (
+                    "fs.defaultFS"="${defaultFS}",
+                    "hadoop.username" = "${hdfsUserName}"
+                );
+            """
+            logger.info("outfile success path: " + res[0][3]);
+            return res[0][3]
+        }
+
+        try {
+            String catalog_name_with_export = 
"${hivePrefix}_test_varbinary_with_export"
+            sql """drop catalog if exists ${catalog_name_with_export}"""
+            sql """create catalog if not exists ${catalog_name_with_export} 
properties (
+                "type"="hms",
+                'hive.metastore.uris' = 
'thrift://${externalEnvIp}:${hms_port}',
+                "enable.mapping.varbinary"="true"
+            );"""
+
+            sql """ switch ${catalog_name_with_export}"""
+            sql """ use `test_varbinary` """
+
+            // test outfile to hdfs
+            def format = "parquet"
+            def export_table_name = "test_hive_binary_parquet"
+
+            def outfile_url0 = outfile_to_HDFS(format, export_table_name)
+            order_qt_select_tvf0 """ select * from HDFS(
+                        "uri" = "${outfile_url0}.${format}",
+                        "hadoop.username" = "${hdfsUserName}",
+                        "enable_mapping_varbinary"="true",
+                        "format" = "${format}");
+                        """
+
+            format = "parquet"
+            export_table_name = "test_hive_binary_orc"
+            def outfile_url1 = outfile_to_HDFS(format, export_table_name)
+            order_qt_select_tvf1 """ select * from HDFS(
+                        "uri" = "${outfile_url1}.${format}",
+                        "hadoop.username" = "${hdfsUserName}",
+                        "enable_mapping_varbinary"="true",
+                        "format" = "${format}");
+                        """
+
+            format = "orc"
+            export_table_name = "test_hive_binary_parquet"
+            def outfile_url2 = outfile_to_HDFS(format, export_table_name)
+            order_qt_select_tvf2 """ select * from HDFS(
+                        "uri" = "${outfile_url2}.${format}",
+                        "hadoop.username" = "${hdfsUserName}",
+                        "enable_mapping_varbinary"="true",
+                        "format" = "${format}");
+                        """
+
+            format = "orc"
+            export_table_name = "test_hive_binary_orc"
+            def outfile_url3 = outfile_to_HDFS(format, export_table_name)
+            order_qt_select_tvf3 """ select * from HDFS(
+                        "uri" = "${outfile_url3}.${format}",
+                        "hadoop.username" = "${hdfsUserName}",
+                        "enable_mapping_varbinary"="true",
+                        "format" = "${format}");
+                        """
+
+        } finally {
+        }
+    }
+}
diff --git a/regression-test/suites/external_table_p0/hive/test_hive_orc.groovy 
b/regression-test/suites/external_table_p0/hive/test_hive_orc.groovy
index 380169fa667..8e19fe581d5 100644
--- a/regression-test/suites/external_table_p0/hive/test_hive_orc.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_orc.groovy
@@ -204,6 +204,61 @@ suite("test_hive_orc", 
"all_types,p0,external,hive,external_docker,external_dock
             sql """use `${catalog_name}`.`default`"""
             select_top50()
             sql """drop catalog if exists ${catalog_name}"""
+
+            sql """drop catalog if exists test_hive_orc_mapping_varbinary"""
+            sql """create catalog if not exists 
test_hive_orc_mapping_varbinary properties (
+                "type"="hms",
+                'hive.metastore.uris' = 
'thrift://${externalEnvIp}:${hms_port}',
+                'enable.mapping.varbinary' = 'true'
+            );"""
+            sql """use `test_hive_orc_mapping_varbinary`.`default`"""
+
+            explain {
+                sql("select  binary_col from  orc_all_types order by 
binary_col,string_col asc limit 10;")
+                contains("TOPN OPT:1")
+            }
+            explain {
+                sql("select  binary_col from  orc_all_types order by 
binary_col asc limit 10;")
+                contains("TOPN OPT:1")
+            }
+            order_qt_sql_topn_binary_col1 """ select  
binary_col,cast(binary_col as string) from  orc_all_types order by binary_col 
asc limit 10; """
+            order_qt_sql_topn_binary_col2 """ select  
binary_col,cast(binary_col as string) from  orc_all_types order by binary_col 
asc ,string_col asc limit 10; """
+            order_qt_sql_topn_binary_col3 """ select  
binary_col,cast(binary_col as string) from  orc_all_types order by binary_col 
desc limit 10; """
+            order_qt_sql_topn_binary_col4 """ select  
binary_col,cast(binary_col as string) from  orc_all_types order by binary_col 
desc,string_col desc limit 10; """
+
+            sql """ switch internal; """
+            sql """ drop database if exists test_view_varbinary_db"""
+            sql """ create database if not exists test_view_varbinary_db"""
+            sql """use test_view_varbinary_db"""
+            test {
+                sql " create view test_view_varbinary as select binary_col 
from `test_hive_orc_mapping_varbinary`.`default`.`orc_all_types`; "
+                exception " View does not support VARBINARY type: binary_col"
+            }
+
+            test {
+                sql """ CREATE MATERIALIZED VIEW test_mv_varbinary
+                        BUILD DEFERRED REFRESH AUTO ON MANUAL
+                        DISTRIBUTED BY RANDOM BUCKETS 2
+                        PROPERTIES ('replication_num' = '1')
+                        AS select binary_col from 
`test_hive_orc_mapping_varbinary`.`default`.`orc_all_types`; """
+                exception " MTMV do not support varbinary type : binary_col"
+            }
+
+            test {
+                sql " select count() from 
`test_hive_orc_mapping_varbinary`.`default`.`orc_all_types` group by 
binary_col; "
+                exception " errCode = 2"
+            }
+
+            test {
+                sql " select * from 
`test_hive_orc_mapping_varbinary`.`default`.`orc_all_types` as a join 
`test_hive_orc_mapping_varbinary`.`default`.`orc_all_types`  as b on 
a.binary_col = b.binary_col; "
+                exception " errCode = 2,"
+            }
+
+            test {
+                sql " select * from 
`test_hive_orc_mapping_varbinary`.`default`.`orc_all_types` where binary_col = 
X'AB'; "
+                exception " could not used in ComparisonPredicate now"
+            }
+
         } finally {
         }
     }
diff --git 
a/regression-test/suites/external_table_p0/iceberg/test_iceberg_sys_table.groovy
 
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_sys_table.groovy
index 0b2925a0d6e..0efa0e0233a 100644
--- 
a/regression-test/suites/external_table_p0/iceberg/test_iceberg_sys_table.groovy
+++ 
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_sys_table.groovy
@@ -390,4 +390,23 @@ suite("test_iceberg_sys_table", 
"p0,external,doris,external_docker,external_dock
         sql """select committed_at, snapshot_id, parent_id, operation from 
${catalog_name}.${db_name}.test_iceberg_systable_tbl1\$snapshots"""
     }
     try_sql("DROP USER ${user}")
+
+    sql """drop catalog if exists test_iceberg_varbinary_sys"""
+    sql """
+    CREATE CATALOG test_iceberg_varbinary_sys PROPERTIES (
+        'type'='iceberg',
+        'iceberg.catalog.type'='rest',
+        'uri' = 'http://${externalEnvIp}:${rest_port}',
+        "s3.access_key" = "admin",
+        "s3.secret_key" = "password",
+        "s3.endpoint" = "http://${externalEnvIp}:${minio_port}",
+        "s3.region" = "us-east-1",
+        'enable.mapping.varbinary' = 'true'
+    );"""
+
+    sql """switch test_iceberg_varbinary_sys """
+    sql """use ${db_name}"""
+
+    order_qt_varbinary_sys_table_desc """desc 
test_iceberg_systable_unpartitioned\$files"""
+    order_qt_varbinary_sys_table_select """select content, file_format, 
record_count, lower_bounds, upper_bounds from 
test_iceberg_systable_unpartitioned\$files;"""
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to