Copilot commented on code in PR #56718:
URL: https://github.com/apache/doris/pull/56718#discussion_r2413915167


##########
be/src/vec/exprs/vsearch.cpp:
##########
@@ -48,39 +48,63 @@ Status collect_search_inputs(const VSearchExpr& expr, 
VExprContext* context,
 
     auto index_context = context->get_inverted_index_context();
     if (index_context == nullptr) {
-        return Status::OK();
+        LOG(WARNING) << "collect_search_inputs: No inverted index context 
available";
+        return Status::InternalError("No inverted index context available");
     }
 
+    // Get field bindings for variant subcolumn support
+    const auto& search_param = expr.get_search_param();
+    const auto& field_bindings = search_param.field_bindings;
+
+    int child_index = 0; // Index for iterating through children
     for (const auto& child : expr.children()) {
         if (child->is_slot_ref()) {
             auto* column_slot_ref = assert_cast<VSlotRef*>(child.get());
             int column_id = column_slot_ref->column_id();
             auto* iterator = 
index_context->get_inverted_index_iterator_by_column_id(column_id);
-            if (iterator == nullptr) {
-                continue;
+
+            // Determine the field_name from field_bindings (for variant 
subcolumns)
+            // field_bindings and children should have the same order
+            std::string field_name;
+            if (child_index < field_bindings.size()) {
+                // Use field_name from binding (may include "parent.subcolumn" 
for variant)
+                field_name = field_bindings[child_index].field_name;
+            } else {
+                // Fallback to column_name if binding not found
+                field_name = column_slot_ref->column_name();
             }
 
-            const auto* storage_name_type =
-                    
index_context->get_storage_name_and_type_by_column_id(column_id);
-            if (storage_name_type == nullptr) {
-                auto err_msg = fmt::format(
-                        "storage_name_type cannot be found for column {} while 
in {} evaluate",
-                        column_id, expr.expr_name());
-                LOG(ERROR) << err_msg;
-                return Status::InternalError(err_msg);
+            // Only collect fields that have iterators (materialized columns 
with indexes)
+            if (iterator != nullptr) {
+                const auto* storage_name_type =
+                        
index_context->get_storage_name_and_type_by_column_id(column_id);
+                if (storage_name_type == nullptr) {
+                    return Status::InternalError("storage_name_type not found 
for column {} in {}",
+                                                 column_id, expr.expr_name());
+                }
+
+                bundle->iterators.emplace(field_name, iterator);
+                bundle->field_types.emplace(field_name, *storage_name_type);
+                bundle->column_ids.emplace_back(column_id);
             }
 
-            auto column_name = column_slot_ref->column_name();
-            bundle->iterators.emplace(column_name, iterator);
-            bundle->field_types.emplace(column_name, *storage_name_type);
-            bundle->column_ids.emplace_back(column_id);
+            child_index++;
         } else if (child->is_literal()) {
             auto* literal = assert_cast<VLiteral*>(child.get());
             bundle->literal_args.emplace_back(literal->get_column_ptr(), 
literal->get_data_type(),
                                               literal->expr_name());
         } else {
-            LOG(WARNING) << "VSearchExpr: Unsupported child node type 
encountered";
-            return Status::InvalidArgument("search expression child type 
unsupported");
+            // Check if this is ElementAt expression (for variant subcolumn 
access)
+            if (child->expr_name() == "element_at" && child_index < 
field_bindings.size() &&
+                field_bindings[child_index].__isset.is_variant_subcolumn &&
+                field_bindings[child_index].is_variant_subcolumn) {

Review Comment:
   The hardcoded string 'element_at' should be defined as a constant to avoid 
magic strings and improve maintainability.



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java:
##########
@@ -233,18 +233,32 @@ public QsNode 
visitAtomClause(SearchParser.AtomClauseContext ctx) {
 
         @Override
         public QsNode visitFieldQuery(SearchParser.FieldQueryContext ctx) {
-            if (ctx.fieldName() == null) {
-                throw new RuntimeException("Invalid field query: missing field 
name");
+            if (ctx.fieldPath() == null) {
+                throw new RuntimeException("Invalid field query: missing field 
path");

Review Comment:
   The error message should be more descriptive and specify what constitutes a 
valid field path.
   ```suggestion
                   throw new RuntimeException("Invalid field query: missing 
field path. A valid field path consists of one or more segments (e.g., field, 
field.subfield), where each segment is an identifier or a quoted string if it 
contains special characters.");
   ```



##########
be/src/vec/functions/function_search.cpp:
##########
@@ -171,27 +192,31 @@ Status 
FunctionSearch::evaluate_inverted_index_with_search_param(
                 data_type_with_names,
         std::unordered_map<std::string, IndexIterator*> iterators, uint32_t 
num_rows,
         InvertedIndexResultBitmap& bitmap_result) const {
-    VLOG_DEBUG << "search: Processing DSL '" << search_param.original_dsl << 
"' with "
-               << data_type_with_names.size() << " indexed columns and " << 
iterators.size()
-               << " iterators";
-
     if (iterators.empty() || data_type_with_names.empty()) {
-        LOG(INFO) << "No indexed columns or iterators available, returning 
empty result";
+        LOG(INFO) << "No indexed columns or iterators available, returning 
empty result, dsl:"
+                  << search_param.original_dsl;
+        bitmap_result = 
InvertedIndexResultBitmap(std::make_shared<roaring::Roaring>(),
+                                                  
std::make_shared<roaring::Roaring>());

Review Comment:
   The creation of an empty InvertedIndexResultBitmap is duplicated in multiple 
places. Consider extracting this into a helper function to reduce code 
duplication.



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java:
##########
@@ -127,6 +169,7 @@ private Expression rewriteSearch(Search search, 
LogicalOlapScan scan) {
     }
 
     private Slot findSlotByName(String fieldName, LogicalOlapScan scan) {
+        // Direct match only - variant subcolumns are handled by caller

Review Comment:
   The comment should explain why variant subcolumns are handled by the caller 
and provide a brief explanation of the handling mechanism.
   ```suggestion
           /*
            * This method performs a direct match for slot names only and does 
not attempt to resolve variant subcolumns.
            * Variant subcolumns (e.g., fields within complex or nested types) 
require special parsing and resolution logic,
            * which is handled by the caller prior to invoking this method. The 
caller is responsible for extracting the
            * appropriate subcolumn name from the field path and ensuring that 
the correct slot or sub-slot is passed here
            * for direct matching. This separation keeps this method simple and 
focused on direct slot lookup.
            */
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to