This is an automated email from the ASF dual-hosted git repository.

wusheng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/skywalking-banyandb.git


The following commit(s) were added to refs/heads/main by this push:
     new 98866389b fix(trace): fix entity tag handling in trace filter to 
prevent TagIdx index mismatch (#1032)
98866389b is described below

commit 98866389b63b8600ee1032c63ae8a792406941b7
Author: Gao Hongtao <[email protected]>
AuthorDate: Mon Mar 30 17:28:46 2026 +0800

    fix(trace): fix entity tag handling in trace filter to prevent TagIdx index 
mismatch (#1032)
---
 CHANGES.md                                         |  1 +
 pkg/query/logical/trace/index_filter.go            | 21 ++++---
 pkg/query/logical/trace/index_filter_test.go       | 67 ++++++++++++++++++++++
 pkg/query/logical/trace/trace_plan_tag_filter.go   |  5 ++
 ...ce_instance_and_endpoint_order_timestamp_asc.ql | 22 +++++++
 ...e_instance_and_endpoint_order_timestamp_asc.yml | 39 +++++++++++++
 ...e_instance_and_endpoint_order_timestamp_asc.yml | 58 +++++++++++++++++++
 test/cases/trace/trace.go                          |  1 +
 8 files changed, 203 insertions(+), 11 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index 554063268..0a6999fc8 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -59,6 +59,7 @@ Release Notes.
 - Fix segment reference leaks in measure/stream/trace queries and ensure 
chunked sync sessions close part contexts correctly.
 - Fix duplicate query execution in distributed measure Agg+TopN queries by 
enabling push-down aggregation, removing the wasteful double-query pattern.
 - Fix nil pointer panic in segment collectMetrics during shutdown.
+- Fix entity tag handling in trace filter to prevent TagIdx index mismatch 
when filtering with both entity and non-entity tags.
 
 ### Document
 
diff --git a/pkg/query/logical/trace/index_filter.go 
b/pkg/query/logical/trace/index_filter.go
index 2a606df2f..7947b8814 100644
--- a/pkg/query/logical/trace/index_filter.go
+++ b/pkg/query/logical/trace/index_filter.go
@@ -348,24 +348,23 @@ func buildFilterFromCondition(cond *modelv1.Condition, 
schema logical.Schema, ta
                }
        }
 
-       if cond.Name == traceIDTagName && (cond.Op == 
modelv1.Condition_BINARY_OP_EQ || cond.Op == modelv1.Condition_BINARY_OP_IN) {
-               traceIDs = extractIDsFromCondition(cond)
-       } else if cond.Name != spanIDTagName {
-               collectedTagNames = append(collectedTagNames, cond.Name)
-       }
-
-       _, parsedEntity, err := logical.ParseExprOrEntity(entityDict, entity, 
cond)
+       // Check entity condition first: entity tags are handled by series 
routing and are NOT stored
+       // as tag data in SIDX blocks. They must not be added to 
collectedTagNames, as that would
+       // shift TagIdx values in conditionSchema and cause wrong tag values to 
be looked up.
+       expr, parsedEntity, err := logical.ParseExprOrEntity(entityDict, 
entity, cond)
        if err != nil {
                return nil, nil, collectedTagNames, traceIDs, minVal, maxVal, 
err
        }
        if parsedEntity != nil {
                return nil, parsedEntity, collectedTagNames, traceIDs, minVal, 
maxVal, nil
        }
-       // For trace, all non-entity tags have skipping index
-       expr, _, err := logical.ParseExprOrEntity(entityDict, entity, cond)
-       if err != nil {
-               return nil, nil, collectedTagNames, traceIDs, minVal, maxVal, 
err
+       // Non-entity condition: add to collectedTagNames (but skip traceID and 
spanID special cases)
+       if cond.Name == traceIDTagName && (cond.Op == 
modelv1.Condition_BINARY_OP_EQ || cond.Op == modelv1.Condition_BINARY_OP_IN) {
+               traceIDs = extractIDsFromCondition(cond)
+       } else if cond.Name != spanIDTagName {
+               collectedTagNames = append(collectedTagNames, cond.Name)
        }
+       // For trace, all non-entity tags have skipping index
        filter, entities, err := parseConditionToFilter(cond, schema, entity, 
expr)
        return filter, entities, collectedTagNames, traceIDs, minVal, maxVal, 
err
 }
diff --git a/pkg/query/logical/trace/index_filter_test.go 
b/pkg/query/logical/trace/index_filter_test.go
index fac72009b..2f6843b2c 100644
--- a/pkg/query/logical/trace/index_filter_test.go
+++ b/pkg/query/logical/trace/index_filter_test.go
@@ -283,6 +283,73 @@ func TestTraceHavingFilterIntegration(t *testing.T) {
        assert.False(t, shouldSkip, "should not skip when services are found")
 }
 
+// TestBuildFilterEntityTagNotInCollectedTagNames verifies that entity tags 
(e.g. serviceInstanceId)
+// are NOT included in collectedTagNames. Entity tags are handled by series 
routing and are not
+// stored as tag data in SIDX blocks. Including them shifts TagIdx values in 
conditionSchema,
+// causing wrong tag values to be looked up when filtering non-entity tags 
like traceState.
+func TestBuildFilterEntityTagNotInCollectedTagNames(t *testing.T) {
+       trace := &databasev1.Trace{
+               Metadata: &commonv1.Metadata{Name: "test", Group: "default"},
+               Tags: []*databasev1.TraceTagSpec{
+                       {Name: "trace_id", Type: 
databasev1.TagType_TAG_TYPE_STRING},
+                       {Name: "span_id", Type: 
databasev1.TagType_TAG_TYPE_STRING},
+                       {Name: "timestamp", Type: 
databasev1.TagType_TAG_TYPE_TIMESTAMP},
+                       {Name: "service_instance_id", Type: 
databasev1.TagType_TAG_TYPE_STRING},
+                       {Name: "service_id", Type: 
databasev1.TagType_TAG_TYPE_STRING},
+                       {Name: "state", Type: databasev1.TagType_TAG_TYPE_INT},
+               },
+               TraceIdTagName:   "trace_id",
+               SpanIdTagName:    "span_id",
+               TimestampTagName: "timestamp",
+       }
+       traceSchema, schemaErr := BuildSchema(trace, nil)
+       assert.NoError(t, schemaErr)
+
+       // service_instance_id is an entity tag
+       entityDict := map[string]int{
+               "service_id":          0,
+               "service_instance_id": 1,
+       }
+       entity := []*modelv1.TagValue{
+               {Value: &modelv1.TagValue_Str{Str: &modelv1.Str{Value: ""}}},
+               {Value: &modelv1.TagValue_Str{Str: &modelv1.Str{Value: ""}}},
+       }
+
+       // Condition: service_instance_id = "X" AND state = 0
+       // service_instance_id is an entity tag: must NOT appear in 
collectedTagNames.
+       // state is a regular tag: must appear in collectedTagNames.
+       criteria := &modelv1.Criteria{
+               Exp: &modelv1.Criteria_Le{
+                       Le: &modelv1.LogicalExpression{
+                               Op: modelv1.LogicalExpression_LOGICAL_OP_AND,
+                               Left: &modelv1.Criteria{
+                                       Exp: &modelv1.Criteria_Condition{
+                                               Condition: &modelv1.Condition{
+                                                       Name:  
"service_instance_id",
+                                                       Op:    
modelv1.Condition_BINARY_OP_EQ,
+                                                       Value: 
&modelv1.TagValue{Value: &modelv1.TagValue_Str{Str: &modelv1.Str{Value: 
"instance-1"}}},
+                                               },
+                                       },
+                               },
+                               Right: &modelv1.Criteria{
+                                       Exp: &modelv1.Criteria_Condition{
+                                               Condition: &modelv1.Condition{
+                                                       Name:  "state",
+                                                       Op:    
modelv1.Condition_BINARY_OP_EQ,
+                                                       Value: 
&modelv1.TagValue{Value: &modelv1.TagValue_Int{Int: &modelv1.Int{Value: 0}}},
+                                               },
+                                       },
+                               },
+                       },
+               },
+       }
+       _, _, collectedTagNames, _, _, _, buildErr := buildTraceFilter( 
//nolint:dogsled
+               criteria, traceSchema, entityDict, entity, "trace_id", 
"span_id", "")
+       assert.NoError(t, buildErr)
+       assert.Equal(t, []string{"state"}, collectedTagNames,
+               "entity tag service_instance_id must not be in 
collectedTagNames; only non-entity tag state should appear")
+}
+
 // TestBuildFilterDeduplicatesCollectedTagNames verifies that buildFilter 
deduplicates tag names
 // when criteria has multiple conditions on the same tag (e.g. duration > 100 
AND duration < 200).
 func TestBuildFilterDeduplicatesCollectedTagNames(t *testing.T) {
diff --git a/pkg/query/logical/trace/trace_plan_tag_filter.go 
b/pkg/query/logical/trace/trace_plan_tag_filter.go
index cf900fdbb..d407a7607 100644
--- a/pkg/query/logical/trace/trace_plan_tag_filter.go
+++ b/pkg/query/logical/trace/trace_plan_tag_filter.go
@@ -96,6 +96,11 @@ func (uis *unresolvedTraceTagFilter) Analyze(s 
logical.Schema) (logical.Plan, er
                }
        }
 
+       // Add entity tags to projection for row-level filtering
+       // Entity tags are not in SIDX blocks (handled by series routing) but 
need to be
+       // projected so they can be validated at the row level during span 
filtering
+       ctx.projectionTags.Names = append(ctx.projectionTags.Names, 
entityList...)
+
        // Add tag names from filter conditions to projection
        var conditionSchema logical.Schema
        if len(conditionTagNames) > 0 {
diff --git 
a/test/cases/trace/data/input/eq_service_instance_and_endpoint_order_timestamp_asc.ql
 
b/test/cases/trace/data/input/eq_service_instance_and_endpoint_order_timestamp_asc.ql
new file mode 100644
index 000000000..02d80e113
--- /dev/null
+++ 
b/test/cases/trace/data/input/eq_service_instance_and_endpoint_order_timestamp_asc.ql
@@ -0,0 +1,22 @@
+# Licensed to Apache Software Foundation (ASF) under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Apache Software Foundation (ASF) licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+SELECT () FROM TRACE sw IN test-trace-group
+TIME > '-15m'
+WHERE service_instance_id = 'webapp_instance_1' AND endpoint_id = 
'/home_endpoint'
+ORDER BY timestamp ASC
diff --git 
a/test/cases/trace/data/input/eq_service_instance_and_endpoint_order_timestamp_asc.yml
 
b/test/cases/trace/data/input/eq_service_instance_and_endpoint_order_timestamp_asc.yml
new file mode 100644
index 000000000..7b2bc26c6
--- /dev/null
+++ 
b/test/cases/trace/data/input/eq_service_instance_and_endpoint_order_timestamp_asc.yml
@@ -0,0 +1,39 @@
+# Licensed to Apache Software Foundation (ASF) under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Apache Software Foundation (ASF) licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: "sw"
+groups: ["test-trace-group"]
+criteria:
+  le:
+    op: "LOGICAL_OP_AND"
+    left:
+      condition:
+        name: "service_instance_id"
+        op: "BINARY_OP_EQ"
+        value:
+          str:
+            value: "webapp_instance_1"
+    right:
+      condition:
+        name: "endpoint_id"
+        op: "BINARY_OP_EQ"
+        value:
+          str:
+            value: "/home_endpoint"
+order_by:
+  index_rule_name: "timestamp"
+  sort: "SORT_ASC"
diff --git 
a/test/cases/trace/data/want/eq_service_instance_and_endpoint_order_timestamp_asc.yml
 
b/test/cases/trace/data/want/eq_service_instance_and_endpoint_order_timestamp_asc.yml
new file mode 100644
index 000000000..bc4b55b7d
--- /dev/null
+++ 
b/test/cases/trace/data/want/eq_service_instance_and_endpoint_order_timestamp_asc.yml
@@ -0,0 +1,58 @@
+# Licensed to Apache Software Foundation (ASF) under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Apache Software Foundation (ASF) licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+traces:
+  - spans:
+    - span: trace_001_span_1
+      spanId: span_001_1
+    - span: trace_001_span_2
+      spanId: span_001_2
+    - span: trace_001_span_3
+      spanId: span_001_3
+    - span: trace_001_span_4
+      spanId: span_001_4
+    - span: trace_001_span_5
+      spanId: span_001_5
+    traceId: trace_001
+  - spans:
+    - span: trace_002_span_1
+      spanId: span_002_1
+    - span: trace_002_span_2
+      spanId: span_002_2
+    - span: trace_002_span_3
+      spanId: span_002_3
+    - span: trace_002_span_4
+      spanId: span_002_4
+    traceId: trace_002
+  - spans:
+    - span: trace_003_span_1
+      spanId: span_003_1
+    - span: trace_003_span_2
+      spanId: span_003_2
+    - span: trace_003_span_3
+      spanId: span_003_3
+    traceId: trace_003
+  - spans:
+    - span: trace_005_span_1
+      spanId: span_005_1
+    - span: trace_005_span_2
+      spanId: span_005_2
+    - span: trace_005_span_3
+      spanId: span_005_3
+    - span: trace_005_span_4
+      spanId: span_005_4
+    traceId: trace_005
diff --git a/test/cases/trace/trace.go b/test/cases/trace/trace.go
index c57ab8656..a22111f77 100644
--- a/test/cases/trace/trace.go
+++ b/test/cases/trace/trace.go
@@ -53,6 +53,7 @@ var _ = g.DescribeTable("Scanning Traces", func(args 
helpers.Args) {
                helpers.Args{Input: "duration_range_and_ipv4_order_timestamp", 
Duration: 1 * time.Hour}),
        g.Entry("filter by service id", helpers.Args{Input: 
"eq_service_order_timestamp_desc", Duration: 1 * time.Hour}),
        g.Entry("filter by service instance id", helpers.Args{Input: 
"eq_service_instance_order_time_asc", Duration: 1 * time.Hour}),
+       g.Entry("filter by service instance id and endpoint id", 
helpers.Args{Input: "eq_service_instance_and_endpoint_order_timestamp_asc", 
Duration: 1 * time.Hour}),
        g.Entry("filter by endpoint", helpers.Args{Input: 
"eq_endpoint_order_duration_asc", Duration: 1 * time.Hour}),
        g.Entry("order by timestamp limit 2", helpers.Args{Input: 
"order_timestamp_desc_limit", Duration: 1 * time.Hour}),
        g.Entry("filter by trace id and service unknown", helpers.Args{Input: 
"eq_trace_id_and_service_unknown", Duration: 1 * time.Hour, WantEmpty: true}),

Reply via email to