This is an automated email from the ASF dual-hosted git repository.

hanahmily pushed a commit to branch bug/trace-cond
in repository https://gitbox.apache.org/repos/asf/skywalking-banyandb.git

commit 820f97f423f6c2fc3c9112ca78857f5a55d72503
Author: Hongtao Gao <[email protected]>
AuthorDate: Mon Mar 30 08:00:16 2026 +0000

    fix(trace): fix entity tag handling in trace filter to prevent TagIdx index 
mismatch
    
    Entity tags (like service_instance_id) are handled by series routing and 
are not
    stored as tag data in SIDX blocks. Prevent them from being added to 
collectedTagNames,
    which would shift TagIdx values in conditionSchema and cause wrong tag 
values to be
    looked up when filtering non-entity tags. Add unit test and integration 
test case to
    validate the fix.
---
 CHANGES.md                                         |  1 +
 pkg/query/logical/trace/index_filter.go            | 21 ++++---
 pkg/query/logical/trace/index_filter_test.go       | 67 ++++++++++++++++++++++
 ...ce_instance_and_endpoint_order_timestamp_asc.ql | 22 +++++++
 ...e_instance_and_endpoint_order_timestamp_asc.yml | 39 +++++++++++++
 ...e_instance_and_endpoint_order_timestamp_asc.yml | 58 +++++++++++++++++++
 test/cases/trace/trace.go                          |  1 +
 7 files changed, 198 insertions(+), 11 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index 554063268..0a6999fc8 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -59,6 +59,7 @@ Release Notes.
 - Fix segment reference leaks in measure/stream/trace queries and ensure 
chunked sync sessions close part contexts correctly.
 - Fix duplicate query execution in distributed measure Agg+TopN queries by 
enabling push-down aggregation, removing the wasteful double-query pattern.
 - Fix nil pointer panic in segment collectMetrics during shutdown.
+- Fix entity tag handling in trace filter to prevent TagIdx index mismatch 
when filtering with both entity and non-entity tags.
 
 ### Document
 
diff --git a/pkg/query/logical/trace/index_filter.go 
b/pkg/query/logical/trace/index_filter.go
index 2a606df2f..7947b8814 100644
--- a/pkg/query/logical/trace/index_filter.go
+++ b/pkg/query/logical/trace/index_filter.go
@@ -348,24 +348,23 @@ func buildFilterFromCondition(cond *modelv1.Condition, 
schema logical.Schema, ta
                }
        }
 
-       if cond.Name == traceIDTagName && (cond.Op == 
modelv1.Condition_BINARY_OP_EQ || cond.Op == modelv1.Condition_BINARY_OP_IN) {
-               traceIDs = extractIDsFromCondition(cond)
-       } else if cond.Name != spanIDTagName {
-               collectedTagNames = append(collectedTagNames, cond.Name)
-       }
-
-       _, parsedEntity, err := logical.ParseExprOrEntity(entityDict, entity, 
cond)
+       // Check entity condition first: entity tags are handled by series 
routing and are NOT stored
+       // as tag data in SIDX blocks. They must not be added to 
collectedTagNames, as that would
+       // shift TagIdx values in conditionSchema and cause wrong tag values to 
be looked up.
+       expr, parsedEntity, err := logical.ParseExprOrEntity(entityDict, 
entity, cond)
        if err != nil {
                return nil, nil, collectedTagNames, traceIDs, minVal, maxVal, 
err
        }
        if parsedEntity != nil {
                return nil, parsedEntity, collectedTagNames, traceIDs, minVal, 
maxVal, nil
        }
-       // For trace, all non-entity tags have skipping index
-       expr, _, err := logical.ParseExprOrEntity(entityDict, entity, cond)
-       if err != nil {
-               return nil, nil, collectedTagNames, traceIDs, minVal, maxVal, 
err
+       // Non-entity condition: add to collectedTagNames (but skip traceID and 
spanID special cases)
+       if cond.Name == traceIDTagName && (cond.Op == 
modelv1.Condition_BINARY_OP_EQ || cond.Op == modelv1.Condition_BINARY_OP_IN) {
+               traceIDs = extractIDsFromCondition(cond)
+       } else if cond.Name != spanIDTagName {
+               collectedTagNames = append(collectedTagNames, cond.Name)
        }
+       // For trace, all non-entity tags have skipping index
        filter, entities, err := parseConditionToFilter(cond, schema, entity, 
expr)
        return filter, entities, collectedTagNames, traceIDs, minVal, maxVal, 
err
 }
diff --git a/pkg/query/logical/trace/index_filter_test.go 
b/pkg/query/logical/trace/index_filter_test.go
index fac72009b..2f6843b2c 100644
--- a/pkg/query/logical/trace/index_filter_test.go
+++ b/pkg/query/logical/trace/index_filter_test.go
@@ -283,6 +283,73 @@ func TestTraceHavingFilterIntegration(t *testing.T) {
        assert.False(t, shouldSkip, "should not skip when services are found")
 }
 
+// TestBuildFilterEntityTagNotInCollectedTagNames verifies that entity tags 
(e.g. serviceInstanceId)
+// are NOT included in collectedTagNames. Entity tags are handled by series 
routing and are not
+// stored as tag data in SIDX blocks. Including them shifts TagIdx values in 
conditionSchema,
+// causing wrong tag values to be looked up when filtering non-entity tags 
like traceState.
+func TestBuildFilterEntityTagNotInCollectedTagNames(t *testing.T) {
+       trace := &databasev1.Trace{
+               Metadata: &commonv1.Metadata{Name: "test", Group: "default"},
+               Tags: []*databasev1.TraceTagSpec{
+                       {Name: "trace_id", Type: 
databasev1.TagType_TAG_TYPE_STRING},
+                       {Name: "span_id", Type: 
databasev1.TagType_TAG_TYPE_STRING},
+                       {Name: "timestamp", Type: 
databasev1.TagType_TAG_TYPE_TIMESTAMP},
+                       {Name: "service_instance_id", Type: 
databasev1.TagType_TAG_TYPE_STRING},
+                       {Name: "service_id", Type: 
databasev1.TagType_TAG_TYPE_STRING},
+                       {Name: "state", Type: databasev1.TagType_TAG_TYPE_INT},
+               },
+               TraceIdTagName:   "trace_id",
+               SpanIdTagName:    "span_id",
+               TimestampTagName: "timestamp",
+       }
+       traceSchema, schemaErr := BuildSchema(trace, nil)
+       assert.NoError(t, schemaErr)
+
+       // service_instance_id is an entity tag
+       entityDict := map[string]int{
+               "service_id":          0,
+               "service_instance_id": 1,
+       }
+       entity := []*modelv1.TagValue{
+               {Value: &modelv1.TagValue_Str{Str: &modelv1.Str{Value: ""}}},
+               {Value: &modelv1.TagValue_Str{Str: &modelv1.Str{Value: ""}}},
+       }
+
+       // Condition: service_instance_id = "X" AND state = 0
+       // service_instance_id is an entity tag: must NOT appear in 
collectedTagNames.
+       // state is a regular tag: must appear in collectedTagNames.
+       criteria := &modelv1.Criteria{
+               Exp: &modelv1.Criteria_Le{
+                       Le: &modelv1.LogicalExpression{
+                               Op: modelv1.LogicalExpression_LOGICAL_OP_AND,
+                               Left: &modelv1.Criteria{
+                                       Exp: &modelv1.Criteria_Condition{
+                                               Condition: &modelv1.Condition{
+                                                       Name:  
"service_instance_id",
+                                                       Op:    
modelv1.Condition_BINARY_OP_EQ,
+                                                       Value: 
&modelv1.TagValue{Value: &modelv1.TagValue_Str{Str: &modelv1.Str{Value: 
"instance-1"}}},
+                                               },
+                                       },
+                               },
+                               Right: &modelv1.Criteria{
+                                       Exp: &modelv1.Criteria_Condition{
+                                               Condition: &modelv1.Condition{
+                                                       Name:  "state",
+                                                       Op:    
modelv1.Condition_BINARY_OP_EQ,
+                                                       Value: 
&modelv1.TagValue{Value: &modelv1.TagValue_Int{Int: &modelv1.Int{Value: 0}}},
+                                               },
+                                       },
+                               },
+                       },
+               },
+       }
+       _, _, collectedTagNames, _, _, _, buildErr := buildTraceFilter( 
//nolint:dogsled
+               criteria, traceSchema, entityDict, entity, "trace_id", 
"span_id", "")
+       assert.NoError(t, buildErr)
+       assert.Equal(t, []string{"state"}, collectedTagNames,
+               "entity tag service_instance_id must not be in 
collectedTagNames; only non-entity tag state should appear")
+}
+
 // TestBuildFilterDeduplicatesCollectedTagNames verifies that buildFilter 
deduplicates tag names
 // when criteria has multiple conditions on the same tag (e.g. duration > 100 
AND duration < 200).
 func TestBuildFilterDeduplicatesCollectedTagNames(t *testing.T) {
diff --git 
a/test/cases/trace/data/input/eq_service_instance_and_endpoint_order_timestamp_asc.ql
 
b/test/cases/trace/data/input/eq_service_instance_and_endpoint_order_timestamp_asc.ql
new file mode 100644
index 000000000..02d80e113
--- /dev/null
+++ 
b/test/cases/trace/data/input/eq_service_instance_and_endpoint_order_timestamp_asc.ql
@@ -0,0 +1,22 @@
+# Licensed to Apache Software Foundation (ASF) under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Apache Software Foundation (ASF) licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+SELECT () FROM TRACE sw IN test-trace-group
+TIME > '-15m'
+WHERE service_instance_id = 'webapp_instance_1' AND endpoint_id = 
'/home_endpoint'
+ORDER BY timestamp ASC
diff --git 
a/test/cases/trace/data/input/eq_service_instance_and_endpoint_order_timestamp_asc.yml
 
b/test/cases/trace/data/input/eq_service_instance_and_endpoint_order_timestamp_asc.yml
new file mode 100644
index 000000000..7b2bc26c6
--- /dev/null
+++ 
b/test/cases/trace/data/input/eq_service_instance_and_endpoint_order_timestamp_asc.yml
@@ -0,0 +1,39 @@
+# Licensed to Apache Software Foundation (ASF) under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Apache Software Foundation (ASF) licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: "sw"
+groups: ["test-trace-group"]
+criteria:
+  le:
+    op: "LOGICAL_OP_AND"
+    left:
+      condition:
+        name: "service_instance_id"
+        op: "BINARY_OP_EQ"
+        value:
+          str:
+            value: "webapp_instance_1"
+    right:
+      condition:
+        name: "endpoint_id"
+        op: "BINARY_OP_EQ"
+        value:
+          str:
+            value: "/home_endpoint"
+order_by:
+  index_rule_name: "timestamp"
+  sort: "SORT_ASC"
diff --git 
a/test/cases/trace/data/want/eq_service_instance_and_endpoint_order_timestamp_asc.yml
 
b/test/cases/trace/data/want/eq_service_instance_and_endpoint_order_timestamp_asc.yml
new file mode 100644
index 000000000..bc4b55b7d
--- /dev/null
+++ 
b/test/cases/trace/data/want/eq_service_instance_and_endpoint_order_timestamp_asc.yml
@@ -0,0 +1,58 @@
+# Licensed to Apache Software Foundation (ASF) under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Apache Software Foundation (ASF) licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+traces:
+  - spans:
+    - span: trace_001_span_1
+      spanId: span_001_1
+    - span: trace_001_span_2
+      spanId: span_001_2
+    - span: trace_001_span_3
+      spanId: span_001_3
+    - span: trace_001_span_4
+      spanId: span_001_4
+    - span: trace_001_span_5
+      spanId: span_001_5
+    traceId: trace_001
+  - spans:
+    - span: trace_002_span_1
+      spanId: span_002_1
+    - span: trace_002_span_2
+      spanId: span_002_2
+    - span: trace_002_span_3
+      spanId: span_002_3
+    - span: trace_002_span_4
+      spanId: span_002_4
+    traceId: trace_002
+  - spans:
+    - span: trace_003_span_1
+      spanId: span_003_1
+    - span: trace_003_span_2
+      spanId: span_003_2
+    - span: trace_003_span_3
+      spanId: span_003_3
+    traceId: trace_003
+  - spans:
+    - span: trace_005_span_1
+      spanId: span_005_1
+    - span: trace_005_span_2
+      spanId: span_005_2
+    - span: trace_005_span_3
+      spanId: span_005_3
+    - span: trace_005_span_4
+      spanId: span_005_4
+    traceId: trace_005
diff --git a/test/cases/trace/trace.go b/test/cases/trace/trace.go
index c57ab8656..a22111f77 100644
--- a/test/cases/trace/trace.go
+++ b/test/cases/trace/trace.go
@@ -53,6 +53,7 @@ var _ = g.DescribeTable("Scanning Traces", func(args 
helpers.Args) {
                helpers.Args{Input: "duration_range_and_ipv4_order_timestamp", 
Duration: 1 * time.Hour}),
        g.Entry("filter by service id", helpers.Args{Input: 
"eq_service_order_timestamp_desc", Duration: 1 * time.Hour}),
        g.Entry("filter by service instance id", helpers.Args{Input: 
"eq_service_instance_order_time_asc", Duration: 1 * time.Hour}),
+       g.Entry("filter by service instance id and endpoint id", 
helpers.Args{Input: "eq_service_instance_and_endpoint_order_timestamp_asc", 
Duration: 1 * time.Hour}),
        g.Entry("filter by endpoint", helpers.Args{Input: 
"eq_endpoint_order_duration_asc", Duration: 1 * time.Hour}),
        g.Entry("order by timestamp limit 2", helpers.Args{Input: 
"order_timestamp_desc_limit", Duration: 1 * time.Hour}),
        g.Entry("filter by trace id and service unknown", helpers.Args{Input: 
"eq_trace_id_and_service_unknown", Duration: 1 * time.Hour, WantEmpty: true}),

Reply via email to