This is an automated email from the ASF dual-hosted git repository.

hanahmily pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/skywalking-banyandb.git


The following commit(s) were added to refs/heads/main by this push:
     new 1c954da7c fix(storage): disable rotation task on warm and cold 
lifecycle nodes (#1055)
1c954da7c is described below

commit 1c954da7c5bd2b0dfe559d88c741c6bce45d98e5
Author: Gao Hongtao <[email protected]>
AuthorDate: Thu Apr 9 19:47:00 2026 +0800

    fix(storage): disable rotation task on warm and cold lifecycle nodes (#1055)
    
    * fix(storage): disable rotation task on warm and cold lifecycle nodes
    
    The rotation task uses NextTime(eventTime) which produces incorrect
    segment boundaries for multi-day intervals (e.g., 3-day warm stage
    interval). Warm and cold nodes receive data via lifecycle migration
    which creates segments on demand via CreateSegmentIfNotExist with
    correct boundaries. Disabling rotation on non-hot nodes prevents
    the issue.
---
 CHANGES.md                                |  1 +
 banyand/internal/storage/rotation.go      |  2 +-
 banyand/internal/storage/rotation_test.go | 69 +++++++++++++++++++++++++++++++
 banyand/internal/storage/tsdb.go          |  3 ++
 banyand/measure/metadata.go               |  4 ++
 banyand/stream/metadata.go                |  4 ++
 banyand/trace/metadata.go                 |  4 ++
 docs/concept/rotation.md                  |  9 ++++
 docs/operation/lifecycle.md               |  9 ++++
 9 files changed, 104 insertions(+), 1 deletion(-)

diff --git a/CHANGES.md b/CHANGES.md
index d7959c781..3bbc2e08c 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -22,6 +22,7 @@ Release Notes.
 - Fix flaky on-disk integration tests caused by Ginkgo v2 random container 
shuffling closing gRPC connections prematurely.
 - Fix snapshot error when there is no data in a segment.
 - ui: fix query editor refresh/reset behavior and BydbQL keyword highlighting.
+- Disable the rotation task on warm and cold nodes to prevent incorrect 
segment boundaries during lifecycle migration.
 
 ## 0.10.0
 
diff --git a/banyand/internal/storage/rotation.go 
b/banyand/internal/storage/rotation.go
index 7d1a737aa..fd30f3edd 100644
--- a/banyand/internal/storage/rotation.go
+++ b/banyand/internal/storage/rotation.go
@@ -100,7 +100,7 @@ func (d *database[T, O]) startRotationTask() error {
                                                gap := latest.End.UnixNano() - 
ts
                                                // gap <=0 means the event is 
from the future
                                                // the segment will be created 
by a written event directly
-                                               if gap <= 0 || gap > 
newSegmentTimeGap {
+                                               if gap <= 0 || gap > 
newSegmentTimeGap || d.disableRotation {
                                                        return
                                                }
                                                d.incTotalRotationStarted(1)
diff --git a/banyand/internal/storage/rotation_test.go 
b/banyand/internal/storage/rotation_test.go
index e9e003916..b9251a357 100644
--- a/banyand/internal/storage/rotation_test.go
+++ b/banyand/internal/storage/rotation_test.go
@@ -238,3 +238,72 @@ func (m *MockMetrics) Factory() observability.Factory {
 }
 
 var MockMetricsCreator = func(_ common.Position) Metrics { return 
&MockMetrics{} }
+
+func TestRotationDisabled(t *testing.T) {
+       t.Run("rotation should not create segments when disabled", func(t 
*testing.T) {
+               dir, defFn := test.Space(require.New(t))
+               defer defFn()
+
+               TSDBOpts := TSDBOpts[*MockTSTable, any]{
+                       Location:        dir,
+                       SegmentInterval: IntervalRule{Unit: DAY, Num: 3},
+                       TTL:             IntervalRule{Unit: DAY, Num: 30},
+                       ShardNum:        1,
+                       TSTableCreator:  MockTSTableCreator,
+                       DisableRotation: true,
+               }
+               ctx := context.Background()
+               mc := timestamp.NewMockClock()
+               ts, err := time.ParseInLocation("2006-01-02 15:04:05", 
"2024-05-01 00:00:00", time.Local)
+               require.NoError(t, err)
+               mc.Set(ts)
+               ctx = timestamp.SetClock(ctx, mc)
+
+               sc := NewServiceCache()
+               tsdb, err := OpenTSDB(ctx, TSDBOpts, sc, group)
+               require.NoError(t, err)
+               defer tsdb.Close()
+
+               // Create initial segment for day 1
+               seg, err := tsdb.CreateSegmentIfNotExist(ts)
+               require.NoError(t, err)
+               seg.DecRef()
+
+               db := tsdb.(*database[*MockTSTable, any])
+               segCtrl := db.segmentController
+
+               // Simulate data arriving day by day for 6 days (mimicking 
lifecycle migration)
+               // Day 1 (05-01): already in segment [05-01, 05-04)
+               // Day 2 (05-02): still in [05-01, 05-04)
+               // Day 3 (05-03): still in [05-01, 05-04)
+               // Day 4 (05-04): need new segment [05-04, 05-07)
+               for day := 0; day < 6; day++ {
+                       dayTime := ts.AddDate(0, 0, day)
+                       mc.Set(dayTime)
+                       // Simulate what migration does: 
CreateSegmentIfNotExist then Tick
+                       seg, segErr := tsdb.CreateSegmentIfNotExist(dayTime)
+                       require.NoError(t, segErr)
+                       seg.DecRef()
+                       // Tick with max timestamp near end of day (like 
migration does with ctx.MaxTimestamp)
+                       maxTS := dayTime.Add(23*time.Hour + 59*time.Minute + 
59*time.Second)
+                       tsdb.Tick(maxTS.UnixNano())
+               }
+
+               // Verify: only 2 segments created, both with correct 3-day 
boundaries
+               segments, _ := segCtrl.segments(false)
+               defer func() {
+                       for i := range segments {
+                               segments[i].DecRef()
+                       }
+               }()
+               require.Equal(t, 2, len(segments), "expected 2 segments for 6 
days with 3-day interval")
+
+               // First segment: [05-01, 05-04)
+               assert.Equal(t, "2024-05-01 00:00:00", 
segments[0].Start.Format("2006-01-02 15:04:05"))
+               assert.Equal(t, "2024-05-04 00:00:00", 
segments[0].End.Format("2006-01-02 15:04:05"))
+
+               // Second segment: [05-04, 05-07)
+               assert.Equal(t, "2024-05-04 00:00:00", 
segments[1].Start.Format("2006-01-02 15:04:05"))
+               assert.Equal(t, "2024-05-07 00:00:00", 
segments[1].End.Format("2006-01-02 15:04:05"))
+       })
+}
diff --git a/banyand/internal/storage/tsdb.go b/banyand/internal/storage/tsdb.go
index ca6b74ba9..7e7993085 100644
--- a/banyand/internal/storage/tsdb.go
+++ b/banyand/internal/storage/tsdb.go
@@ -63,6 +63,7 @@ type TSDBOpts[T TSTable, O any] struct {
        SeriesIndexCacheMaxBytes       int
        ShardNum                       uint32
        DisableRetention               bool
+       DisableRotation                bool
        SegmentIdleTimeout             time.Duration
        MemoryLimit                    uint64
 }
@@ -157,6 +158,7 @@ type database[T TSTable, O any] struct {
        rotationProcessOn atomic.Bool
        closed            atomic.Bool
        disableRetention  bool
+       disableRotation   bool
 }
 
 func (d *database[T, O]) Close() error {
@@ -244,6 +246,7 @@ func OpenTSDB[T TSTable, O any](ctx context.Context, opts 
TSDBOpts[T, O], cache
                return nil, err
        }
        obsservice.MetricsCollector.Register(location, db.collect)
+       db.disableRotation = opts.DisableRotation
        return db, db.startRotationTask()
 }
 
diff --git a/banyand/measure/metadata.go b/banyand/measure/metadata.go
index 524286c41..51bc324a7 100644
--- a/banyand/measure/metadata.go
+++ b/banyand/measure/metadata.go
@@ -682,6 +682,7 @@ func (s *supplier) OpenDB(groupSchema *commonv1.Group) 
(resourceSchema.DB, error
        segInterval := ro.SegmentInterval
        segmentIdleTimeout := time.Duration(0)
        disableRetention := false
+       disableRotation := false
        if len(ro.Stages) > 0 && len(s.nodeLabels) > 0 {
                var ttlNum uint32
                foundMatched := false
@@ -705,10 +706,12 @@ func (s *supplier) OpenDB(groupSchema *commonv1.Group) 
(resourceSchema.DB, error
                                segmentIdleTimeout = 5 * time.Minute
                        }
                        disableRetention = i+1 < len(ro.Stages)
+                       disableRotation = true
                        break
                }
                if !foundMatched {
                        disableRetention = true
+                       disableRotation = true
                }
        }
        group := groupSchema.Metadata.Name
@@ -725,6 +728,7 @@ func (s *supplier) OpenDB(groupSchema *commonv1.Group) 
(resourceSchema.DB, error
                StorageMetricsFactory:          factory,
                SegmentIdleTimeout:             segmentIdleTimeout,
                DisableRetention:               disableRetention,
+               DisableRotation:                disableRotation,
                MemoryLimit:                    s.pm.GetLimit(),
        }
        return storage.OpenTSDB(
diff --git a/banyand/stream/metadata.go b/banyand/stream/metadata.go
index 50bfd08aa..e7bbe3c82 100644
--- a/banyand/stream/metadata.go
+++ b/banyand/stream/metadata.go
@@ -537,6 +537,7 @@ func (s *supplier) OpenDB(groupSchema *commonv1.Group) 
(resourceSchema.DB, error
        segInterval := ro.SegmentInterval
        segmentIdleTimeout := time.Duration(0)
        disableRetention := false
+       disableRotation := false
        if len(ro.Stages) > 0 && len(s.nodeLabels) > 0 {
                var ttlNum uint32
                foundMatched := false
@@ -560,10 +561,12 @@ func (s *supplier) OpenDB(groupSchema *commonv1.Group) 
(resourceSchema.DB, error
                                segmentIdleTimeout = 5 * time.Minute
                        }
                        disableRetention = i+1 < len(ro.Stages)
+                       disableRotation = true
                        break
                }
                if !foundMatched {
                        disableRetention = true
+                       disableRotation = true
                }
        }
        group := groupSchema.Metadata.Name
@@ -580,6 +583,7 @@ func (s *supplier) OpenDB(groupSchema *commonv1.Group) 
(resourceSchema.DB, error
                StorageMetricsFactory:          
s.omr.With(storageScope.ConstLabels(meter.ToLabelPairs(common.DBLabelNames(), 
p.DBLabelValues()))),
                SegmentIdleTimeout:             segmentIdleTimeout,
                DisableRetention:               disableRetention,
+               DisableRotation:                disableRotation,
                MemoryLimit:                    s.pm.GetLimit(),
        }
        return storage.OpenTSDB(
diff --git a/banyand/trace/metadata.go b/banyand/trace/metadata.go
index 51e63b1d4..5f4e0fc17 100644
--- a/banyand/trace/metadata.go
+++ b/banyand/trace/metadata.go
@@ -560,6 +560,7 @@ func (s *supplier) OpenDB(groupSchema *commonv1.Group) 
(resourceSchema.DB, error
        segInterval := ro.SegmentInterval
        segmentIdleTimeout := time.Duration(0)
        disableRetention := false
+       disableRotation := false
        if len(ro.Stages) > 0 && len(s.nodeLabels) > 0 {
                var ttlNum uint32
                foundMatched := false
@@ -583,10 +584,12 @@ func (s *supplier) OpenDB(groupSchema *commonv1.Group) 
(resourceSchema.DB, error
                                segmentIdleTimeout = 5 * time.Minute
                        }
                        disableRetention = i+1 < len(ro.Stages)
+                       disableRotation = true
                        break
                }
                if !foundMatched {
                        disableRetention = true
+                       disableRotation = true
                }
        }
        group := groupSchema.Metadata.Name
@@ -603,6 +606,7 @@ func (s *supplier) OpenDB(groupSchema *commonv1.Group) 
(resourceSchema.DB, error
                StorageMetricsFactory:          
s.omr.With(traceScope.ConstLabels(meter.ToLabelPairs(common.DBLabelNames(), 
p.DBLabelValues()))),
                SegmentIdleTimeout:             segmentIdleTimeout,
                DisableRetention:               disableRetention,
+               DisableRotation:                disableRotation,
                MemoryLimit:                    s.pm.GetLimit(),
        }
        return storage.OpenTSDB(
diff --git a/docs/concept/rotation.md b/docs/concept/rotation.md
index 7d0a7ab80..0675ff87f 100644
--- a/docs/concept/rotation.md
+++ b/docs/concept/rotation.md
@@ -136,6 +136,15 @@ S = 2
 
 So, **2 segments** are required to retain data for 7 days with an 8-day 
segment interval. 2 segments are the minimum number whatever the TTL and 
segment interval are. When the TTL is less than the segment interval, you can 
have the minimum number of segments.
 
+## Rotation in Lifecycle Deployments
+
+In a hot-warm-cold lifecycle deployment, data rotation behaves differently 
depending on the node's role:
+
+- **Hot nodes**: Rotation is enabled. The rotation task pre-creates segments 
based on the hot stage's `segment_interval`.
+- **Warm and cold nodes**: Rotation is disabled. Segments are created on 
demand by the lifecycle migration process via `CreateSegmentIfNotExist`, using 
the target stage's `segment_interval`. Because the rotation task uses 
`NextTime(eventTime)`, which produces incorrect boundaries for multi-day 
intervals, disabling it on warm/cold nodes ensures segments are always created 
with correct boundaries.
+
+Additionally, data retention (TTL-based segment deletion) is disabled on 
non-last-stage nodes, since the lifecycle agent manages data migration between 
stages instead.
+
 ## Conclusion
 
 Data rotation is a critical aspect of managing data in BanyanDB. By 
understanding the relationship between the number of segments, segment 
interval, and TTL, you can effectively manage data retention and query 
performance in the database. The formula provided here offers a simple way to 
calculate the number of segments required based on the chosen segment interval 
and TTL.
diff --git a/docs/operation/lifecycle.md b/docs/operation/lifecycle.md
index 0da3edd4c..686d4bb02 100644
--- a/docs/operation/lifecycle.md
+++ b/docs/operation/lifecycle.md
@@ -132,6 +132,15 @@ lifecycle \
 | `--etcd-endpoints`    | Endpoints for etcd connections                       
                         | `""`                           |
 | `--schedule`          | Schedule for periodic backup (e.g., @yearly, 
@monthly, @weekly, @daily, etc.) | `""`                           |
 
+## Automatic Behavior on Warm and Cold Nodes
+
+When a data node matches a lifecycle stage (i.e., its labels match a stage's 
`node_selector`), two behaviors are automatically configured:
+
+- **Rotation is disabled**: The rotation task (which pre-creates segments on 
hot nodes) still starts on warm or cold nodes but skips segment creation. 
Segments are created on demand by the lifecycle migration process, ensuring 
correct segment boundaries for multi-day intervals.
+- **Retention is disabled on non-last stages**: TTL-based segment deletion is 
disabled on all stages except the last one. Data removal on intermediate stages 
is managed by the lifecycle migration process, not by the built-in retention 
mechanism.
+
+If a node's labels do not match any stage, both rotation and retention are 
disabled as a safety measure.
+
 ## Best Practices
 
 1. **Node Labeling:**

Reply via email to