Re: [PR] fix(storage): prevent epoch segment creation from zero timestamps [skywalking-banyandb]

via GitHub Thu, 09 Apr 2026 17:57:41 -0700


Copilot commented on code in PR #1059:
URL: 
https://github.com/apache/skywalking-banyandb/pull/1059#discussion_r3061454000



##########
banyand/internal/storage/segment.go:
##########
@@ -477,21 +481,25 @@ func (sc *segmentController[T, O]) parse(value string) 
(time.Time, error) {
 func (sc *segmentController[T, O]) open() error {
        sc.Lock()
        defer sc.Unlock()
-       emptySegments := make([]string, 0)
+       invalidSegments := make([]string, 0)
        err := loadSegments(sc.location, segPathPrefix, sc, 
sc.getOptions().SegmentInterval, func(start, end time.Time) error {
                suffix := sc.format(start)
                segmentPath := path.Join(sc.location, fmt.Sprintf(segTemplate, 
suffix))
+               if start.Year() < 2000 {
+                       invalidSegments = append(invalidSegments, segmentPath)
+                       return nil
+               }

Review Comment:
   The new invalid-segment detection treats any segment whose start time has 
Year() < 2000 as corrupt and schedules it for deletion. This is broader than 
the rest of the codebase’s timestamp validation (pkg/timestamp.Check allows 
pre-2000 times) and can lead to legitimate historical segments being deleted on 
startup. Consider narrowing the condition to what actually indicates the 
reported corruption (e.g., start.UnixNano() <= 0, or matching the 
epoch/near-epoch segment suffixes produced by a zero MinTimestamp) so valid 
pre-2000 data isn’t removed.



##########
banyand/internal/storage/rotation.go:
##########
@@ -32,6 +32,9 @@ var (
 )
 
 func (d *database[T, O]) Tick(ts int64) {
+       if ts <= 0 {
+               return
+       }

Review Comment:
   The new early-return for ts <= 0 changes Tick() behavior but isn’t covered 
by the existing rotation tests. Add a unit test asserting that Tick(0) (and/or 
negative values) does not update latestTickTime and does not enqueue a rotation 
event, to prevent regressions and clarify the intended contract.



##########
banyand/internal/storage/segment.go:
##########
@@ -509,16 +517,19 @@ func (sc *segmentController[T, O]) open() error {
                _, loadErr := sc.load(start, segmentEnd, sc.location)
                return loadErr
        })
-       if len(emptySegments) > 0 {
-               sc.l.Warn().Strs("segments", emptySegments).Msg("empty segments 
found, removing them.")
-               for i := range emptySegments {
-                       sc.lfs.MustRMAll(emptySegments[i])
+       if len(invalidSegments) > 0 {
+               sc.l.Warn().Strs("segments", invalidSegments).Msg("invalid 
segments found (empty or epoch-dated), removing them")
+               for i := range invalidSegments {
+                       sc.lfs.MustRMAll(invalidSegments[i])
                }
        }
        return err
 }
 
 func (sc *segmentController[T, O]) create(start time.Time) (*segment[T, O], 
error) {
+       if start.Year() < 2000 {
+               return nil, ErrInvalidSegmentTimestamp
+       }

Review Comment:
   segmentController.create rejects any start time with Year() < 2000. This 
introduces a new implicit timestamp policy that differs from 
pkg/timestamp.Check (which permits pre-2000) and will prevent segment creation 
(and potentially break writes/queries) for legitimate historical timestamps. It 
would be safer to gate only the corrupted cases caused by zero/near-zero 
timestamps (e.g., start.UnixNano() <= 0) or to reuse a single shared timestamp 
validation rule across the codebase.



##########
banyand/internal/storage/tsdb_test.go:
##########
@@ -335,29 +334,68 @@ func TestTakeFileSnapshot(t *testing.T) {
                require.NoError(t, err)
                defer tsdb.Close()
 
-               normalSeg, err := tsdb.CreateSegmentIfNotExist(ts)
-               require.NoError(t, err)
-               normalSegLocation := normalSeg.(*segment[*SnapshotMockTSTable, 
any]).location
+               normalSeg, segErr := tsdb.CreateSegmentIfNotExist(ts)
+               require.NoError(t, segErr)
                normalSeg.DecRef()
 
-               epochSeg, err := tsdb.CreateSegmentIfNotExist(time.Unix(0, 0))
-               require.NoError(t, err)
-               epochSegLocation := epochSeg.(*segment[*SnapshotMockTSTable, 
any]).location
-               epochSeg.DecRef()
+               _, epochErr := tsdb.CreateSegmentIfNotExist(time.Unix(0, 0))
+               require.ErrorIs(t, epochErr, ErrInvalidSegmentTimestamp,
+                       "creating a segment with epoch timestamp should be 
rejected")
+       })
+}
 
-               created, snapshotErr := tsdb.TakeFileSnapshot(snapshotDir)
-               require.NoError(t, snapshotErr,
-                       "snapshot should not fail due to empty shard in epoch 
segment")
-               require.True(t, created)
+func TestEpochSegmentCleanupOnOpen(t *testing.T) {
+       logger.Init(logger.Logging{
+               Env:   "dev",
+               Level: flags.LogLevel,
+       })
 
-               normalSegDir := filepath.Join(snapshotDir, 
filepath.Base(normalSegLocation))
-               require.DirExists(t, normalSegDir,
-                       "normal segment should be present in snapshot")
+       dir, defFn := test.Space(require.New(t))
+       defer defFn()
 
-               epochSegDir := filepath.Join(snapshotDir, 
filepath.Base(epochSegLocation))
-               require.DirExists(t, epochSegDir,
-                       "epoch segment directory should still be created")
-       })
+       opts := TSDBOpts[*MockTSTable, any]{
+               Location:        dir,
+               SegmentInterval: IntervalRule{Unit: DAY, Num: 1},
+               TTL:             IntervalRule{Unit: DAY, Num: 7},
+               ShardNum:        1,
+               TSTableCreator:  MockTSTableCreator,
+       }
+
+       ctx := context.Background()
+       mc := timestamp.NewMockClock()
+       ts, err := time.ParseInLocation("2006-01-02 15:04:05", "2024-05-01 
00:00:00", time.Local)
+       require.NoError(t, err)
+       mc.Set(ts)
+       ctx = timestamp.SetClock(ctx, mc)
+
+       // Create a TSDB with a valid segment
+       sc := NewServiceCache()
+       tsdb1, err := OpenTSDB(ctx, opts, sc, group)
+       require.NoError(t, err)
+
+       seg, segErr := tsdb1.CreateSegmentIfNotExist(ts)
+       require.NoError(t, segErr)
+       seg.DecRef()
+       require.NoError(t, tsdb1.Close())
+
+       // Manually create a fake epoch segment directory with metadata to 
simulate the bug
+       epochSegDir := filepath.Join(dir, "seg-19700101")
+       lfs := fs.NewLocalFileSystem()
+       lfs.MkdirIfNotExist(epochSegDir, DirPerm)
+       metaPath := filepath.Join(epochSegDir, metadataFilename)
+       lf, lockErr := lfs.CreateLockFile(metaPath, FilePerm)
+       require.NoError(t, lockErr)
+       _, writeErr := 
lf.Write([]byte(`{"version":"v1","end_time":"1970-01-02T00:00:00Z"}`))
+       require.NoError(t, writeErr)

Review Comment:
   The test creates and locks the epoch segment metadata file via 
CreateLockFile but never closes it. On platforms with mandatory locks (notably 
Windows), this can prevent OpenTSDB from removing the directory during cleanup 
and can also leak file descriptors. Close the lock file (and handle the Close 
error) after writing so the cleanup behavior is reliably testable 
cross-platform.
   ```suggestion
        require.NoError(t, writeErr)
        closeErr := lf.Close()
        require.NoError(t, closeErr)
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] fix(storage): prevent epoch segment creation from zero timestamps [skywalking-banyandb]

Reply via email to