Copilot commented on code in PR #1059:
URL:
https://github.com/apache/skywalking-banyandb/pull/1059#discussion_r3061454000
##########
banyand/internal/storage/segment.go:
##########
@@ -477,21 +481,25 @@ func (sc *segmentController[T, O]) parse(value string)
(time.Time, error) {
func (sc *segmentController[T, O]) open() error {
sc.Lock()
defer sc.Unlock()
- emptySegments := make([]string, 0)
+ invalidSegments := make([]string, 0)
err := loadSegments(sc.location, segPathPrefix, sc,
sc.getOptions().SegmentInterval, func(start, end time.Time) error {
suffix := sc.format(start)
segmentPath := path.Join(sc.location, fmt.Sprintf(segTemplate,
suffix))
+ if start.Year() < 2000 {
+ invalidSegments = append(invalidSegments, segmentPath)
+ return nil
+ }
Review Comment:
The new invalid-segment detection treats any segment whose start time has
Year() < 2000 as corrupt and schedules it for deletion. This is broader than
the rest of the codebase’s timestamp validation (pkg/timestamp.Check allows
pre-2000 times) and can lead to legitimate historical segments being deleted on
startup. Consider narrowing the condition to what actually indicates the
reported corruption (e.g., start.UnixNano() <= 0, or matching the
epoch/near-epoch segment suffixes produced by a zero MinTimestamp) so valid
pre-2000 data isn’t removed.
##########
banyand/internal/storage/rotation.go:
##########
@@ -32,6 +32,9 @@ var (
)
func (d *database[T, O]) Tick(ts int64) {
+ if ts <= 0 {
+ return
+ }
Review Comment:
The new early-return for ts <= 0 changes Tick() behavior but isn’t covered
by the existing rotation tests. Add a unit test asserting that Tick(0) (and/or
negative values) does not update latestTickTime and does not enqueue a rotation
event, to prevent regressions and clarify the intended contract.
##########
banyand/internal/storage/segment.go:
##########
@@ -509,16 +517,19 @@ func (sc *segmentController[T, O]) open() error {
_, loadErr := sc.load(start, segmentEnd, sc.location)
return loadErr
})
- if len(emptySegments) > 0 {
- sc.l.Warn().Strs("segments", emptySegments).Msg("empty segments
found, removing them.")
- for i := range emptySegments {
- sc.lfs.MustRMAll(emptySegments[i])
+ if len(invalidSegments) > 0 {
+ sc.l.Warn().Strs("segments", invalidSegments).Msg("invalid
segments found (empty or epoch-dated), removing them")
+ for i := range invalidSegments {
+ sc.lfs.MustRMAll(invalidSegments[i])
}
}
return err
}
func (sc *segmentController[T, O]) create(start time.Time) (*segment[T, O],
error) {
+ if start.Year() < 2000 {
+ return nil, ErrInvalidSegmentTimestamp
+ }
Review Comment:
segmentController.create rejects any start time with Year() < 2000. This
introduces a new implicit timestamp policy that differs from
pkg/timestamp.Check (which permits pre-2000) and will prevent segment creation
(and potentially break writes/queries) for legitimate historical timestamps. It
would be safer to gate only the corrupted cases caused by zero/near-zero
timestamps (e.g., start.UnixNano() <= 0) or to reuse a single shared timestamp
validation rule across the codebase.
##########
banyand/internal/storage/tsdb_test.go:
##########
@@ -335,29 +334,68 @@ func TestTakeFileSnapshot(t *testing.T) {
require.NoError(t, err)
defer tsdb.Close()
- normalSeg, err := tsdb.CreateSegmentIfNotExist(ts)
- require.NoError(t, err)
- normalSegLocation := normalSeg.(*segment[*SnapshotMockTSTable,
any]).location
+ normalSeg, segErr := tsdb.CreateSegmentIfNotExist(ts)
+ require.NoError(t, segErr)
normalSeg.DecRef()
- epochSeg, err := tsdb.CreateSegmentIfNotExist(time.Unix(0, 0))
- require.NoError(t, err)
- epochSegLocation := epochSeg.(*segment[*SnapshotMockTSTable,
any]).location
- epochSeg.DecRef()
+ _, epochErr := tsdb.CreateSegmentIfNotExist(time.Unix(0, 0))
+ require.ErrorIs(t, epochErr, ErrInvalidSegmentTimestamp,
+ "creating a segment with epoch timestamp should be
rejected")
+ })
+}
- created, snapshotErr := tsdb.TakeFileSnapshot(snapshotDir)
- require.NoError(t, snapshotErr,
- "snapshot should not fail due to empty shard in epoch
segment")
- require.True(t, created)
+func TestEpochSegmentCleanupOnOpen(t *testing.T) {
+ logger.Init(logger.Logging{
+ Env: "dev",
+ Level: flags.LogLevel,
+ })
- normalSegDir := filepath.Join(snapshotDir,
filepath.Base(normalSegLocation))
- require.DirExists(t, normalSegDir,
- "normal segment should be present in snapshot")
+ dir, defFn := test.Space(require.New(t))
+ defer defFn()
- epochSegDir := filepath.Join(snapshotDir,
filepath.Base(epochSegLocation))
- require.DirExists(t, epochSegDir,
- "epoch segment directory should still be created")
- })
+ opts := TSDBOpts[*MockTSTable, any]{
+ Location: dir,
+ SegmentInterval: IntervalRule{Unit: DAY, Num: 1},
+ TTL: IntervalRule{Unit: DAY, Num: 7},
+ ShardNum: 1,
+ TSTableCreator: MockTSTableCreator,
+ }
+
+ ctx := context.Background()
+ mc := timestamp.NewMockClock()
+ ts, err := time.ParseInLocation("2006-01-02 15:04:05", "2024-05-01
00:00:00", time.Local)
+ require.NoError(t, err)
+ mc.Set(ts)
+ ctx = timestamp.SetClock(ctx, mc)
+
+ // Create a TSDB with a valid segment
+ sc := NewServiceCache()
+ tsdb1, err := OpenTSDB(ctx, opts, sc, group)
+ require.NoError(t, err)
+
+ seg, segErr := tsdb1.CreateSegmentIfNotExist(ts)
+ require.NoError(t, segErr)
+ seg.DecRef()
+ require.NoError(t, tsdb1.Close())
+
+ // Manually create a fake epoch segment directory with metadata to
simulate the bug
+ epochSegDir := filepath.Join(dir, "seg-19700101")
+ lfs := fs.NewLocalFileSystem()
+ lfs.MkdirIfNotExist(epochSegDir, DirPerm)
+ metaPath := filepath.Join(epochSegDir, metadataFilename)
+ lf, lockErr := lfs.CreateLockFile(metaPath, FilePerm)
+ require.NoError(t, lockErr)
+ _, writeErr :=
lf.Write([]byte(`{"version":"v1","end_time":"1970-01-02T00:00:00Z"}`))
+ require.NoError(t, writeErr)
Review Comment:
The test creates and locks the epoch segment metadata file via
CreateLockFile but never closes it. On platforms with mandatory locks (notably
Windows), this can prevent OpenTSDB from removing the directory during cleanup
and can also leak file descriptors. Close the lock file (and handle the Close
error) after writing so the cleanup behavior is reliably testable
cross-platform.
```suggestion
require.NoError(t, writeErr)
closeErr := lf.Close()
require.NoError(t, closeErr)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]