Copilot commented on code in PR #1016:
URL:
https://github.com/apache/skywalking-banyandb/pull/1016#discussion_r2958926794
##########
banyand/trace/tstable.go:
##########
@@ -153,38 +157,56 @@ func (tst *tsTable) mustWriteSnapshot(snapshot uint64,
partNames []string) {
logger.Panicf("cannot marshal partNames to JSON: %s", err)
}
snapshotPath := filepath.Join(tst.root, snapshotName(snapshot))
- lf, err := tst.fileSystem.CreateLockFile(snapshotPath, storage.FilePerm)
+ snapshotTempPath := snapshotPath + ".tmp"
+ lf, err := tst.fileSystem.CreateLockFile(snapshotTempPath,
storage.FilePerm)
if err != nil {
- logger.Panicf("cannot create lock file %s: %s", snapshotPath,
err)
+ logger.Panicf("cannot create lock file %s: %s",
snapshotTempPath, err)
}
n, err := lf.Write(data)
if err != nil {
- logger.Panicf("cannot write snapshot %s: %s", snapshotPath, err)
+ _ = lf.Close()
+ logger.Panicf("cannot write snapshot %s: %s", snapshotTempPath,
err)
}
if n != len(data) {
- logger.Panicf("unexpected number of bytes written to %s; got
%d; want %d", snapshotPath, n, len(data))
+ _ = lf.Close()
+ logger.Panicf("unexpected number of bytes written to %s; got
%d; want %d", snapshotTempPath, n, len(data))
}
+ if closeErr := lf.Close(); closeErr != nil {
+ logger.Panicf("cannot close snapshot temp file %s: %s",
snapshotTempPath, closeErr)
+ }
+ if renameErr := tst.fileSystem.Rename(snapshotTempPath, snapshotPath);
renameErr != nil {
+ logger.Panicf("cannot rename snapshot %s to %s: %s",
snapshotTempPath, snapshotPath, renameErr)
+ }
+ tst.fileSystem.SyncPath(tst.root)
}
Review Comment:
mustWriteSnapshot now writes to a ".tmp" file (".snp.tmp") and then renames.
If the process crashes between write and rename, that temp file will be left
behind and initTSTable currently ignores it (since filepath.Ext is ".tmp"), so
these can accumulate indefinitely. Consider adding startup cleanup for leftover
snapshot temp files (e.g., delete files matching snapshotSuffix+".tmp") or
choosing a temp naming scheme that initTSTable explicitly handles/removes.
##########
banyand/stream/tstable.go:
##########
@@ -152,38 +156,56 @@ func (tst *tsTable) mustWriteSnapshot(snapshot uint64,
partNames []string) {
logger.Panicf("cannot marshal partNames to JSON: %s", err)
}
snapshotPath := filepath.Join(tst.root, snapshotName(snapshot))
- lf, err := tst.fileSystem.CreateLockFile(snapshotPath, storage.FilePerm)
+ snapshotTempPath := snapshotPath + ".tmp"
+ lf, err := tst.fileSystem.CreateLockFile(snapshotTempPath,
storage.FilePerm)
if err != nil {
- logger.Panicf("cannot create lock file %s: %s", snapshotPath,
err)
+ logger.Panicf("cannot create lock file %s: %s",
snapshotTempPath, err)
}
n, err := lf.Write(data)
if err != nil {
- logger.Panicf("cannot write snapshot %s: %s", snapshotPath, err)
+ _ = lf.Close()
+ logger.Panicf("cannot write snapshot %s: %s", snapshotTempPath,
err)
}
if n != len(data) {
- logger.Panicf("unexpected number of bytes written to %s; got
%d; want %d", snapshotPath, n, len(data))
+ _ = lf.Close()
+ logger.Panicf("unexpected number of bytes written to %s; got
%d; want %d", snapshotTempPath, n, len(data))
+ }
+ if closeErr := lf.Close(); closeErr != nil {
+ logger.Panicf("cannot close snapshot temp file %s: %s",
snapshotTempPath, closeErr)
}
+ if renameErr := tst.fileSystem.Rename(snapshotTempPath, snapshotPath);
renameErr != nil {
+ logger.Panicf("cannot rename snapshot %s to %s: %s",
snapshotTempPath, snapshotPath, renameErr)
+ }
+ tst.fileSystem.SyncPath(tst.root)
}
Review Comment:
mustWriteSnapshot now writes a snapshot to a ".tmp" file (".snp.tmp") and
renames it. If the process crashes before the rename, the temp file will remain
and initTSTable currently ignores it (ext ".tmp"), so these leftovers can
accumulate over time. Consider deleting stale snapshot temp files during
initTSTable or using a temp naming convention that initTSTable explicitly
cleans up.
##########
banyand/trace/tstable.go:
##########
@@ -262,9 +284,20 @@ func initTSTable(fileSystem fs.FileSystem, rootPath
string, p common.Position,
sort.Slice(loadedSnapshots, func(i, j int) bool {
return loadedSnapshots[i] > loadedSnapshots[j]
})
- epoch := loadedSnapshots[0]
- tst.loadSnapshot(epoch, loadedParts)
- return &tst, epoch
+ var failedSnapshots []uint64
+ for _, epoch := range loadedSnapshots {
+ loadErr := tst.loadSnapshot(epoch, loadedParts)
+ if loadErr == nil {
+ return &tst, epoch
+ }
+ tst.l.Warn().Err(loadErr).Uint64("epoch", epoch).Msg("cannot
load snapshot, trying next older")
+ failedSnapshots = append(failedSnapshots, epoch)
+ }
+ for _, id := range failedSnapshots {
+ tst.l.Info().Str("path", filepath.Join(rootPath,
snapshotName(id))).Msg("delete unreadable snapshot file")
+ fileSystem.MustRMAll(filepath.Join(rootPath, snapshotName(id)))
+ }
Review Comment:
initTSTable collects failedSnapshots but returns immediately on the first
successfully loaded older snapshot, so any newer unreadable snapshots
encountered earlier are never deleted. This will cause a warning (and fallback
scan) on every restart. If the intent is to clean up corrupt snapshots, delete
each failed snapshot immediately (or defer cleanup before returning on success).
##########
banyand/stream/tstable.go:
##########
@@ -273,9 +295,20 @@ func initTSTable(fileSystem fs.FileSystem, rootPath
string, p common.Position,
sort.Slice(loadedSnapshots, func(i, j int) bool {
return loadedSnapshots[i] > loadedSnapshots[j]
})
- epoch := loadedSnapshots[0]
- tst.loadSnapshot(epoch, loadedParts)
- return &tst, epoch, nil
+ var failedSnapshots []uint64
+ for _, epoch := range loadedSnapshots {
+ loadErr := tst.loadSnapshot(epoch, loadedParts)
+ if loadErr == nil {
+ return &tst, epoch, nil
+ }
+ tst.l.Warn().Err(loadErr).Uint64("epoch", epoch).Msg("cannot
load snapshot, trying next older")
+ failedSnapshots = append(failedSnapshots, epoch)
+ }
+ for _, id := range failedSnapshots {
+ tst.l.Info().Str("path", filepath.Join(rootPath,
snapshotName(id))).Msg("delete unreadable snapshot file")
+ fileSystem.MustRMAll(filepath.Join(rootPath, snapshotName(id)))
+ }
Review Comment:
initTSTable appends epochs to failedSnapshots, but because it returns as
soon as one older snapshot loads successfully, any newer unreadable snapshots
are never removed. This will keep emitting warnings and doing fallback work on
every restart. Consider removing a failed snapshot immediately after
loadSnapshot fails (or ensure cleanup runs before the successful return).
##########
banyand/measure/tstable.go:
##########
@@ -117,9 +117,20 @@ func initTSTable(fileSystem fs.FileSystem, rootPath
string, p common.Position,
sort.Slice(loadedSnapshots, func(i, j int) bool {
return loadedSnapshots[i] > loadedSnapshots[j]
})
- epoch := loadedSnapshots[0]
- tst.loadSnapshot(epoch, loadedParts)
- return &tst, epoch
+ var failedSnapshots []uint64
+ for _, epoch := range loadedSnapshots {
+ loadErr := tst.loadSnapshot(epoch, loadedParts)
+ if loadErr == nil {
+ return &tst, epoch
+ }
+ tst.l.Warn().Err(loadErr).Uint64("epoch", epoch).Msg("cannot
load snapshot, trying next older")
+ failedSnapshots = append(failedSnapshots, epoch)
+ }
+ for _, id := range failedSnapshots {
+ tst.l.Info().Str("path", filepath.Join(rootPath,
snapshotName(id))).Msg("delete unreadable snapshot file")
+ fileSystem.MustRMAll(filepath.Join(rootPath, snapshotName(id)))
+ }
Review Comment:
initTSTable records failedSnapshots but returns immediately when an older
snapshot loads, so any newer unreadable snapshots encountered earlier are never
deleted. This will cause repeated warnings and fallback scanning on every
restart. If cleanup is desired, delete the failed snapshot right after
loadSnapshot fails, or run the cleanup before returning on success (e.g., via
defer).
##########
banyand/measure/tstable.go:
##########
@@ -235,38 +250,56 @@ func (tst *tsTable) mustWriteSnapshot(snapshot uint64,
partNames []string) {
logger.Panicf("cannot marshal partNames to JSON: %s", err)
}
snapshotPath := filepath.Join(tst.root, snapshotName(snapshot))
- lf, err := tst.fileSystem.CreateLockFile(snapshotPath, storage.FilePerm)
+ snapshotTempPath := snapshotPath + ".tmp"
+ lf, err := tst.fileSystem.CreateLockFile(snapshotTempPath,
storage.FilePerm)
if err != nil {
- logger.Panicf("cannot create lock file %s: %s", snapshotPath,
err)
+ logger.Panicf("cannot create lock file %s: %s",
snapshotTempPath, err)
}
n, err := lf.Write(data)
if err != nil {
- logger.Panicf("cannot write snapshot %s: %s", snapshotPath, err)
+ _ = lf.Close()
+ logger.Panicf("cannot write snapshot %s: %s", snapshotTempPath,
err)
}
if n != len(data) {
- logger.Panicf("unexpected number of bytes written to %s; got
%d; want %d", snapshotPath, n, len(data))
+ _ = lf.Close()
+ logger.Panicf("unexpected number of bytes written to %s; got
%d; want %d", snapshotTempPath, n, len(data))
}
+ if closeErr := lf.Close(); closeErr != nil {
+ logger.Panicf("cannot close snapshot temp file %s: %s",
snapshotTempPath, closeErr)
+ }
+ if renameErr := tst.fileSystem.Rename(snapshotTempPath, snapshotPath);
renameErr != nil {
+ logger.Panicf("cannot rename snapshot %s to %s: %s",
snapshotTempPath, snapshotPath, renameErr)
+ }
+ tst.fileSystem.SyncPath(tst.root)
}
Review Comment:
mustWriteSnapshot writes to snapshotPath+".tmp" and then renames. If a crash
happens before rename, the ".snp.tmp" file will be left behind and initTSTable
currently ignores it (ext ".tmp"), so temp files can accumulate over time.
Consider cleaning up stale snapshot temp files during initTSTable or adopting a
temp naming scheme that initTSTable explicitly detects/removes.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]