anmolnar commented on code in PR #7007:
URL: https://github.com/apache/hbase/pull/7007#discussion_r2127449678


##########
hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java:
##########
@@ -876,6 +890,140 @@ private boolean canAnyOtherBackupCover(List<BackupInfo> allBackups, BackupInfo c
       return false;
     }
 
+    /**
+     * Cleans up Write-Ahead Logs (WALs) that are no longer required for PITR after a successful
+     * backup deletion.
+     */
+    private void cleanUpUnusedBackupWALs() throws IOException {
+      Configuration conf = getConf() != null ? getConf() : HBaseConfiguration.create();
+      String backupWalDir = conf.get(CONF_CONTINUOUS_BACKUP_WAL_DIR);
+
+      if (Strings.isNullOrEmpty(backupWalDir)) {
+        System.out.println("No WAL directory specified for continuous backup. 
Skipping cleanup.");
+        return;
+      }
+
+      try (Connection conn = ConnectionFactory.createConnection(conf);
+        BackupSystemTable sysTable = new BackupSystemTable(conn)) {
+        // Get list of tables under continuous backup
+        Map<TableName, Long> continuousBackupTables = sysTable.getContinuousBackupTableSet();
+        if (continuousBackupTables.isEmpty()) {
+          System.out.println("No continuous backups configured. Skipping WAL 
cleanup.");
+          return;
+        }
+
+        // Find the earliest timestamp after which WALs are still needed
+        long cutoffTimestamp = determineWALCleanupCutoffTime(sysTable);
+        if (cutoffTimestamp == 0) {
+          System.err.println("ERROR: No valid full backup found. Skipping WAL 
cleanup.");
+          return;
+        }
+
+        // Update metadata before actual cleanup to avoid inconsistencies
+        updateBackupTableStartTimes(sysTable, cutoffTimestamp);
+
+        // Delete WAL files older than cutoff timestamp
+        deleteOldWALFiles(conf, backupWalDir, cutoffTimestamp);
+
+      }
+    }
+
+    /**
+     * Determines the cutoff time for cleaning WAL files.
+     * @param sysTable Backup system table
+     * @return cutoff timestamp or 0 if not found
+     */
+    private long determineWALCleanupCutoffTime(BackupSystemTable sysTable) throws IOException {
+      List<BackupInfo> backupInfos = sysTable.getBackupInfos(BackupState.COMPLETE);
+      Collections.reverse(backupInfos); // Start from oldest
+
+      for (BackupInfo backupInfo : backupInfos) {
+        if (BackupType.FULL.equals(backupInfo.getType())) {
+          return backupInfo.getStartTs();
+        }
+      }
+      return 0;
+    }
+
+    /**
+     * Updates the start time for continuous backups if older than cutoff timestamp.
+     * @param sysTable        Backup system table
+     * @param cutoffTimestamp Timestamp before which WALs are no longer needed
+     */
+    private void updateBackupTableStartTimes(BackupSystemTable sysTable, long cutoffTimestamp)
+      throws IOException {
+
+      Map<TableName, Long> backupTables = sysTable.getContinuousBackupTableSet();
+      Set<TableName> tablesToUpdate = new HashSet<>();
+
+      for (Map.Entry<TableName, Long> entry : backupTables.entrySet()) {
+        if (entry.getValue() < cutoffTimestamp) {
+          tablesToUpdate.add(entry.getKey());
+        }
+      }
+
+      if (!tablesToUpdate.isEmpty()) {
+        sysTable.updateContinuousBackupTableSet(tablesToUpdate, cutoffTimestamp);
+      }
+    }
+
+    /**
+     * Cleans up old WAL and bulk-loaded files based on the determined cutoff timestamp.
+     */
+    private void deleteOldWALFiles(Configuration conf, String backupWalDir, long cutoffTime)
+      throws IOException {
+      System.out.println("Starting WAL cleanup in backup directory: " + 
backupWalDir
+        + " with cutoff time: " + cutoffTime);
+
+      BackupFileSystemManager manager =
+        new BackupFileSystemManager(CONTINUOUS_BACKUP_REPLICATION_PEER, conf, backupWalDir);
+      FileSystem fs = manager.getBackupFs();
+      Path walDir = manager.getWalsDir();
+      Path bulkloadDir = manager.getBulkLoadFilesDir();
+
+      SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
+      dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+
+      System.out.println("Listing directories under: " + walDir);
+
+      FileStatus[] directories = fs.listStatus(walDir);
+
+      for (FileStatus dirStatus : directories) {
+        if (!dirStatus.isDirectory()) {
+          continue; // Skip files, we only want directories
+        }
+
+        Path dirPath = dirStatus.getPath();
+        String dirName = dirPath.getName();
+
+        try {
+          long dayStart = parseDayDirectory(dirName, dateFormat);
+          System.out
+            .println("Checking WAL directory: " + dirName + " (Start Time: " + 
dayStart + ")");
+
+          // If WAL files of that day are older than cutoff time, delete them
+          if (dayStart + ONE_DAY_IN_MILLISECONDS - 1 < cutoffTime) {
+            System.out.println("Deleting outdated WAL directory: " + dirPath);
+            fs.delete(dirPath, true);

Review Comment:
   Good point, guys, but before going down this rabbit hole, please do some
performance tests for justification. Try deleting 100, 10,000, and 1 million
files in a single directory and share how long it takes synchronously.
Delete/unlink operations should be relatively quick on any filesystem, but
let's see how it behaves with S3.
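
   For reference, here is a minimal sketch of the kind of timing test being asked for,
using only Hadoop FileSystem calls that deleteOldWALFiles() already relies on
(mkdirs, create, delete). The class name, argument layout, and the example s3a://
URI are illustrative, not part of the PR:

```java
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Rough timing sketch: create N empty files in one directory, then time a single
 * recursive delete, mirroring the fs.delete(dirPath, true) call in deleteOldWALFiles().
 */
public class WalDeleteBenchmark {
  public static void main(String[] args) throws Exception {
    // args[0]: base URI, e.g. hdfs:///tmp/wal-bench or s3a://my-bucket/wal-bench (illustrative)
    // args[1]: number of files to create, e.g. 100, 10000, 1000000
    URI baseUri = URI.create(args[0]);
    int fileCount = Integer.parseInt(args[1]);

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(baseUri, conf);
    Path dir = new Path(new Path(baseUri), "delete-bench-" + System.currentTimeMillis());
    fs.mkdirs(dir);

    // Populate the directory with empty files standing in for archived WALs;
    // only namespace operations matter for this measurement.
    for (int i = 0; i < fileCount; i++) {
      try (FSDataOutputStream out = fs.create(new Path(dir, "wal-" + i))) {
        // intentionally left empty
      }
    }

    // Time the synchronous recursive delete.
    long start = System.nanoTime();
    boolean deleted = fs.delete(dir, true);
    long elapsedMs = (System.nanoTime() - start) / 1_000_000L;
    System.out.println("Deleted dir with " + fileCount + " files (success=" + deleted + ") in "
      + elapsedMs + " ms");
  }
}
```

Running it once against HDFS and once against an s3a:// path for each of the three
file counts should show whether the single recursive delete is acceptable or whether
batching/asynchronous deletion is worth the extra complexity.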



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]
