rmdmattingly commented on code in PR #6370:
URL: https://github.com/apache/hbase/pull/6370#discussion_r1839010140
########## hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreFactory.java: ##########
@@ -43,8 +44,13 @@ private BackupRestoreFactory() {
    * @return backup restore job instance
    */
   public static RestoreJob getRestoreJob(Configuration conf) {
+    Class<? extends RestoreJob> defaultCls =
+      conf.getBoolean(RestoreJob.KEEP_ORIGINAL_SPLITS_OPT, false)

Review Comment:
   We probably have more flexibility here while this feature is in beta. It might be good if we had a way to flag the split plan of backups via some metadata (or the lack thereof), so we could block any invalid ancestry across incompatible backups.
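   A rough sketch of what such a metadata gate could look like. Every name below (`BackupImageView`, `splitPlanPreserved`, `validateAncestry`) is hypothetical, not part of this PR or of HBase: each backup image would record whether it kept the original region splits, a missing flag would be treated as the pre-feature default, and an incremental backup would refuse ancestors whose flag disagrees with the current request.

```java
import java.io.IOException;
import java.util.List;
import java.util.Optional;

public final class SplitPlanCompatibilityCheck {

  private SplitPlanCompatibilityCheck() {
  }

  /** Minimal view of a backup image's manifest, stubbed for this sketch. */
  public interface BackupImageView {
    String backupId();

    /** Empty for backups taken before the flag existed. */
    Optional<Boolean> splitPlanPreserved();
  }

  /**
   * Refuses to chain an incremental backup onto ancestors whose split handling differs from the
   * current request. Images with no flag at all are treated as "splits not preserved", so
   * pre-feature backups stay compatible with the default behavior.
   */
  public static void validateAncestry(boolean keepOriginalSplits, List<BackupImageView> ancestors)
    throws IOException {
    for (BackupImageView ancestor : ancestors) {
      boolean ancestorKeptSplits = ancestor.splitPlanPreserved().orElse(false);
      if (ancestorKeptSplits != keepOriginalSplits) {
        throw new IOException("Backup " + ancestor.backupId() + " was taken with"
          + " keepOriginalSplits=" + ancestorKeptSplits + ", but this backup requested "
          + keepOriginalSplits + "; refusing to mix incompatible split plans in one lineage");
      }
    }
  }
}
```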
########## hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java: ##########
@@ -197,55 +202,56 @@ protected List<byte[]> handleBulkLoad(List<TableName> sTableList) throws IOExcep
           }
         }
       }
+      mergeSplitBulkloads(activeFiles, archiveFiles, srcTable);
+      incrementalCopyBulkloadHFiles(tgtFs, srcTable);
     }
-
-    copyBulkLoadedFiles(activeFiles, archiveFiles);
-    return pair.getSecond();
   }
 
-  private void copyBulkLoadedFiles(List<String> activeFiles, List<String> archiveFiles)
-    throws IOException {
-    try {
-      // Enable special mode of BackupDistCp
-      conf.setInt(MapReduceBackupCopyJob.NUMBER_OF_LEVELS_TO_PRESERVE_KEY, 5);
-      // Copy active files
-      String tgtDest = backupInfo.getBackupRootDir() + Path.SEPARATOR + backupInfo.getBackupId();
-      int attempt = 1;
-      while (activeFiles.size() > 0) {
-        LOG.info("Copy " + activeFiles.size() + " active bulk loaded files. Attempt =" + attempt++);
-        String[] toCopy = new String[activeFiles.size()];
-        activeFiles.toArray(toCopy);
-        // Active file can be archived during copy operation,
-        // we need to handle this properly
-        try {
-          incrementalCopyHFiles(toCopy, tgtDest);
-          break;
-        } catch (IOException e) {
-          // Check if some files got archived
-          // Update active and archived lists
-          // When file is being moved from active to archive
-          // directory, the number of active files decreases
-          int numOfActive = activeFiles.size();
-          updateFileLists(activeFiles, archiveFiles);
-          if (activeFiles.size() < numOfActive) {
-            continue;
-          }
-          // if not - throw exception
-          throw e;
+  private void mergeSplitBulkloads(List<String> activeFiles, List<String> archiveFiles,
+    TableName tn) throws IOException {
+    int attempt = 1;
+
+    while (!activeFiles.isEmpty()) {
+      LOG.info("MergeSplit {} active bulk loaded files. Attempt={}", activeFiles.size(), attempt++);
+      // Active file can be archived during copy operation,
+      // we need to handle this properly
+      try {
+        mergeSplitBulkloads(activeFiles, tn);
+        break;
+      } catch (IOException e) {
+        int numActiveFiles = activeFiles.size();
+        updateFileLists(activeFiles, archiveFiles);
+        if (activeFiles.size() < numActiveFiles) {
+          continue;
         }
+
+        throw e;
       }
-      // If incremental copy will fail for archived files
-      // we will have partially loaded files in backup destination (only files from active data
-      // directory). It is OK, because the backup will marked as FAILED and data will be cleaned up
-      if (archiveFiles.size() > 0) {
-        String[] toCopy = new String[archiveFiles.size()];
-        archiveFiles.toArray(toCopy);
-        incrementalCopyHFiles(toCopy, tgtDest);
+    }
+
+    if (!archiveFiles.isEmpty()) {
+      mergeSplitBulkloads(archiveFiles, tn);
+    }
+  }
+
+  private void mergeSplitBulkloads(List<String> files, TableName tn) throws IOException {
+    MapReduceHFileSplitterJob player = new MapReduceHFileSplitterJob();
+    conf.set(MapReduceHFileSplitterJob.BULK_OUTPUT_CONF_KEY,
+      getBulkOutputDirForTable(tn).toString());
+    player.setConf(conf);
+
+    String inputDirs = StringUtils.join(files, ",");
+    String[] args = { inputDirs, tn.getNameWithNamespaceInclAsString() };
+
+    try {
+      int result = player.run(args);
+      if (result != 0) {
+        throw new RuntimeException("Failed to run MapReduceHFileSplitterJob");
       }
-    } finally {
-      // Disable special mode of BackupDistCp
-      conf.unset(MapReduceBackupCopyJob.NUMBER_OF_LEVELS_TO_PRESERVE_KEY);
+    } catch (Exception e) {
+      LOG.error(e.toString(), e);

Review Comment:
   Logging the error twice is probably redundant. Maybe we could provide a more specific message instead.
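   On that last point, one possible shape for the fix, only a sketch: `HFileSplitterRunner` is a hypothetical helper, not code from the PR. It drops the `LOG.error` and instead throws an `IOException` whose message names the job, exit code, and table, assuming the caller logs the failure once when it marks the backup FAILED.

```java
import java.io.IOException;

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.mapreduce.MapReduceHFileSplitterJob;

/** Sketch of a helper that fails with a specific message instead of logging and rethrowing. */
final class HFileSplitterRunner {

  private HFileSplitterRunner() {
  }

  static void run(MapReduceHFileSplitterJob player, String[] args, TableName tn)
    throws IOException {
    try {
      int result = player.run(args);
      if (result != 0) {
        throw new IOException(
          "MapReduceHFileSplitterJob exited with code " + result + " for table " + tn);
      }
    } catch (IOException e) {
      // Message above is already specific; let the caller log it once on failure.
      throw e;
    } catch (Exception e) {
      throw new IOException("MapReduceHFileSplitterJob failed for table " + tn, e);
    }
  }
}
```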