klsince commented on a change in pull request #7301: URL: https://github.com/apache/pinot/pull/7301#discussion_r690617694
########## File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectory.java ########## @@ -97,17 +98,14 @@ public void close() @Override public void removeIndex(String columnName, ColumnIndexType indexType) { - File indexFile = getFileFor(columnName, indexType); - if (indexFile.delete()) { Review comment: 1. file.delete() can't remove folder, while TextIndex is put in a folder. 2. TextIndex has two parts: the index data in folder; and another mapping file. ########## File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/IndexEntry.java ########## @@ -27,7 +28,7 @@ class IndexEntry { private static Logger LOGGER = LoggerFactory.getLogger(IndexEntry.class); - IndexKey key; + final IndexKey key; Review comment: renamed them here https://github.com/apache/pinot/pull/7316 to keep this PR short. ########## File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/IndexKey.java ########## @@ -26,11 +26,11 @@ /** * Class representing index name and type */ -public class IndexKey { +public class IndexKey implements Comparable<IndexKey> { private static Logger LOGGER = LoggerFactory.getLogger(IndexKey.class); - String name; - ColumnIndexType type; + final String name; Review comment: reformated those in another quick PR: https://github.com/apache/pinot/pull/7316/files ########## File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/IndexKey.java ########## @@ -46,7 +46,7 @@ public boolean equals(Object o) { if (this == o) { return true; } - if (o == null || getClass() != o.getClass()) { + if (!(o instanceof IndexKey)) { Review comment: good to know!! will revert this. ########## File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java ########## @@ -329,47 +323,67 @@ private void persistIndexMap(IndexEntry entry) throws IOException { File mapFile = new File(_segmentDirectory, V1Constants.INDEX_MAP_FILE_NAME); try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter(mapFile, true)))) { - String startKey = getKey(entry.key.name, entry.key.type.getIndexName(), true); - - StringBuilder sb = new StringBuilder(); - sb.append(startKey).append(" = ").append(entry.startOffset); - writer.println(sb.toString()); - - String endKey = getKey(entry.key.name, entry.key.type.getIndexName(), false); - sb = new StringBuilder(); - sb.append(endKey).append(" = ").append(entry.size); - writer.println(sb.toString()); + persistIndexMap(entry, writer); } } - private String getKey(String column, String indexName, boolean isStartOffset) { - return column + MAP_KEY_SEPARATOR + indexName + MAP_KEY_SEPARATOR + (isStartOffset ? "startOffset" : "size"); - } - private String allocationContext(IndexKey key) { return this.getClass().getSimpleName() + key.toString(); } + /** + * This method sweeps the indices marked for removal. Exception is simply bubbled up w/o + * trying to recover disk states from failure. This method is expected to run during segment + * reloading, which has failure handling by creating a backup folder before doing reloading. + */ + private void cleanupRemovedIndices() + throws IOException { + if (!_shouldCleanupRemovedIndices) { Review comment: np ########## File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java ########## @@ -329,47 +323,67 @@ private void persistIndexMap(IndexEntry entry) throws IOException { File mapFile = new File(_segmentDirectory, V1Constants.INDEX_MAP_FILE_NAME); try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter(mapFile, true)))) { - String startKey = getKey(entry.key.name, entry.key.type.getIndexName(), true); - - StringBuilder sb = new StringBuilder(); - sb.append(startKey).append(" = ").append(entry.startOffset); - writer.println(sb.toString()); - - String endKey = getKey(entry.key.name, entry.key.type.getIndexName(), false); - sb = new StringBuilder(); - sb.append(endKey).append(" = ").append(entry.size); - writer.println(sb.toString()); + persistIndexMap(entry, writer); } } - private String getKey(String column, String indexName, boolean isStartOffset) { - return column + MAP_KEY_SEPARATOR + indexName + MAP_KEY_SEPARATOR + (isStartOffset ? "startOffset" : "size"); - } - private String allocationContext(IndexKey key) { return this.getClass().getSimpleName() + key.toString(); } + /** + * This method sweeps the indices marked for removal. Exception is simply bubbled up w/o + * trying to recover disk states from failure. This method is expected to run during segment + * reloading, which has failure handling by creating a backup folder before doing reloading. + */ + private void cleanupRemovedIndices() + throws IOException { + if (!_shouldCleanupRemovedIndices) { + return; + } + + File tmpIdxFile = new File(_segmentDirectory, V1Constants.INDEX_FILE_NAME + ".tmp"); + // Sort indices by column name and index type while copying, so that the + // new index_map file is easy to inspect for troubleshooting. + List<IndexEntry> retained = copyIndicesTo(_indexFile, new TreeMap<>(_columnEntries), tmpIdxFile); Review comment: I kinda prefer to leave this to caller, so that they can control data layout; and leave this method to do one thing - copying. lemme know your thoughts. ########## File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java ########## @@ -387,4 +401,53 @@ public boolean isIndexRemovalSupported() { public String toString() { return _segmentDirectory.toString() + "/" + _indexFile.toString(); } + + @VisibleForTesting + static List<IndexEntry> copyIndicesTo(File indexFile, Map<IndexKey, IndexEntry> toBeRetained, File tmpIdxFile) + throws IOException { + // Copy index from original index file and append to temp file. + // Keep track of the index entry pointing to the temp index file. + List<IndexEntry> retained = new ArrayList<>(); + long nextOffset = 0; + // With FileChannel, we can seek to the data flexibly. + try (FileChannel srcCh = new RandomAccessFile(indexFile, "r").getChannel(); + FileChannel dstCh = new RandomAccessFile(tmpIdxFile, "rw").getChannel()) { + for (Map.Entry<IndexKey, IndexEntry> next : toBeRetained.entrySet()) { + IndexKey key = next.getKey(); + IndexEntry idxPos = next.getValue(); + copyIndexTo(srcCh, idxPos.startOffset, idxPos.size, dstCh); Review comment: inlined next.getKey(), but will keep idxPos as it's referred 4 times here. idxPos as short for `index position`. lemme know how you'd like name it :) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org