aokolnychyi commented on code in PR #8157:
URL: https://github.com/apache/iceberg/pull/8157#discussion_r1275546173
##########
core/src/main/java/org/apache/iceberg/DeleteFileIndex.java:
##########
@@ -569,4 +570,153 @@ private Iterable<CloseableIterable<ManifestEntry<DeleteFile>>> deleteManifestRea
.liveEntries());
}
}
+
+ // a group of indexed delete files sorted by the sequence number they apply to
+ private static class DeleteFileGroup {
+ private final long[] seqs;
+ private final IndexedDeleteFile[] files;
+
+ DeleteFileGroup(IndexedDeleteFile[] files) {
+ this.seqs = Arrays.stream(files).mapToLong(IndexedDeleteFile::applySequenceNumber).toArray();
+ this.files = files;
+ }
+
+ DeleteFileGroup(long[] seqs, IndexedDeleteFile[] files) {
+ this.seqs = seqs;
+ this.files = files;
+ }
+
+ public Stream<IndexedDeleteFile> limit(long seq) {
+ return limitBySequenceNumber(seq, seqs, files);
+ }
+
+ public Iterable<DeleteFile> referencedDeleteFiles() {
+ return Arrays.stream(files).map(IndexedDeleteFile::wrapped).collect(Collectors.toList());
+ }
+ }
+
+ // a delete file wrapper that caches the converted boundaries for faster boundary checks
+ // this class is not meant to be exposed beyond the delete file index
+ private static class IndexedDeleteFile {
+ private final PartitionSpec spec;
+ private final DeleteFile wrapped;
+ private final long applySequenceNumber;
+ private volatile Map<Integer, Object> convertedLowerBounds = null;
+ private volatile Map<Integer, Object> convertedUpperBounds = null;
+
+ IndexedDeleteFile(PartitionSpec spec, DeleteFile file, long applySequenceNumber) {
+ this.spec = spec;
+ this.wrapped = file;
+ this.applySequenceNumber = applySequenceNumber;
+ }
+
+ IndexedDeleteFile(PartitionSpec spec, DeleteFile file) {
+ this.spec = spec;
+ this.wrapped = file;
+
+ if (file.content() == FileContent.EQUALITY_DELETES) {
+ this.applySequenceNumber = file.dataSequenceNumber() - 1;
+ } else {
+ this.applySequenceNumber = file.dataSequenceNumber();
+ }
+ }
+
+ public DeleteFile wrapped() {
+ return wrapped;
+ }
+
+ public long applySequenceNumber() {
+ return applySequenceNumber;
+ }
+
+ public FileContent content() {
+ return wrapped.content();
+ }
+
+ public List<Integer> equalityFieldIds() {
+ return wrapped.equalityFieldIds();
+ }
+
+ public Map<Integer, Long> valueCounts() {
+ return wrapped.valueCounts();
+ }
+
+ public Map<Integer, Long> nullValueCounts() {
+ return wrapped.nullValueCounts();
+ }
+
+ public Map<Integer, Long> nanValueCounts() {
+ return wrapped.nanValueCounts();
+ }
+
+ public boolean hasNoBounds() {
+ return wrapped.lowerBounds() == null || wrapped.upperBounds() == null;
+ }
+
+ public boolean hasBounds() {
+ return wrapped.lowerBounds() != null && wrapped.upperBounds() != null;
+ }
+
+ @SuppressWarnings("unchecked")
+ public <T> T lowerBound(int id) {
+ return (T) lowerBounds().get(id);
+ }
+
+ private Map<Integer, Object> lowerBounds() {
+ if (convertedLowerBounds == null) {
+ synchronized (this) {
+ if (convertedLowerBounds == null) {
+ this.convertedLowerBounds = convertBounds(wrapped.lowerBounds());
Review Comment:
I am still debating. We probably need a concurrent hash map to load each
value one by one, right?
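
For illustration only, a minimal sketch of the per-value alternative (a ConcurrentHashMap keyed by field id, populated via computeIfAbsent) might look like the code below. LazyBounds, rawLowerBounds, and convertLowerBound are hypothetical names invented for this sketch, not code from the PR; the PR converts the whole bounds map at once via convertBounds(...).

import java.nio.ByteBuffer;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// hypothetical sketch: convert each bound lazily, one field id at a time
class LazyBounds {
  private final Map<Integer, ByteBuffer> rawLowerBounds; // bounds as serialized in the delete file
  private final Map<Integer, Object> convertedLowerBounds = new ConcurrentHashMap<>();

  LazyBounds(Map<Integer, ByteBuffer> rawLowerBounds) {
    this.rawLowerBounds = rawLowerBounds;
  }

  @SuppressWarnings("unchecked")
  <T> T lowerBound(int id) {
    // computeIfAbsent converts a given bound at most once and is safe under concurrent access,
    // at the cost of a ConcurrentHashMap lookup per call instead of a single volatile read of a
    // fully converted map; note it cannot cache null results, so a missing bound would be
    // re-checked on every call
    return (T) convertedLowerBounds.computeIfAbsent(id, this::convertLowerBound);
  }

  private Object convertLowerBound(Integer id) {
    // stand-in for a single-value conversion, e.g. Conversions.fromByteBuffer(type, buffer)
    ByteBuffer buffer = rawLowerBounds.get(id);
    return buffer; // real code would convert using the field's type
  }
}

Whether that is worth it probably depends on how many bounds are actually consulted per delete file: if most checks touch only a couple of field ids, converting lazily per value avoids wasted work, while the existing double-checked conversion does one pass and then serves lookups from a plain map.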