rdblue commented on code in PR #6527:
URL: https://github.com/apache/iceberg/pull/6527#discussion_r1199816744
##########
core/src/main/java/org/apache/iceberg/deletes/Deletes.java:
##########
@@ -306,41 +357,20 @@ protected CloseableIterator<T>
applyDelete(CloseableIterator<T> items) {
}
private static class DataFileFilter<T extends StructLike> extends Filter<T> {
- private final CharSequence dataLocation;
+ private final CharSequenceSet dataLocation;
- DataFileFilter(CharSequence dataLocation) {
+ DataFileFilter(CharSequenceSet dataLocation) {
this.dataLocation = dataLocation;
}
- @Override
- protected boolean shouldKeep(T posDelete) {
- return charSeqEquals(dataLocation, (CharSequence)
FILENAME_ACCESSOR.get(posDelete));
+ DataFileFilter(CharSequence dataLocation) {
+ this.dataLocation = CharSequenceSet.of(ImmutableList.of(dataLocation));
}
- private boolean charSeqEquals(CharSequence s1, CharSequence s2) {
- if (s1 == s2) {
- return true;
- }
-
- int count = s1.length();
- if (count != s2.length()) {
- return false;
- }
-
- if (s1 instanceof String && s2 instanceof String && s1.hashCode() !=
s2.hashCode()) {
- return false;
- }
-
- // File paths inside a delete file normally have more identical chars at
the beginning. For
- // example, a typical
- // path is like
"s3:/bucket/db/table/data/partition/00000-0-[uuid]-00001.parquet".
- // The uuid is where the difference starts. So it's faster to find the
first diff backward.
- for (int i = count - 1; i >= 0; i--) {
- if (s1.charAt(i) != s2.charAt(i)) {
- return false;
- }
- }
- return true;
+ @Override
+ @SuppressWarnings("CollectionUndefinedEquality")
+ protected boolean shouldKeep(T posDelete) {
+ return dataLocation.contains(FILENAME_ACCESSOR.get(posDelete));
Review Comment:
Why does this remove the optimization above?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]