This is an automated email from the ASF dual-hosted git repository.

cshannon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/main by this push:
     new 53683e4701 Implement estimateOverlappingEntries in fenced files (#4626)
53683e4701 is described below

commit 53683e4701d1f049e6c7698489a3b8716ad5b143
Author: Christopher L. Shannon <cshan...@apache.org>
AuthorDate: Sat Jun 1 15:56:58 2024 -0400

    Implement estimateOverlappingEntries in fenced files (#4626)
    
    This adds support for estimating overlapping entries in fenced RFiles.
    This works by clipping the KeyExtent provided with the fenced file range
    to find the overlapping section to use for computing the estimated
    entries. Row range validation has been narrowed and renamed to
    validate that the range is what will be generated by a KeyExtent
    toDataRange
    
    This closes #4611
    
    ---------
    
    Co-authored-by: Keith Turner <ktur...@apache.org>
---
 .../apache/accumulo/core/client/rfile/RFile.java   |  4 +-
 .../apache/accumulo/core/dataImpl/KeyExtent.java   | 61 ++++++++++++++++
 .../org/apache/accumulo/core/file/rfile/RFile.java |  8 +-
 .../accumulo/core/metadata/AbstractTabletFile.java | 48 ++----------
 .../accumulo/core/metadata/StoredTabletFile.java   |  7 +-
 .../apache/accumulo/core/util/RowRangeUtil.java    | 85 ++++++++++++++++++++++
 .../core/client/rfile/RFileClientTest.java         | 11 +--
 .../apache/accumulo/core/data/KeyExtentTest.java   | 43 +++++++++++
 .../core/file/rfile/AbstractRFileTest.java         | 37 +++++++++-
 .../accumulo/core/file/rfile/FencedRFileTest.java  | 72 +++++++++++++++++-
 .../apache/accumulo/core/file/rfile/RFileTest.java | 33 +++++++++
 .../metadata/schema/ReferencedTabletFileTest.java  | 22 ++++--
 .../constraints/MetadataConstraintsTest.java       | 15 ++--
 .../java/org/apache/accumulo/test/CloneIT.java     |  3 +-
 .../java/org/apache/accumulo/test/VolumeIT.java    |  7 +-
 15 files changed, 383 insertions(+), 73 deletions(-)

diff --git 
a/core/src/main/java/org/apache/accumulo/core/client/rfile/RFile.java 
b/core/src/main/java/org/apache/accumulo/core/client/rfile/RFile.java
index 9b61e53c21..64956dcbc2 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/rfile/RFile.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/rfile/RFile.java
@@ -36,8 +36,8 @@ import 
org.apache.accumulo.core.client.summary.Summary.FileStatistics;
 import org.apache.accumulo.core.conf.Property;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Range;
-import org.apache.accumulo.core.metadata.AbstractTabletFile;
 import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.util.RowRangeUtil;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Text;
@@ -100,7 +100,7 @@ public class RFile {
 
       public FencedPath(Path path, Range fence) {
         this.path = Objects.requireNonNull(path);
-        this.fence = AbstractTabletFile.requireRowRange(fence);
+        this.fence = RowRangeUtil.requireKeyExtentDataRange(fence);
       }
 
       public Path getPath() {
diff --git 
a/core/src/main/java/org/apache/accumulo/core/dataImpl/KeyExtent.java 
b/core/src/main/java/org/apache/accumulo/core/dataImpl/KeyExtent.java
index 434c0f7b89..b5bdb1f744 100644
--- a/core/src/main/java/org/apache/accumulo/core/dataImpl/KeyExtent.java
+++ b/core/src/main/java/org/apache/accumulo/core/dataImpl/KeyExtent.java
@@ -20,6 +20,8 @@ package org.apache.accumulo.core.dataImpl;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
 import static java.util.Objects.requireNonNull;
+import static 
org.apache.accumulo.core.util.RowRangeUtil.requireKeyExtentDataRange;
+import static org.apache.accumulo.core.util.RowRangeUtil.stripZeroTail;
 
 import java.io.ByteArrayOutputStream;
 import java.io.DataInput;
@@ -388,6 +390,65 @@ public class KeyExtent implements Comparable<KeyExtent> {
     return new Range(metadataPrevRow, prevEndRow() == null, toMetaRow(), true);
   }
 
+  /**
+   * Creates a KeyExtent which represents the intersection of this KeyExtent 
and the passed in
+   * range.
+   * <p>
+   * <b>Note:</b> The range provided must be a range that is derived from a 
KeyExtent. This means
+   * the range must be in the format of a row range and also requires an 
exclusive start key, which
+   * is the format that {@link #toDataRange()} uses
+   *
+   * @param range range to clip to
+   * @return the intersection of this KeyExtent and the given range
+   * @throws IllegalArgumentException if the KeyExtent and range do not overlap
+   */
+  public KeyExtent clip(Range range) {
+    return clip(range, false);
+  }
+
+  /**
+   * Creates a KeyExtent which represents the intersection of this KeyExtent 
and the passed in
+   * range. Unlike {@link #clip(Range)}, this method can optionally return 
null if the given range
+   * and this KeyExtent do not overlap, instead of throwing an exception. The 
returnNullIfDisjoint
+   * parameter controls this behavior.
+   * <p>
+   * <b>Note:</b> The range provided must be a range that is derived from a 
KeyExtent. This means
+   * the range must be in the format of a row range and also requires an 
exclusive start key, which
+   * is the format that {@link #toDataRange()} uses
+   *
+   * @param range range to clip to
+   * @param returnNullIfDisjoint true to return null if ranges are disjoint, 
false to throw an
+   *        exception
+   * @return the intersection of this KeyExtent and the given range, or null 
if given range and this
+   *         KeyExtent do not overlap and returnNullIfDisjoint is true
+   * @throws IllegalArgumentException if the KeyExtent and range does not 
overlap and
+   *         returnNullIfDisjoint is false
+   *
+   * @see KeyExtent#clip(Range)
+   **/
+  public KeyExtent clip(Range range, boolean returnNullIfDisjoint) {
+    // This will require a range that matches a row range generated by 
toDataRange()
+    // This range itself will be required to be an inclusive start and 
exclusive end
+    // The start and end rows will be required to be exclusive keys (ending in 
0x00)
+    requireKeyExtentDataRange(range);
+
+    // If returnNullIfDisjoint is false then this will throw an exception if
+    // the ranges are disjoint, otherwise we can just return null
+    final Range clippedRange = this.toDataRange().clip(range, 
returnNullIfDisjoint);
+    if (clippedRange == null) {
+      return null;
+    }
+
+    // Build the new KeyExtent with the clipped range. We need to strip off 
the ending byte
+    // which will essentially reverse what toDataRange() does
+    Text endRow = clippedRange.getEndKey() != null
+        ? new 
Text(stripZeroTail(clippedRange.getEndKey().getRowData()).toArray()) : null;
+    Text prevEndRow = clippedRange.getStartKey() != null
+        ? new 
Text(stripZeroTail(clippedRange.getStartKey().getRowData()).toArray()) : null;
+
+    return new KeyExtent(tableId, endRow, prevEndRow);
+  }
+
   private boolean startsAfter(KeyExtent other) {
     KeyExtent nke = requireNonNull(other);
     return tableId().compareTo(nke.tableId()) > 0 || (prevEndRow() != null && 
nke.endRow() != null
diff --git a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java 
b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
index 2772e91659..00d01aad9f 100644
--- a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
+++ b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
@@ -1808,8 +1808,12 @@ public class RFile {
     }
 
     @Override
-    public long estimateOverlappingEntries(KeyExtent extent) throws 
IOException {
-      return reader.estimateOverlappingEntries(extent);
+    public long estimateOverlappingEntries(KeyExtent c) throws IOException {
+      KeyExtent overlapping = c.clip(fence, true);
+      if (overlapping == null) {
+        return 0;
+      }
+      return reader.estimateOverlappingEntries(overlapping);
     }
 
     @Override
diff --git 
a/core/src/main/java/org/apache/accumulo/core/metadata/AbstractTabletFile.java 
b/core/src/main/java/org/apache/accumulo/core/metadata/AbstractTabletFile.java
index ae0d46fbf2..aff3ee2514 100644
--- 
a/core/src/main/java/org/apache/accumulo/core/metadata/AbstractTabletFile.java
+++ 
b/core/src/main/java/org/apache/accumulo/core/metadata/AbstractTabletFile.java
@@ -18,15 +18,14 @@
  */
 package org.apache.accumulo.core.metadata;
 
+import static 
org.apache.accumulo.core.util.RowRangeUtil.requireKeyExtentDataRange;
+import static org.apache.accumulo.core.util.RowRangeUtil.stripZeroTail;
+
 import java.util.Objects;
 
-import org.apache.accumulo.core.data.ByteSequence;
-import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Range;
 import org.apache.hadoop.fs.Path;
 
-import com.google.common.base.Preconditions;
-
 /**
  * A base class used to represent file references that are handled by code 
that processes tablet
  * files.
@@ -41,7 +40,7 @@ public abstract class AbstractTabletFile<T extends 
AbstractTabletFile<T>>
 
   protected AbstractTabletFile(Path path, Range range) {
     this.path = Objects.requireNonNull(path);
-    this.range = requireRowRange(range);
+    this.range = requireKeyExtentDataRange(range);
   }
 
   @Override
@@ -59,44 +58,13 @@ public abstract class AbstractTabletFile<T extends 
AbstractTabletFile<T>>
     return !range.isInfiniteStartKey() || !range.isInfiniteStopKey();
   }
 
-  public static Range requireRowRange(Range range) {
-    if (!range.isInfiniteStartKey()) {
-      Preconditions.checkArgument(range.isStartKeyInclusive() && 
isOnlyRowSet(range.getStartKey()),
-          "Range is not a row range %s", range);
-    }
-
-    if (!range.isInfiniteStopKey()) {
-      Preconditions.checkArgument(!range.isEndKeyInclusive() && 
isOnlyRowSet(range.getEndKey())
-          && isExclusiveKey(range.getEndKey()), "Range is not a row range %s", 
range);
-    }
-
-    return range;
-  }
-
-  private static boolean isOnlyRowSet(Key key) {
-    return key.getColumnFamilyData().length() == 0 && 
key.getColumnQualifierData().length() == 0
-        && key.getColumnVisibilityData().length() == 0 && key.getTimestamp() 
== Long.MAX_VALUE;
-  }
-
-  private static boolean isExclusiveKey(Key key) {
-    var row = key.getRowData();
-    return row.length() > 0 && row.byteAt(row.length() - 1) == (byte) 0x00;
-  }
-
-  private static String stripZeroTail(ByteSequence row) {
-    if (row.byteAt(row.length() - 1) == (byte) 0x00) {
-      return row.subSequence(0, row.length() - 1).toString();
-    }
-    return row.toString();
-  }
-
   @Override
   public String toMinimalString() {
     if (hasRange()) {
-      String startRow =
-          range.isInfiniteStartKey() ? "-inf" : 
stripZeroTail(range.getStartKey().getRowData());
-      String endRow =
-          range.isInfiniteStopKey() ? "+inf" : 
stripZeroTail(range.getEndKey().getRowData());
+      String startRow = range.isInfiniteStartKey() ? "-inf"
+          : stripZeroTail(range.getStartKey().getRowData()).toString();
+      String endRow = range.isInfiniteStopKey() ? "+inf"
+          : stripZeroTail(range.getEndKey().getRowData()).toString();
       return getFileName() + " (" + startRow + "," + endRow + "]";
     } else {
       return getFileName();
diff --git 
a/core/src/main/java/org/apache/accumulo/core/metadata/StoredTabletFile.java 
b/core/src/main/java/org/apache/accumulo/core/metadata/StoredTabletFile.java
index 16bb0458a9..0f66759ca2 100644
--- a/core/src/main/java/org/apache/accumulo/core/metadata/StoredTabletFile.java
+++ b/core/src/main/java/org/apache/accumulo/core/metadata/StoredTabletFile.java
@@ -18,6 +18,8 @@
  */
 package org.apache.accumulo.core.metadata;
 
+import static 
org.apache.accumulo.core.util.RowRangeUtil.requireKeyExtentDataRange;
+
 import java.io.ByteArrayOutputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
@@ -155,7 +157,7 @@ public class StoredTabletFile extends 
AbstractTabletFile<StoredTabletFile> {
     // Validate the path
     ReferencedTabletFile.parsePath(deserialize(metadataEntry).path);
     // Validate the range
-    requireRowRange(tabletFileCq.range);
+    requireKeyExtentDataRange(tabletFileCq.range);
   }
 
   public static StoredTabletFile of(final Text metadataEntry) {
@@ -198,6 +200,7 @@ public class StoredTabletFile extends 
AbstractTabletFile<StoredTabletFile> {
     // Recreate the exact Range that was originally stored in Metadata. Stored 
ranges are originally
     // constructed with inclusive/exclusive for the start and end key 
inclusivity settings.
     // (Except for Ranges with no start/endkey as then the inclusivity flags 
do not matter)
+    // The ranges must match the format of KeyExtent.toDataRange()
     //
     // With this particular constructor, when setting the startRowInclusive to 
true and
     // endRowInclusive to false, both the start and end row values will be 
taken as is
@@ -215,7 +218,7 @@ public class StoredTabletFile extends 
AbstractTabletFile<StoredTabletFile> {
   }
 
   public static String serialize(String path, Range range) {
-    requireRowRange(range);
+    requireKeyExtentDataRange(range);
     final TabletFileCqMetadataGson metadata = new TabletFileCqMetadataGson();
     metadata.path = Objects.requireNonNull(path);
     metadata.startRow = encodeRow(range.getStartKey());
diff --git a/core/src/main/java/org/apache/accumulo/core/util/RowRangeUtil.java 
b/core/src/main/java/org/apache/accumulo/core/util/RowRangeUtil.java
new file mode 100644
index 0000000000..d61e17b2c7
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/util/RowRangeUtil.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.accumulo.core.util;
+
+import org.apache.accumulo.core.data.ByteSequence;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.dataImpl.KeyExtent;
+
+import com.google.common.base.Preconditions;
+
+public class RowRangeUtil {
+
+  /**
+   * Validates a range is a valid KeyExtent data range, which is a special 
type of row range. These
+   * ranges are created by calling {@code new Range(startRow, false, endRow, 
true);} which is what
+   * {@link KeyExtent#toDataRange()} does.
+   *
+   * A KeyExtent data row range is defined as:
+   * <ul>
+   * <li>A range that has an inclusive start and exclusive end</li>
+   * <li>A range that only has the row portion set</li>
+   * <li>A range where both the start and end key end with a zero byte</li>
+   * </ul>
+   *
+   * @param range The range to validate
+   * @return The original range
+   */
+  public static Range requireKeyExtentDataRange(Range range) {
+    String errorMsg = "Range is not a KeyExtent data range";
+
+    if (!range.isInfiniteStartKey()) {
+      Preconditions.checkArgument(range.isStartKeyInclusive(),
+          "%s, start key must be inclusive. %s", errorMsg, range);
+      Preconditions.checkArgument(isOnlyRowSet(range.getStartKey()),
+          "%s, start key must only contain a row. %s", errorMsg, range);
+      Preconditions.checkArgument(isRowSuffixZeroByte(range.getStartKey()),
+          "%s, start key does not end with zero byte. %s, ", errorMsg, range);
+    }
+
+    if (!range.isInfiniteStopKey()) {
+      Preconditions.checkArgument(!range.isEndKeyInclusive(), "%s, end key 
must be exclusive. %s",
+          errorMsg, range);
+      Preconditions.checkArgument(isOnlyRowSet(range.getEndKey()),
+          "%s, end key must only contain a row. %s", errorMsg, range);
+      Preconditions.checkArgument(isRowSuffixZeroByte(range.getEndKey()),
+          "%s, end key does not end with a zero byte. %s, ", errorMsg, range);
+    }
+
+    return range;
+  }
+
+  public static boolean isOnlyRowSet(Key key) {
+    return key.getColumnFamilyData().length() == 0 && 
key.getColumnQualifierData().length() == 0
+        && key.getColumnVisibilityData().length() == 0 && key.getTimestamp() 
== Long.MAX_VALUE;
+  }
+
+  public static boolean isRowSuffixZeroByte(Key key) {
+    var row = key.getRowData();
+    return row.length() > 0 && row.byteAt(row.length() - 1) == (byte) 0x00;
+  }
+
+  public static ByteSequence stripZeroTail(ByteSequence row) {
+    if (row.byteAt(row.length() - 1) == (byte) 0x00) {
+      return row.subSequence(0, row.length() - 1);
+    }
+    return row;
+  }
+}
diff --git 
a/core/src/test/java/org/apache/accumulo/core/client/rfile/RFileClientTest.java 
b/core/src/test/java/org/apache/accumulo/core/client/rfile/RFileClientTest.java
index fcf2f62764..3466026518 100644
--- 
a/core/src/test/java/org/apache/accumulo/core/client/rfile/RFileClientTest.java
+++ 
b/core/src/test/java/org/apache/accumulo/core/client/rfile/RFileClientTest.java
@@ -232,7 +232,7 @@ public class RFileClientTest {
 
     LocalFileSystem localFs = FileSystem.getLocal(new Configuration());
 
-    Range range = new Range(rowStr(3), true, rowStr(14), true);
+    Range range = new Range(rowStr(3), false, rowStr(14), true);
     Scanner scanner =
         RFile.newScanner().from(new FencedPath(new Path(new 
File(testFile).toURI()), range))
             .withFileSystem(localFs).build();
@@ -259,9 +259,10 @@ public class RFileClientTest {
     new FencedPath(new Path(new File(testFile).toURI()), new Range());
     // This constructor converts to the proper inclusive/exclusive rows
     new FencedPath(new Path(new File(testFile).toURI()),
-        new Range(rowStr(3), true, rowStr(14), true));
-    new FencedPath(new Path(new File(testFile).toURI()), new Range(new 
Key(rowStr(3)), true,
-        new Key(rowStr(14)).followingKey(PartialKey.ROW), false));
+        new Range(rowStr(3), false, rowStr(14), true));
+    new FencedPath(new Path(new File(testFile).toURI()),
+        new Range(new Key(rowStr(3)).followingKey(PartialKey.ROW), true,
+            new Key(rowStr(14)).followingKey(PartialKey.ROW), false));
 
     // Test invalid Row Ranges
     // Missing 0x00 byte
@@ -291,7 +292,7 @@ public class RFileClientTest {
 
     LocalFileSystem localFs = FileSystem.getLocal(new Configuration());
 
-    Range range = new Range(rowStr(3), true, rowStr(14), true);
+    Range range = new Range(rowStr(3), false, rowStr(14), true);
 
     RFileSKVIterator reader =
         getReader(localFs, UnreferencedTabletFile.ofRanged(localFs, new 
File(testFile), range));
diff --git 
a/core/src/test/java/org/apache/accumulo/core/data/KeyExtentTest.java 
b/core/src/test/java/org/apache/accumulo/core/data/KeyExtentTest.java
index 1392ddfb56..778f3854a1 100644
--- a/core/src/test/java/org/apache/accumulo/core/data/KeyExtentTest.java
+++ b/core/src/test/java/org/apache/accumulo/core/data/KeyExtentTest.java
@@ -21,6 +21,7 @@ package org.apache.accumulo.core.data;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.io.ByteArrayInputStream;
@@ -181,4 +182,46 @@ public class KeyExtentTest {
     return KeyExtent.readFrom(new DataInputStream(bais));
   }
 
+  @Test
+  public void testClip() {
+    // Test extent will null end/prevEnd and infinite range matches original
+    ke = new KeyExtent(TableId.of("1"), null, null);
+    Range range = new Range();
+    KeyExtent expected = ke;
+    KeyExtent clipped = ke.clip(range);
+    assertEquals(expected, clipped);
+
+    // Test extent with null end/prevEnd with a range, should match the range
+    expected = new KeyExtent(TableId.of("1"), new Text("r_0001"), new 
Text("r_0000"));
+    ke = new KeyExtent(TableId.of("1"), null, null);
+    range = new Range("r_0000", false, "r_0001", true);
+    clipped = ke.clip(range);
+    assertEquals(expected, clipped);
+
+    // Test resulting extent is narrowed, the overlapping rows are 4 - 7
+    expected = new KeyExtent(TableId.of("1"), new Text("r_0007"), new 
Text("r_0003"));
+    ke = new KeyExtent(TableId.of("1"), new Text("r_0007"), new 
Text("r_0000"));
+    range = new Range("r_0003", false, "r_0009", true);
+    clipped = ke.clip(range);
+    assertEquals(expected, clipped);
+
+    // Test disjoint
+    ke = new KeyExtent(TableId.of("1"), new Text("r_0007"), new 
Text("r_0005"));
+    range = new Range("r_0002", false, "r_0004", true);
+    clipped = ke.clip(range, true);
+    assertNull(clipped);
+    final Range r = range;
+    assertThrows(IllegalArgumentException.class, () -> ke.clip(r));
+
+    // Invalid row ranges
+    assertThrows(IllegalArgumentException.class,
+        () -> ke.clip(new Range("r_0002", true, "r_0004", true)));
+    assertThrows(IllegalArgumentException.class, () -> ke
+        .clip(new Range(new Key("r_0002", "cf_0002"), false, new Key("r_0003", 
"cf_0003"), true)));
+    // Test empty row to make sure isExclusiveKey() checks length
+    assertThrows(IllegalArgumentException.class,
+        () -> ke.clip(new Range(new 
Key("r_0004").followingKey(PartialKey.ROW), true,
+            new Key("").followingKey(PartialKey.ROW), false)));
+  }
+
 }
diff --git 
a/core/src/test/java/org/apache/accumulo/core/file/rfile/AbstractRFileTest.java 
b/core/src/test/java/org/apache/accumulo/core/file/rfile/AbstractRFileTest.java
index a220407b92..26d7a4baf5 100644
--- 
a/core/src/test/java/org/apache/accumulo/core/file/rfile/AbstractRFileTest.java
+++ 
b/core/src/test/java/org/apache/accumulo/core/file/rfile/AbstractRFileTest.java
@@ -37,7 +37,9 @@ import org.apache.accumulo.core.crypto.CryptoFactoryLoader;
 import org.apache.accumulo.core.data.ByteSequence;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.TableId;
 import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.dataImpl.KeyExtent;
 import org.apache.accumulo.core.file.FileSKVIterator;
 import 
org.apache.accumulo.core.file.blockfile.cache.impl.BlockCacheConfiguration;
 import 
org.apache.accumulo.core.file.blockfile.cache.impl.BlockCacheManagerFactory;
@@ -61,6 +63,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.io.Text;
 
 public abstract class AbstractRFileTest {
 
@@ -94,6 +97,10 @@ public abstract class AbstractRFileTest {
     }
 
     public void openWriter(boolean startDLG, int blockSize) throws IOException 
{
+      openWriter(startDLG, blockSize, 1000);
+    }
+
+    public void openWriter(boolean startDLG, int blockSize, int 
indexBlockSize) throws IOException {
       baos = new ByteArrayOutputStream();
       dos = new FSDataOutputStream(baos, new FileSystem.Statistics("a"));
       CryptoService cs = 
CryptoFactoryLoader.getServiceForClient(CryptoEnvironment.Scope.TABLE,
@@ -109,7 +116,7 @@ public abstract class AbstractRFileTest {
         sampler = SamplerFactory.newSampler(samplerConfig, 
accumuloConfiguration);
       }
 
-      writer = new RFile.Writer(_cbw, blockSize, 1000, samplerConfig, sampler);
+      writer = new RFile.Writer(_cbw, blockSize, indexBlockSize, 
samplerConfig, sampler);
 
       if (startDLG) {
         writer.startDefaultLocalityGroup();
@@ -261,4 +268,32 @@ public abstract class AbstractRFileTest {
     assertFalse(eki.hasNext());
     assertFalse(evi.hasNext());
   }
+
+  protected void verifyEstimated(FileSKVIterator reader) throws IOException {
+    // Test estimated entries for 1 row
+    long estimated = reader.estimateOverlappingEntries(new 
KeyExtent(TableId.of("1"),
+        new Text(formatString("r_", 1)), new Text(formatString("r_", 0))));
+    // One row contains 256 but the estimate will be more with overlapping 
index entries
+    assertEquals(264, estimated);
+
+    // Test for 2 rows
+    estimated = reader.estimateOverlappingEntries(new 
KeyExtent(TableId.of("1"),
+        new Text(formatString("r_", 2)), new Text(formatString("r_", 0))));
+    // Two rows contains 512 but the estimate will be more with overlapping 
index entries
+    assertEquals(516, estimated);
+
+    // 3 rows
+    // Actual should be 768, estimate is 772
+    estimated = reader.estimateOverlappingEntries(
+        new KeyExtent(TableId.of("1"), null, new Text(formatString("r_", 0))));
+    assertEquals(772, estimated);
+
+    // Tests when full number of entries should return
+    estimated = reader.estimateOverlappingEntries(new 
KeyExtent(TableId.of("1"), null, null));
+    assertEquals(1024, estimated);
+
+    estimated = reader.estimateOverlappingEntries(
+        new KeyExtent(TableId.of("1"), new Text(formatString("r_", 4)), null));
+    assertEquals(1024, estimated);
+  }
 }
diff --git 
a/core/src/test/java/org/apache/accumulo/core/file/rfile/FencedRFileTest.java 
b/core/src/test/java/org/apache/accumulo/core/file/rfile/FencedRFileTest.java
index 3698216bea..70be12bb6c 100644
--- 
a/core/src/test/java/org/apache/accumulo/core/file/rfile/FencedRFileTest.java
+++ 
b/core/src/test/java/org/apache/accumulo/core/file/rfile/FencedRFileTest.java
@@ -32,7 +32,9 @@ import java.util.concurrent.atomic.AtomicBoolean;
 import org.apache.accumulo.core.crypto.CryptoTest;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.TableId;
 import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.dataImpl.KeyExtent;
 import org.apache.accumulo.core.file.FileSKVIterator;
 import org.apache.accumulo.core.file.rfile.RFile.FencedIndex;
 import org.apache.accumulo.core.file.rfile.RFile.FencedReader;
@@ -181,7 +183,7 @@ public class FencedRFileTest extends AbstractRFileTest {
     final TestRFile trf = initTestFile();
 
     // Fence off the file to contain only 1 row (r_00001)
-    Range range = new Range(new Range("r_000001"));
+    Range range = new Range("r_000001");
     trf.openReader(range);
 
     // Open a fenced reader
@@ -312,6 +314,74 @@ public class FencedRFileTest extends AbstractRFileTest {
     assertFalse(reader.hasTop());
   }
 
+  @Test
+  public void testEstimateOverlappingEntries() throws IOException {
+    final TestRFile trf = new TestRFile(conf);
+    // set block sizes lower to get a better estimate
+    trf.openWriter(true, 100, 100);
+    // write a test file with 1024 entries
+    writeTestFile(trf);
+    trf.closeWriter();
+
+    // Test with infinite start/end range for fenced file
+    Range range = new Range();
+    trf.openReader(range);
+    FencedReader reader = (FencedReader) trf.iter;
+    // Expect entire range to be seen, so we can re-use the same 
verifyEstimate() tests
+    // used for non-fenced files
+    verifyEstimated(reader);
+    trf.closeReader();
+
+    // Test with start/end range for fenced file that covers full file
+    range = new Range(null, false, "r_000004", true);
+    trf.openReader(range);
+    reader = (FencedReader) trf.iter;
+    verifyEstimated(reader);
+    trf.closeReader();
+
+    // Fence off the file to contain only 1 row (r_00001)
+    range = new Range("r_000001", false, "r_000002", true);
+    trf.openReader(range);
+    reader = (FencedReader) trf.iter;
+
+    // Key extent is null end/prev end row but file is fenced to 1 row
+    var extent = new KeyExtent(TableId.of("1"), null, null);
+    long estimated = reader.estimateOverlappingEntries(extent);
+    // One row contains 256 but will overestimate slightly
+    assertEquals(258, estimated);
+
+    // Disjoint, fenced file is set to row 1 and KeyExtent is row 0
+    estimated = reader
+        .estimateOverlappingEntries(new KeyExtent(TableId.of("1"), new 
Text("r_000001"), null));
+    assertEquals(0, estimated);
+    trf.closeReader();
+
+    // Fence off the file to contain only 2 rows (r_00001 and r_000002)
+    range = new Range("r_000001", false, "r_000003", true);
+    trf.openReader(range);
+    reader = (FencedReader) trf.iter;
+
+    // Key extent is null end/prev end row but file is fenced to 2 rows
+    extent = new KeyExtent(TableId.of("1"), null, null);
+    estimated = reader.estimateOverlappingEntries(extent);
+    // two rows contain 512 but will overestimate slightly
+    assertEquals(514, estimated);
+
+    // 1 overlapping row
+    extent = new KeyExtent(TableId.of("1"), new Text("r_000002"), null);
+    estimated = reader.estimateOverlappingEntries(extent);
+    assertEquals(258, estimated);
+    trf.closeReader();
+
+    // Invalid row range
+    range = new Range("r_000001", true, "r_000003", true);
+    trf.openReader(range);
+    final var r = (FencedReader) trf.iter;
+    assertThrows(IllegalArgumentException.class,
+        () -> r.estimateOverlappingEntries(new KeyExtent(TableId.of("1"), 
null, null)));
+    trf.closeReader();
+  }
+
   private int testFencing(List<Range> fencedRange, List<Range> expectedRange) 
throws IOException {
     // test an rfile with multiple rows having multiple columns
 
diff --git 
a/core/src/test/java/org/apache/accumulo/core/file/rfile/RFileTest.java 
b/core/src/test/java/org/apache/accumulo/core/file/rfile/RFileTest.java
index 3dec555203..b81b0196d9 100644
--- a/core/src/test/java/org/apache/accumulo/core/file/rfile/RFileTest.java
+++ b/core/src/test/java/org/apache/accumulo/core/file/rfile/RFileTest.java
@@ -1512,6 +1512,39 @@ public class RFileTest extends AbstractRFileTest {
     trf.closeReader();
   }
 
+  @Test
+  public void testEstimateOverlappingEntries() throws IOException {
+    TestRFile trf = new TestRFile(conf);
+    // lower block sizes so estimates are closer
+    trf.openWriter(true, 100, 100);
+
+    // generate 1024 entries
+    int count = 0;
+    for (int row = 0; row < 4; row++) {
+      String rowS = formatString("r_", row);
+      for (int cf = 0; cf < 4; cf++) {
+        String cfS = formatString("cf_", cf);
+        for (int cq = 0; cq < 4; cq++) {
+          String cqS = formatString("cq_", cq);
+          for (int cv = 'A'; cv < 'A' + 4; cv++) {
+            String cvS = "" + (char) cv;
+            for (int ts = 4; ts > 0; ts--) {
+              Key k = newKey(rowS, cfS, cqS, cvS, ts);
+              Value v = newValue("" + count);
+              trf.writer.append(k, v);
+              count++;
+            }
+          }
+        }
+      }
+    }
+    trf.closeWriter();
+
+    trf.openReader();
+    verifyEstimated(trf.reader);
+    trf.closeReader();
+  }
+
   @Test
   public void testMissingUnreleasedVersions() {
     assertThrows(NullPointerException.class,
diff --git 
a/core/src/test/java/org/apache/accumulo/core/metadata/schema/ReferencedTabletFileTest.java
 
b/core/src/test/java/org/apache/accumulo/core/metadata/schema/ReferencedTabletFileTest.java
index 93e7bc0525..04c38f9f9b 100644
--- 
a/core/src/test/java/org/apache/accumulo/core/metadata/schema/ReferencedTabletFileTest.java
+++ 
b/core/src/test/java/org/apache/accumulo/core/metadata/schema/ReferencedTabletFileTest.java
@@ -132,24 +132,34 @@ public class ReferencedTabletFileTest {
     assertThrows(IllegalArgumentException.class, () -> new 
ReferencedTabletFile(testPath, r2));
 
     // range where the key looks like a row, but the start key inclusivity is 
not whats expected
-    Range r3 = new Range(new Key("r1"), false, new Key("r2"), false);
+    Range r3 = new Range(new Key("r1").followingKey(PartialKey.ROW), false, 
new Key("r2"), false);
     assertThrows(IllegalArgumentException.class, () -> new 
ReferencedTabletFile(testPath, r3));
 
     // range where the key looks like a row, but the end key inclusivity is 
not whats expected
-    Range r4 = new Range(new Key("r1"), true, new Key("r2"), true);
+    Range r4 = new Range(new Key("r1").followingKey(PartialKey.ROW), true, new 
Key("r2"), true);
     assertThrows(IllegalArgumentException.class, () -> new 
ReferencedTabletFile(testPath, r4));
 
     // range where end key does not end with correct byte and is marked 
exclusive false
-    Range r5 = new Range(new Key("r1"), true, new Key("r2"), false);
+    Range r5 = new Range(new Key("r1").followingKey(PartialKey.ROW), true, new 
Key("r2"), false);
     assertThrows(IllegalArgumentException.class, () -> new 
ReferencedTabletFile(testPath, r5));
 
     // This is valid as the end key is exclusive and ends in 0x00
-    Range r6 = new Range(new Key("r1"), true, new 
Key("r2").followingKey(PartialKey.ROW), false);
+    Range r6 = new Range(new Key("r1").followingKey(PartialKey.ROW), true,
+        new Key("r2").followingKey(PartialKey.ROW), false);
     assertTrue(new ReferencedTabletFile(testPath, r6).hasRange());
 
-    // This is valid as the end key will be converted to exclusive and 0x00 
should be appended
-    Range r7 = new Range(new Text("r1"), true, new Text("r2"), true);
+    // This is valid as the start key will be converted to inclusive and 0x00 
should be appended
+    // and the end key will be converted to exclusive and 0x00 should also be 
appended
+    Range r7 = new Range(new Text("r1"), false, new Text("r2"), true);
     assertTrue(new ReferencedTabletFile(testPath, r7).hasRange());
+
+    // This is invalid as the start key is exclusive
+    Range r8 = new Range(new Key("r1"), false, new 
Key("r2").followingKey(PartialKey.ROW), false);
+    assertThrows(IllegalArgumentException.class, () -> new 
ReferencedTabletFile(testPath, r8));
+
+    // This is invalid as the start key is missing the 0x00 byte
+    Range r9 = new Range(new Key("r1"), true, new 
Key("r2").followingKey(PartialKey.ROW), false);
+    assertThrows(IllegalArgumentException.class, () -> new 
ReferencedTabletFile(testPath, r9));
   }
 
 }
diff --git 
a/server/base/src/test/java/org/apache/accumulo/server/constraints/MetadataConstraintsTest.java
 
b/server/base/src/test/java/org/apache/accumulo/server/constraints/MetadataConstraintsTest.java
index b7e2bd2a11..b8e7cf38a1 100644
--- 
a/server/base/src/test/java/org/apache/accumulo/server/constraints/MetadataConstraintsTest.java
+++ 
b/server/base/src/test/java/org/apache/accumulo/server/constraints/MetadataConstraintsTest.java
@@ -353,9 +353,8 @@ public class MetadataConstraintsTest {
     // required for an endRow so will fail validation
     m = new Mutation(new Text("0;foo"));
     m.put(BulkFileColumnFamily.NAME,
-        new Text(StoredTabletFile
-            .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), 
new Range("a", "b"))
-            .getMetadata().replaceFirst("\"endRow\":\".*\"",
+        new Text(StoredTabletFile.of(new 
Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"),
+            new Range("a", false, "b", 
true)).getMetadata().replaceFirst("\"endRow\":\".*\"",
                 "\"endRow\":\"" + encodeRowForMetadata("bad") + "\"")),
         new Value("5"));
     assertViolation(mc, m, (short) 9);
@@ -439,9 +438,8 @@ public class MetadataConstraintsTest {
     // required for an endRow so this will fail validation
     m = new Mutation(new Text("0;foo"));
     m.put(columnFamily,
-        new Text(StoredTabletFile
-            .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), 
new Range("a", "b"))
-            .getMetadata()
+        new Text(StoredTabletFile.of(new 
Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"),
+            new Range("a", false, "b", true)).getMetadata()
             .replaceFirst("\"endRow\":\".*\"", "\"endRow\":\"" + 
encodeRowForMetadata("b") + "\"")),
         value);
     assertViolation(mc, m, (short) 9);
@@ -467,9 +465,8 @@ public class MetadataConstraintsTest {
     // Should pass validation with range set
     m = new Mutation(new Text("0;foo"));
     m.put(columnFamily,
-        StoredTabletFile
-            .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), 
new Range("a", "b"))
-            .getMetadataText(),
+        StoredTabletFile.of(new 
Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"),
+            new Range("a", false, "b", true)).getMetadataText(),
         new DataFileValue(1, 1).encodeAsValue());
     violations = mc.check(createEnv(), m);
     assertNull(violations);
diff --git a/test/src/main/java/org/apache/accumulo/test/CloneIT.java 
b/test/src/main/java/org/apache/accumulo/test/CloneIT.java
index 74dad9787e..2d7caa2e94 100644
--- a/test/src/main/java/org/apache/accumulo/test/CloneIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/CloneIT.java
@@ -439,7 +439,8 @@ public class CloneIT extends AccumuloClusterHarness {
           // Pass in up to 3 arguments of infinite ranges to test non-ranged 
files
           Arguments.of(new Range(), new Range(), new Range()),
           // For second run pass in up to 3 arguments with the first two 
non-infinite ranges
-          Arguments.of(new Range("row_0"), new Range("row_1"), new Range()));
+          Arguments.of(new Range(null, false, "row_0", true),
+              new Range("row_0", false, "row_1", true), new Range()));
     }
   }
 }
diff --git a/test/src/main/java/org/apache/accumulo/test/VolumeIT.java 
b/test/src/main/java/org/apache/accumulo/test/VolumeIT.java
index 79c6f6d261..8428aef52c 100644
--- a/test/src/main/java/org/apache/accumulo/test/VolumeIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/VolumeIT.java
@@ -558,10 +558,9 @@ public class VolumeIT extends ConfigurableMacBase {
       final DataFileValue newValue = new DataFileValue(Integer.max(1, (int) 
(value.getSize() / 2)),
           Integer.max(1, (int) (value.getNumEntries() / 2)));
       mutator.putFile(StoredTabletFile.of(file.getPath(),
-          new Range(tm.getExtent().prevEndRow(), tabletMidPoint)), newValue);
-      mutator.putFile(
-          StoredTabletFile.of(file.getPath(), new Range(tabletMidPoint, 
tm.getExtent().endRow())),
-          newValue);
+          new Range(tm.getExtent().prevEndRow(), false, tabletMidPoint, 
true)), newValue);
+      mutator.putFile(StoredTabletFile.of(file.getPath(),
+          new Range(tabletMidPoint, false, tm.getExtent().endRow(), true)), 
newValue);
     });
   }
 


Reply via email to