junegunn commented on code in PR #7037:
URL: https://github.com/apache/hbase/pull/7037#discussion_r2113524169


##########
hbase-diagnostics/src/main/java/org/apache/hadoop/hbase/PerformanceEvaluation.java:
##########
@@ -367,36 +367,67 @@ public void setStatus(String msg) {
    * opts.replicas}.
    */
   static boolean checkTable(Admin admin, TestOptions opts) throws IOException {
-    TableName tableName = TableName.valueOf(opts.tableName);
-    boolean needsDelete = false, exists = admin.tableExists(tableName);
-    boolean isReadCmd = opts.cmdName.toLowerCase(Locale.ROOT).contains("read")
+    final TableName tableName = TableName.valueOf(opts.tableName);
+    final boolean exists = admin.tableExists(tableName);
+    final boolean isReadCmd = 
opts.cmdName.toLowerCase(Locale.ROOT).contains("read")
       || opts.cmdName.toLowerCase(Locale.ROOT).contains("scan");
-    boolean isDeleteCmd = 
opts.cmdName.toLowerCase(Locale.ROOT).contains("delete");
-    if (!exists && (isReadCmd || isDeleteCmd)) {
+    final boolean isDeleteCmd = 
opts.cmdName.toLowerCase(Locale.ROOT).contains("delete");
+    final boolean needsData = isReadCmd || isDeleteCmd;
+    if (!exists && needsData) {
       throw new IllegalStateException(
-        "Must specify an existing table for read commands. Run a write command 
first.");
+        "Must specify an existing table for read/delete commands. Run a write 
command first.");
     }
     TableDescriptor desc = exists ? 
admin.getDescriptor(TableName.valueOf(opts.tableName)) : null;
-    byte[][] splits = getSplits(opts);
+    final byte[][] splits = getSplits(opts);
 
     // recreate the table when user has requested presplit or when existing
     // {RegionSplitPolicy,replica count} does not match requested, or when the
     // number of column families does not match requested.
-    if (
-      (exists && opts.presplitRegions != DEFAULT_OPTS.presplitRegions
-        && opts.presplitRegions != admin.getRegions(tableName).size())
-        || (!isReadCmd && desc != null
-          && !StringUtils.equals(desc.getRegionSplitPolicyClassName(), 
opts.splitPolicy))
-        || (!(isReadCmd || isDeleteCmd) && desc != null
-          && desc.getRegionReplication() != opts.replicas)
-        || (desc != null && desc.getColumnFamilyCount() != opts.families)
-    ) {
-      needsDelete = true;
-      // wait, why did it delete my table?!?
-      LOG.debug(MoreObjects.toStringHelper("needsDelete").add("needsDelete", 
needsDelete)
-        .add("isReadCmd", isReadCmd).add("exists", exists).add("desc", desc)
-        .add("presplit", opts.presplitRegions).add("splitPolicy", 
opts.splitPolicy)
-        .add("replicas", opts.replicas).add("families", 
opts.families).toString());
+    final boolean regionCountChanged =
+      exists && opts.presplitRegions != DEFAULT_OPTS.presplitRegions
+        && opts.presplitRegions != admin.getRegions(tableName).size();
+    final boolean splitPolicyChanged =
+      exists && !StringUtils.equals(desc.getRegionSplitPolicyClassName(), 
opts.splitPolicy);
+    final boolean regionReplicationChanged = exists && 
desc.getRegionReplication() != opts.replicas;
+    final boolean columnFamilyCountChanged = exists && 
desc.getColumnFamilyCount() != opts.families;
+
+    boolean needsDelete = regionCountChanged || splitPolicyChanged || 
regionReplicationChanged
+      || columnFamilyCountChanged;
+
+    if (needsDelete) {
+      final List<String> errors = new ArrayList<>();
+      if (columnFamilyCountChanged) {
+        final String error = String.format("--families=%d, but found %d column 
families",
+          opts.families, desc.getColumnFamilyCount());
+        if (needsData) {
+          // We can't proceed the test in this case
+          throw new IllegalStateException(
+            "Cannot proceed the test. Run a write command first: " + error);
+        }
+        errors.add(error);
+      }
+      if (regionCountChanged) {
+        errors.add(String.format("--presplit=%d, but found %d regions", 
opts.presplitRegions,
+          admin.getRegions(tableName).size()));
+      }
+      if (splitPolicyChanged) {
+        errors.add(String.format("--splitPolicy=%s, but current policy is %s", 
opts.splitPolicy,
+          desc.getRegionSplitPolicyClassName()));
+      }
+      if (regionReplicationChanged) {
+        errors.add(String.format("--replicas=%d, but found %d replicas", 
opts.replicas,
+          desc.getRegionReplication()));
+      }
+      final String reason =
+        errors.stream().map(s -> '[' + s + ']').collect(Collectors.joining(", 
"));
+
+      if (needsData) {
+        LOG.warn(
+          "Inconsistent table state detected. Consider running a write command 
first: " + reason);
+        needsDelete = false;

Review Comment:
   > I went back to check if that `presplitRegions` could do anything for read 
test, and it does not (please double check that).
   
   `--presplit` does not affect how PE performs reads, but it can trigger a 
recreation of the existing table (as shown in 
https://issues.apache.org/jira/browse/HBASE-29357). So, preventing such 
recreation, which is unwanted and makes the read test meaningless, with a 
minimal change of the current behavior is the purpose of this pull request. 
   
   `--replica` affects read tests by enabling timeline-consistent reads.
   
   Anyway, thanks for the suggestion. I rephrased it a little bit in 
https://github.com/apache/hbase/pull/7037/commits/646c9f1b076e57a22fe65b95e2f2ccab999a91b4.
 I wanted to leave it up to the user to determine if the inconsistencies are 
expected (e.g. manually split the table, using `--replica=1` on a table with 
more replicas to measure the performance impact of using timeline-consistent 
reads, etc.). Also, I figured the information should be useful when actually 
recreating the table, so I decided to promote it to "INFO".
   
   Please take a look and let me know what you think.
   
   
   > Table will be recreated: [--presplit=40, but found 30 regions]
   >
   > Unexpected or incorrect options provided for randomRead. Please verify 
whether the detected inconsistencies are expected or ignorable: [--presplit=30, 
but found 40 regions], [--replicas=3, but found 1 replicas]. The test will 
proceed, but the results may not be reliable.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to