junegunn commented on code in PR #7037:
URL: https://github.com/apache/hbase/pull/7037#discussion_r2113027478
##########
hbase-diagnostics/src/main/java/org/apache/hadoop/hbase/PerformanceEvaluation.java:
##########
@@ -367,36 +367,67 @@ public void setStatus(String msg) {
* opts.replicas}.
*/
static boolean checkTable(Admin admin, TestOptions opts) throws IOException {
-  TableName tableName = TableName.valueOf(opts.tableName);
-  boolean needsDelete = false, exists = admin.tableExists(tableName);
-  boolean isReadCmd = opts.cmdName.toLowerCase(Locale.ROOT).contains("read")
+  final TableName tableName = TableName.valueOf(opts.tableName);
+  final boolean exists = admin.tableExists(tableName);
+  final boolean isReadCmd = opts.cmdName.toLowerCase(Locale.ROOT).contains("read")
     || opts.cmdName.toLowerCase(Locale.ROOT).contains("scan");
-  boolean isDeleteCmd = opts.cmdName.toLowerCase(Locale.ROOT).contains("delete");
-  if (!exists && (isReadCmd || isDeleteCmd)) {
+  final boolean isDeleteCmd = opts.cmdName.toLowerCase(Locale.ROOT).contains("delete");
+  final boolean needsData = isReadCmd || isDeleteCmd;
+  if (!exists && needsData) {
     throw new IllegalStateException(
-      "Must specify an existing table for read commands. Run a write command first.");
+      "Must specify an existing table for read/delete commands. Run a write command first.");
   }
   TableDescriptor desc = exists ? admin.getDescriptor(TableName.valueOf(opts.tableName)) : null;
-  byte[][] splits = getSplits(opts);
+  final byte[][] splits = getSplits(opts);
   // recreate the table when user has requested presplit or when existing
   // {RegionSplitPolicy,replica count} does not match requested, or when the
   // number of column families does not match requested.
-  if (
-    (exists && opts.presplitRegions != DEFAULT_OPTS.presplitRegions
-      && opts.presplitRegions != admin.getRegions(tableName).size())
-      || (!isReadCmd && desc != null
-        && !StringUtils.equals(desc.getRegionSplitPolicyClassName(), opts.splitPolicy))
-      || (!(isReadCmd || isDeleteCmd) && desc != null
-        && desc.getRegionReplication() != opts.replicas)
-      || (desc != null && desc.getColumnFamilyCount() != opts.families)
-  ) {
-    needsDelete = true;
-    // wait, why did it delete my table?!?
-    LOG.debug(MoreObjects.toStringHelper("needsDelete").add("needsDelete", needsDelete)
-      .add("isReadCmd", isReadCmd).add("exists", exists).add("desc", desc)
-      .add("presplit", opts.presplitRegions).add("splitPolicy", opts.splitPolicy)
-      .add("replicas", opts.replicas).add("families", opts.families).toString());
+  final boolean regionCountChanged =
+    exists && opts.presplitRegions != DEFAULT_OPTS.presplitRegions
+      && opts.presplitRegions != admin.getRegions(tableName).size();
+  final boolean splitPolicyChanged =
+    exists && !StringUtils.equals(desc.getRegionSplitPolicyClassName(), opts.splitPolicy);
+  final boolean regionReplicationChanged = exists && desc.getRegionReplication() != opts.replicas;
+  final boolean columnFamilyCountChanged = exists && desc.getColumnFamilyCount() != opts.families;
+
+  boolean needsDelete = regionCountChanged || splitPolicyChanged || regionReplicationChanged
+    || columnFamilyCountChanged;
+
+  if (needsDelete) {
+    final List<String> errors = new ArrayList<>();
+    if (columnFamilyCountChanged) {
+      final String error = String.format("--families=%d, but found %d column families",
+        opts.families, desc.getColumnFamilyCount());
+      if (needsData) {
+        // We can't proceed the test in this case
+        throw new IllegalStateException(
+          "Cannot proceed the test. Run a write command first: " + error);
+      }
+      errors.add(error);
+    }
+    if (regionCountChanged) {
+      errors.add(String.format("--presplit=%d, but found %d regions", opts.presplitRegions,
+        admin.getRegions(tableName).size()));
+    }
+    if (splitPolicyChanged) {
+      errors.add(String.format("--splitPolicy=%s, but current policy is %s", opts.splitPolicy,
+        desc.getRegionSplitPolicyClassName()));
+    }
+    if (regionReplicationChanged) {
+      errors.add(String.format("--replicas=%d, but found %d replicas", opts.replicas,
+        desc.getRegionReplication()));
+    }
+    final String reason =
+      errors.stream().map(s -> '[' + s + ']').collect(Collectors.joining(", "));
+
+    if (needsData) {
+      LOG.warn(
+        "Inconsistent table state detected. Consider running a write command first: " + reason);
+      needsDelete = false;
Review Comment:
@taklwu Thanks for the feedback!
> it sounds like some wrapper script accidentally added `--presplit=30` to the read command

No, it was just me editing the workload type argument:
1. Run `bin/hbase pe --nomapred --size=2 --presplit=30 sequentialWrite 1` to generate the test data.
1. Edit the command, changing `sequentialWrite` to `randomRead` (see the two invocations below), because apart from `--presplit` you have to keep all the other options to properly run a read test against that data.
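
Concretely, the two invocations differ only in the workload type (both commands are the ones from the steps above):

```sh
# 1. Generate the test data
bin/hbase pe --nomapred --size=2 --presplit=30 sequentialWrite 1
# 2. Same command with only the workload type changed; the now-stale
#    --presplit=30 is what trips the table-recreation check in checkTable()
bin/hbase pe --nomapred --size=2 --presplit=30 randomRead 1
```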
But I can imagine a script passing all the options to the command and expecting unrelated options to be ignored automatically (which has generally been true). Otherwise it would have to maintain two sets of options, one for reads and one for writes, and conditionally choose between them. Furthermore, options like `--size` and `--families` affect both types of test, so it would have to keep yet another set for those.
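
For illustration, a minimal sketch of such a wrapper (hypothetical; the script and the option values are made up for this example) that reuses one shared option set:

```sh
#!/usr/bin/env bash
# Hypothetical wrapper: one shared option set for both phases; only the
# workload type changes between the write run and the read run.
COMMON_OPTS="--nomapred --size=2 --families=1 --presplit=30"

bin/hbase pe $COMMON_OPTS sequentialWrite 1   # load the data
bin/hbase pe $COMMON_OPTS randomRead 1        # read it back with the same options
```

As long as unrelated options stay ignored, a script like this stays trivial; splitting the options into read-only, write-only, and shared sets would roughly triple the bookkeeping.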
> we should fail fast by disallowing the use of the --presplit option with any read command upfront
The reason I decided not to "fail fast" was consistency. The PE tool has many options for reads,
```
Read Tests:
 filterAll            Helps to filter out all the rows on the server side there by not returning any thing back to the client. Helps to check the server side performance. Uses FilterAllFilter internally.
 multiGet             Batch gets together into groups of N. Only supported by randomRead. Default: disabled
 inmemory             Tries to keep the HFiles of the CF inmemory as far as possible. Not guaranteed that reads are always served from memory. Default: false
 bloomFilter          Bloom filter type, one of [NONE, ROW, ROWCOL, ROWPREFIX_FIXED_LENGTH]
 blockSize            Blocksize to use when writing out hfiles.
 inmemoryCompaction   Makes the column family to do inmemory flushes/compactions. Uses the CompactingMemstore
 addColumns           Adds columns to scans/gets explicitly. Default: true
 replicas             Enable region replica testing. Defaults: 1.
 randomSleep          Do a random sleep before each get between 0 and entered value. Defaults: 0
 caching              Scan caching to use. Default: 30
 asyncPrefetch        Enable asyncPrefetch for scan
 cacheBlocks          Set the cacheBlocks option for scan. Default: true
 scanReadType         Set the readType option for scan, stream/pread/default. Default: default
 bufferSize           Set the value of client side buffering. Default: 2MB
```
and many others for writes.
```
Table Creation / Write Tests:
 table                Alternate table name. Default: 'TestTable'
 rows                 Rows each client runs. Default: 1048576. In case of randomReads and randomSeekScans this could be specified along with --size to specify the number of rows to be scanned within the total range specified by the size.
 size                 Total size in GiB. Mutually exclusive with --rows for writes and scans. But for randomReads and randomSeekScans when you use size with --rows you could use size to specify the end range and --rows specifies the number of rows within that range. Default: 1.0.
 compress             Compression type to use (GZ, LZO, ...). Default: 'NONE'
 encryption           Encryption type to use (AES, ...). Default: 'NONE'
 flushCommits         Used to determine if the test should flush the table. Default: false
 valueZipf            Set if we should vary value size between 0 and 'valueSize' in zipf form: Default: Not set.
 writeToWAL           Set writeToWAL on puts. Default: True
 autoFlush            Set autoFlush on htable. Default: False
 multiPut             Batch puts together into groups of N. Only supported by write. If multiPut is bigger than 0, autoFlush need to set to true. Default: 0
 presplit             Create presplit table. If a table with same name exists, it'll be deleted and recreated (instead of verifying count of its existing regions). Recommended for accurate perf analysis (see guide). Default: disabled
 usetags              Writes tags along with KVs. Use with HFile V3. Default: false
 numoftags            Specify the no of tags that would be needed. This works only if usetags is true. Default: 1
 splitPolicy          Specify a custom RegionSplitPolicy for the table.
 columns              Columns to write per row. Default: 1
 families             Specify number of column families for the table. Default: 1
```
And most of the read options are ignored in write tests and vice versa, so PE does not complain.
```sh
# Generate data with some write options
bin/hbase pe --nomapred --size=2 --valueZipf --autoFlush=true sequentialWrite 1
# They are ignored in read tests, so we don't have to manually remove them from the command
bin/hbase pe --nomapred --size=2 --valueZipf --autoFlush=true randomRead 1
```
And I didn't want to make `--presplit` an exception. What do you think?