shubham-roy commented on code in PR #6435: URL: https://github.com/apache/hbase/pull/6435#discussion_r1845770311
########## hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java: ########## @@ -524,6 +526,106 @@ public void testInvalidTable() throws Exception { } } + /** + * Step 1: Add 6 rows(row1, row2, row3, row4, row5 and row6) to a table. Each row contains 1 + * column family and 4 columns. Step 2: Delete a column for row1. Step 3: Delete a column family + * for row2 and row4. Step 4: Delete all versions of a specific column for row3, row5 and row6. + * <p> + * Case 1: Run row counter without countDeleteMarkers flag Step a: Validate counter values. + * <p> + * Case 2: Run row counter with countDeleteMarkers flag Step a: Validate counter values. + */ + @Test + public void testRowCounterWithCountDeleteMarkersOption() throws Exception { + // Test Setup + + final TableName tableName = + TableName.valueOf(TABLE_NAME + "_" + "withCountDeleteMarkersOption"); + final byte[][] rowKeys = { Bytes.toBytes("row1"), Bytes.toBytes("row2"), Bytes.toBytes("row3"), + Bytes.toBytes("row4"), Bytes.toBytes("row5"), Bytes.toBytes("row6") }; + final byte[] columnFamily = Bytes.toBytes("cf"); + final byte[][] columns = + { Bytes.toBytes("A"), Bytes.toBytes("B"), Bytes.toBytes("C"), Bytes.toBytes("D") }; + final byte[] value = Bytes.toBytes("a"); + + try (Table table = TEST_UTIL.createTable(tableName, columnFamily)) { + // Step 1: Insert rows with columns + for (byte[] rowKey : rowKeys) { + Put put = new Put(rowKey); + for (byte[] col : columns) { + put.addColumn(columnFamily, col, value); + } + table.put(put); + } + TEST_UTIL.getAdmin().flush(tableName); + + // Steps 2, 3, and 4: Delete columns, families, and all versions of columns + Delete deleteA = new Delete(rowKeys[0]).addColumn(columnFamily, columns[0]); + Delete deleteB = new Delete(rowKeys[1]).addFamily(columnFamily); + Delete deleteC = new Delete(rowKeys[2]).addColumns(columnFamily, columns[0]); + Delete deleteD = new Delete(rowKeys[3]).addFamily(columnFamily); + Delete deleteE = new 
Delete(rowKeys[4]).addColumns(columnFamily, columns[0]); + Delete deleteF = new Delete(rowKeys[5]).addColumns(columnFamily, columns[0]); + + table.delete(deleteA); + table.delete(deleteB); + table.delete(deleteC); + table.delete(deleteD); + table.delete(deleteE); + table.delete(deleteF); + TEST_UTIL.getAdmin().flush(tableName); + } + + RowCounter rowCounterWithoutCountDeleteMarkers = new RowCounter(); + RowCounter rowCounterWithCountDeleteMarkers = new RowCounter(); + rowCounterWithoutCountDeleteMarkers.setConf(TEST_UTIL.getConfiguration()); + rowCounterWithCountDeleteMarkers.setConf(TEST_UTIL.getConfiguration()); + + // Invocation + + rowCounterWithoutCountDeleteMarkers.run(new String[] { tableName.getNameAsString() }); + rowCounterWithCountDeleteMarkers + .run(new String[] { tableName.getNameAsString(), "--countDeleteMarkers" }); + + // Validation + + // Case 1: + validateCounterCounts(rowCounterWithoutCountDeleteMarkers.getMapReduceJob().getCounters(), 4, 0, + 0, 0, 0, 0, 0); + + // Case 2: + validateCounterCounts(rowCounterWithCountDeleteMarkers.getMapReduceJob().getCounters(), 6, 30, Review Comment: >We had just 4 rows right, now the job returns 6? it counts deleted rows also now?? All the rows will be returned when doing a raw scan. >Cells is size of all cells (including deleted one)? Yes. >Is not DELETE supposed to be 0, we did not delete any row right? DELETE_COLUMN shouldn't be 4? Please refer to the [Delete client](https://github.com/apache/hbase/blob/master/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Delete.java). It has the delete APIs and how they are matched to the delete marker types. addColumn -> Type.Delete addColumns -> Type.DeleteColumn addFamily -> Type.DeleteFamily -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org