nastra commented on code in PR #16441:
URL: https://github.com/apache/iceberg/pull/16441#discussion_r3273406371
##########
core/src/main/java/org/apache/iceberg/StatsUtil.java:
##########
@@ -34,80 +34,67 @@
class StatsUtil {
private static final Logger LOG = LoggerFactory.getLogger(StatsUtil.class);
- // the number of reserved field IDs from the reserved field ID space as
defined in
- // https://iceberg.apache.org/spec/#reserved-field-ids
- static final int NUM_RESERVED_FIELD_IDS = 200;
- // the starting field ID of the reserved field ID space
- static final int RESERVED_FIELD_IDS_START = Integer.MAX_VALUE -
NUM_RESERVED_FIELD_IDS;
- // the number of supported stats per table column
+ private static final int FIRST_SUPPORTED_METADATA_FIELD_ID =
+ MetadataColumns.LAST_UPDATED_SEQUENCE_NUMBER.fieldId();
+ static final Set<Integer> SUPPORTED_METADATA_FIELD_IDS =
+ Sets.newHashSet(
+ MetadataColumns.LAST_UPDATED_SEQUENCE_NUMBER.fieldId(),
MetadataColumns.ROW_ID.fieldId());
static final int NUM_SUPPORTED_STATS_PER_COLUMN = 200;
- // the starting field ID of the stats space for data field IDs
+ static final int STATS_SPACE_FIELD_ID_START_FOR_METADATA_FIELDS = 9_000;
static final int STATS_SPACE_FIELD_ID_START_FOR_DATA_FIELDS = 10_000;
- // the starting field ID of the stats space for metadata field IDs
- static final int STATS_SPACE_FIELD_ID_START_FOR_METADATA_FIELDS =
2_147_000_000;
- // support stats for only up to this amount of data field IDs
- static final int MAX_DATA_FIELD_ID = 1_000_000;
- static final int MAX_DATA_STATS_FIELD_ID = 200_010_000;
+ // exclusive upper bound of the stats field ID range reserved for
content_stats
+ static final int STATS_SPACE_FIELD_ID_END = 200_000_000;
+ static final int MAX_DATA_STATS_FIELD_ID =
+ STATS_SPACE_FIELD_ID_END - NUM_SUPPORTED_STATS_PER_COLUMN;
+ // the max data field ID whose stats struct fits within the reserved range
+ static final int MAX_DATA_FIELD_ID =
+ (MAX_DATA_STATS_FIELD_ID - STATS_SPACE_FIELD_ID_START_FOR_DATA_FIELDS)
+ / NUM_SUPPORTED_STATS_PER_COLUMN;
private StatsUtil() {}
public static int statsFieldIdForField(int fieldId) {
- return fieldId >= RESERVED_FIELD_IDS_START
+ return SUPPORTED_METADATA_FIELD_IDS.contains(fieldId)
? statsFieldIdForReservedField(fieldId)
: statsFieldIdForDataField(fieldId);
}
private static int statsFieldIdForDataField(int fieldId) {
- long statsFieldId =
- STATS_SPACE_FIELD_ID_START_FOR_DATA_FIELDS
- + (NUM_SUPPORTED_STATS_PER_COLUMN * (long) fieldId);
if (fieldId < 0 || fieldId > MAX_DATA_FIELD_ID) {
return -1;
}
- return (int) statsFieldId;
+ return STATS_SPACE_FIELD_ID_START_FOR_DATA_FIELDS +
(NUM_SUPPORTED_STATS_PER_COLUMN * fieldId);
}
private static int statsFieldIdForReservedField(int fieldId) {
- int offset = NUM_RESERVED_FIELD_IDS - (Integer.MAX_VALUE - fieldId);
-
- long statsFieldId =
- STATS_SPACE_FIELD_ID_START_FOR_METADATA_FIELDS
- + (NUM_SUPPORTED_STATS_PER_COLUMN * (long) offset);
- if (statsFieldId < 0 || statsFieldId > RESERVED_FIELD_IDS_START) {
- // ID overflows
- return -1;
- }
-
- return (int) statsFieldId;
+ return STATS_SPACE_FIELD_ID_START_FOR_METADATA_FIELDS
+ + (NUM_SUPPORTED_STATS_PER_COLUMN * (fieldId -
FIRST_SUPPORTED_METADATA_FIELD_ID));
}
public static int fieldIdForStatsField(int statsFieldId) {
- if (statsFieldId < STATS_SPACE_FIELD_ID_START_FOR_DATA_FIELDS
+ if (statsFieldId < STATS_SPACE_FIELD_ID_START_FOR_METADATA_FIELDS
+ || statsFieldId >= STATS_SPACE_FIELD_ID_END
|| statsFieldId % NUM_SUPPORTED_STATS_PER_COLUMN != 0) {
return -1;
}
- return statsFieldId < STATS_SPACE_FIELD_ID_START_FOR_METADATA_FIELDS
- ? fieldIdForStatsFieldFromDataField(statsFieldId)
- : fieldIdForStatsFieldFromReservedField(statsFieldId);
+ return statsFieldId < STATS_SPACE_FIELD_ID_START_FOR_DATA_FIELDS
+ ? fieldIdForStatsFieldFromReservedField(statsFieldId)
+ : fieldIdForStatsFieldFromDataField(statsFieldId);
}
private static int fieldIdForStatsFieldFromDataField(int statsFieldId) {
- return Math.max(
- -1,
- (statsFieldId - STATS_SPACE_FIELD_ID_START_FOR_DATA_FIELDS)
- / NUM_SUPPORTED_STATS_PER_COLUMN);
+ return (statsFieldId - STATS_SPACE_FIELD_ID_START_FOR_DATA_FIELDS)
+ / NUM_SUPPORTED_STATS_PER_COLUMN;
}
private static int fieldIdForStatsFieldFromReservedField(int statsFieldId) {
- return Math.max(
Review Comment:
this isn't needed anymore, because the caller already guarantees that `9000
<= statsFieldId < 10000`, so this can never be negative
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]