amogh-jahagirdar commented on code in PR #14480:
URL: https://github.com/apache/iceberg/pull/14480#discussion_r2492299966
##########
core/src/test/java/org/apache/iceberg/rest/RESTCatalogAdapter.java:
##########
@@ -429,6 +562,68 @@ public <T extends RESTResponse> T handleRequest(
return null;
}
+ /**
+ * Plans all files for the given scan upfront and batches the resulting file scan tasks
+ * across plan tasks.
+ *
+ * <p>Each batch is stored in {@code planTaskToFileScanTasks} under a key of the form
+ * {@code <plan ID>-<table UUID>-<plan task sequence>}, where the sequence starts at 0.
+ * Consecutive keys are chained through {@code planTaskToNext}; this method adds no
+ * {@code planTaskToNext} entry for the last key. The current implementation simply uses
+ * plan tasks as a pagination mechanism to control response sizes.
+ *
+ * @param tableScan the scan whose {@code planFiles()} result is batched; its table UUID
+ *     becomes part of every plan task key
+ * @param planId the plan ID used as the prefix of every plan task key
+ */
+ private void planFilesFor(TableScan tableScan, String planId) {
+ // Batch size comes from planningBehavior.numberFileScanTasksPerPlanTask().
+ Iterable<List<FileScanTask>> taskGroupings =
+ Iterables.partition(
+ tableScan.planFiles(),
planningBehavior.numberFileScanTasksPerPlanTask());
+ int planTaskSequence = 0;
+ String prevPlanTask = null;
+ for (List<FileScanTask> taskGrouping : taskGroupings) {
+ String planTaskKey =
+ String.format("%s-%s-%s", planId, tableScan.table().uuid(),
planTaskSequence++);
+ planTaskToFileScanTasks.put(planTaskKey, taskGrouping);
+ // Link the previous plan task to this one so the batches can be followed in order.
+ if (prevPlanTask != null) {
+ planTaskToNext.put(prevPlanTask, planTaskKey);
+ }
+
+ prevPlanTask = planTaskKey;
+ }
+ }
+
+ /**
+ * Submits the planning work for the given scan to {@code asyncPlanningPool} instead of
+ * running it on the calling thread.
+ *
+ * @param scan the scan handed to {@code planFilesFor}
+ * @param asyncPlanId the plan ID handed to {@code planFilesFor}
+ */
+ private void asyncPlanFiles(TableScan scan, String asyncPlanId) {
+ // NOTE(review): the value returned by submit(...) is discarded. If asyncPlanningPool is
+ // an ExecutorService, an exception thrown by planFilesFor would be held by that Future
+ // and would not surface here -- confirm this is intended.
+ asyncPlanningPool.submit(
+ () -> {
+ planFilesFor(scan, asyncPlanId);
+ });
+ }
+
+ // The initial set of file scan tasks is going to have a sentinel plan task
which ends in
+ // 0. Directly return this set of file scan tasks as the initial set, along
with
+ // any next plan task if applicable
+ private Pair<List<FileScanTask>, String> initialScanTasksForPlan(String
planId) {
+ Set<Map.Entry<String, List<FileScanTask>>> initialPlanTaskAndFileScanTasks
=
+ planTaskToFileScanTasks.entrySet().stream()
+ .filter(
+ planTask -> planTask.getKey().contains(planId) &&
planTask.getKey().endsWith("0"))
Review Comment:
Ah yeah let me clean this up...
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]