gortiz commented on code in PR #15135:
URL: https://github.com/apache/pinot/pull/15135#discussion_r1973359860


##########
pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/SpoolIntegrationTest.java:
##########
@@ -117,13 +121,146 @@ public void intermediateSpool()
     JsonNode stats = jsonNode.get("stageStats");
     assertNoError(jsonNode);
     DocumentContext parsed = JsonPath.parse(stats.toString());
-    List<Map<String, Object>> stage4On3 = parsed.read("$..[?(@.stage == 
3)]..[?(@.stage == 4)]");
-    Assert.assertEquals(stage4On3.size(), 1, "Stage 4 should be descended from 
stage 3 exactly once");
 
-    List<Map<String, Object>> stage4On7 = parsed.read("$..[?(@.stage == 
7)]..[?(@.stage == 4)]");
-    Assert.assertEquals(stage4On3.size(), 1, "Stage 4 should be descended from 
stage 7 exactly once");
+    checkSpoolTimes(parsed, 4, 3, 1);
+    checkSpoolTimes(parsed, 4, 7, 1);
+    checkSpoolSame(parsed, 4, 3, 7);
+  }
+
+  /**
+   * Test a complex with nested spools. Don't try to understand it, just check 
that the spools are correct.
+   * The test name corresponds to the PR that fixed the issue.
+   */
+  @Test
+  public void test15135()
+      throws Exception {
+    JsonNode jsonNode = postQuery("SET useSpools = true;\n"
+        + "\n"
+        + "WITH\n"
+        + "    q1 AS (\n"
+        + "        SELECT ArrTimeBlk as userUUID,\n"
+        + "               Dest as deviceOS,\n"
+        + "               SUM(ArrTime) AS totalTrips\n"
+        + "        FROM mytable\n"
+        + "        GROUP BY ArrTimeBlk, Dest\n"
+        + "    ),\n"
+        + "     q2 AS (\n"
+        + "         SELECT userUUID,\n"
+        + "                deviceOS,\n"
+        + "                SUM(totalTrips) AS totalTrips,\n"
+        + "                COUNT(DISTINCT userUUID) AS reach\n"
+        + "         FROM q1\n"
+        + "         GROUP BY userUUID,\n"
+        + "                  deviceOS\n"
+        + "     ),\n"
+        + "     q3 AS (\n"
+        + "         SELECT userUUID,\n"
+        + "                (totalTrips / reach) AS frequency\n"
+        + "         FROM q2\n"
+        + "     ),\n"
+        + "     q4 AS (\n"
+        + "         SELECT rd.userUUID,\n"
+        + "                rd.deviceOS,\n"
+        + "                rd.totalTrips as totalTrips,\n"
+        + "                rd.reach AS reach\n"
+        + "         FROM q2 rd\n"
+        + "     ),\n"
+        + "     q5 AS (\n"
+        + "         SELECT userUUID,\n"
+        + "                SUM(totalTrips) AS totalTrips\n"
+        + "         FROM q4\n"
+        + "         GROUP BY userUUID\n"
+        + "     ),\n"
+        + "     q6 AS (\n"
+        + "         SELECT s.userUUID,\n"
+        + "                s.totalTrips,\n"
+        + "                (s.totalTrips / o.frequency) AS reach,\n"
+        + "                'Traditional TV + OTT' AS deviceOS\n"
+        + "         FROM q5 s\n"
+        + "                  JOIN q3 o ON s.userUUID = o.userUUID\n"
+        + "     ),\n"
+        + "     q7 AS (\n"
+        + "         SELECT rd.userUUID,\n"
+        + "                rd.totalTrips,\n"
+        + "                rd.reach,\n"
+        + "                rd.deviceOS\n"
+        + "         FROM q4 rd\n"
+        + "         UNION ALL\n"
+        + "         SELECT f.userUUID,\n"
+        + "                f.totalTrips,\n"
+        + "                f.reach,\n"
+        + "                f.deviceOS\n"
+        + "         FROM q6 f\n"
+        + "     ),\n"
+        + "     q8 AS (\n"
+        + "         SELECT sd.*\n"
+        + "         FROM q7 sd\n"
+        + "                  JOIN (\n"
+        + "             SELECT deviceOS,\n"
+        + "                    PERCENTILETDigest(totalTrips, 20) AS p20\n"
+        + "             FROM q7\n"
+        + "             GROUP BY deviceOS\n"
+        + "         ) q ON sd.deviceOS = q.deviceOS\n"
+        + "     )\n"
+        + "SELECT *\n"
+        + "FROM q8");
+    JsonNode stats = jsonNode.get("stageStats");
+    assertNoError(jsonNode);
+    DocumentContext parsed = JsonPath.parse(stats.toString());
+
+    checkSpoolTimes(parsed, 6, 5, 1);
+    checkSpoolTimes(parsed, 6, 14, 1);
+    checkSpoolSame(parsed, 6, 5, 14);
+
+    checkSpoolTimes(parsed, 7, 6, 2);
+
+    checkSpoolTimes(parsed, 4, 3, 1);
+    checkSpoolTimes(parsed, 4, 7, 2); // because there are 2 copies of 7 as 
well

Review Comment:
   As you can see above, stage 7 is spooled twice inside 6. Stage 7 also 
depends (aka is parent of) one of the incarnations of stage 4. Therefore if we 
look for all stages whose id is 4 and 7 is an ancestor, we are going to find 2 
cases. 
   
   A schema would be great, I'll add one



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to