This is an automated email from the ASF dual-hosted git repository. sajjad pushed a commit to branch hotfix-theta-sketch in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/hotfix-theta-sketch by this push: new e54888f597 Backwards compatible theta sketch aggregation (#12288) e54888f597 is described below commit e54888f597105216666ee3b0f4d28596434e9652 Author: David Cromberge <davecrombe...@gmail.com> AuthorDate: Sun Jan 21 22:42:08 2024 +0000 Backwards compatible theta sketch aggregation (#12288) * Backwards compatible theta sketch aggregation Servers running on versions before upgrading Pinot to the ThetaSketchAccumulator would return Sketches directly to the merge function. This ensures that there is backwards compatibility between the two. * Add Theta Sketch distinct count queries to compatibility check queries --- .../config/queries/feature-test-1-sql.queries | 6 +++--- .../config/queries/feature-test-2-sql-realtime.queries | 4 ++-- .../query-results/feature-test-1-rest-sql.results | 6 +++--- .../query-results/feature-test-2-sql-realtime.results | 4 ++-- .../DistinctCountThetaSketchAggregationFunction.java | 18 ++++++++++++++++-- 5 files changed, 26 insertions(+), 12 deletions(-) diff --git a/compatibility-verifier/sample-test-suite/config/queries/feature-test-1-sql.queries b/compatibility-verifier/sample-test-suite/config/queries/feature-test-1-sql.queries index 37a6120a5d..38b8484243 100644 --- a/compatibility-verifier/sample-test-suite/config/queries/feature-test-1-sql.queries +++ b/compatibility-verifier/sample-test-suite/config/queries/feature-test-1-sql.queries @@ -22,7 +22,7 @@ SELECT count(*) FROM FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ SELECT sum(intMetric1), sumMV(intDimMV1), min(intMetric1), minMV(intDimMV2), max(longDimSV1), maxMV(intDimMV1) FROM FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ SELECT count(longDimSV1), countMV(intDimMV1), avg(floatMetric1), avgMV(intDimMV2), minMaxRange(doubleMetric1), minMaxRangeMV(intDimMV2) FROM FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ SELECT percentile(longDimSV1, 80), percentileMV(intDimMV1, 90), percentileEst(longDimSV1, 80), percentileEstMV(intDimMV1, 90), percentileTDigest(longDimSV1, 80), percentileTDigestMV(intDimMV1, 90) FROM FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ -SELECT distinctCount(longDimSV1), distinctCountMV(intDimMV1), distinctCountHLL(longDimSV1), distinctCountHLLMV(intDimMV1) FROM FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ +SELECT distinctCount(longDimSV1), distinctCountMV(intDimMV1), distinctCountHLL(longDimSV1), distinctCountHLLMV(intDimMV1), distinctCountThetaSketch(longDimSV1) FROM FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ # Selection SELECT longDimSV2, stringDimSV1, textDim1, bytesDimSV1 FROM FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ ORDER BY longDimSV2 LIMIT 9 @@ -46,14 +46,14 @@ SELECT longDimSV1, intDimMV1, count(*) FROM FeatureTest1 WHERE generationNumber SELECT longDimSV1, intDimMV1, sum(intMetric1), sumMV(intDimMV1), min(intMetric1), minMV(intDimMV2), max(longDimSV1), maxMV(intDimMV1) FROM FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1 LIMIT 5 SELECT longDimSV1, intDimMV1, count(longDimSV1), countMV(intDimMV1), avg(floatMetric1), avgMV(intDimMV2), minMaxRange(doubleMetric1), minMaxRangeMV(intDimMV2) FROM FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1 LIMIT 5 SELECT longDimSV1, intDimMV1, percentile(longDimSV1, 80), percentileMV(intDimMV1, 90), percentileEst(longDimSV1, 80), percentileEstMV(intDimMV1, 90), percentileTDigest(longDimSV1, 80), percentileTDigestMV(intDimMV1, 90) FROM FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1 LIMIT 5 -SELECT longDimSV1, intDimMV1, distinctCount(longDimSV1), distinctCountMV(intDimMV1), distinctCountHLL(longDimSV1), distinctCountHLLMV(intDimMV1) FROM FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1 LIMIT 5 +SELECT longDimSV1, intDimMV1, distinctCount(longDimSV1), distinctCountMV(intDimMV1), distinctCountHLL(longDimSV1), distinctCountHLLMV(intDimMV1), distinctCountThetaSketch(longDimSV1) FROM FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1 LIMIT 5 # Selection & Filtering & Grouping on Aggregation SELECT longDimSV1, intDimMV1, count(*) FROM FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND longDimSV1 BETWEEN 10 AND 1000 OR (intDimMV1 < 42 AND stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2 NOT IN (6,72))) GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1, intDimMV1 LIMIT 5 SELECT longDimSV1, intDimMV1, sum(intMetric1), sumMV(intDimMV1), min(intMetric1), minMV(intDimMV2), max(longDimSV1), maxMV(intDimMV1) FROM FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND longDimSV1 BETWEEN 10 AND 1000 OR (intDimMV1 < 42 AND stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2 NOT IN (6,72))) GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1, intDimMV1 LIMIT 5 SELECT longDimSV1, intDimMV1, count(longDimSV1), countMV(intDimMV1), avg(floatMetric1), avgMV(intDimMV2), minMaxRange(doubleMetric1), minMaxRangeMV(intDimMV2) FROM FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND longDimSV1 BETWEEN 10 AND 1000 OR (intDimMV1 < 42 AND stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2 NOT IN (6,72))) GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1, intDimMV1 LIMIT 5 SELECT longDimSV1, intDimMV1, percentile(longDimSV1, 80), percentileMV(intDimMV1, 90), percentileEst(longDimSV1, 80), percentileEstMV(intDimMV1, 90), percentileTDigest(longDimSV1, 80), percentileTDigestMV(intDimMV1, 90) FROM FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND longDimSV1 BETWEEN 10 AND 1000 OR (intDimMV1 < 42 AND stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2 NOT IN (6,72))) GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1, in [...] -SELECT longDimSV1, intDimMV1, distinctCount(longDimSV1), distinctCountMV(intDimMV1), distinctCountHLL(longDimSV1), distinctCountHLLMV(intDimMV1) FROM FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND longDimSV1 BETWEEN 10 AND 1000 OR (intDimMV1 < 42 AND stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2 NOT IN (6,72))) GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1, intDimMV1 LIMIT 5 +SELECT longDimSV1, intDimMV1, distinctCount(longDimSV1), distinctCountMV(intDimMV1), distinctCountHLL(longDimSV1), distinctCountHLLMV(intDimMV1), distinctCountThetaSketch(longDimSV1) FROM FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND longDimSV1 BETWEEN 10 AND 1000 OR (intDimMV1 < 42 AND stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2 NOT IN (6,72))) GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1, intDimMV1 LIMIT 5 # Transformation Functions SELECT add(longDimSV1, sub(longDimSV2, 3)), mod(intMetric1, 10), div(doubleMetric1, mult(floatMetric1, 5)) FROM FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ ORDER BY add(longDimSV1, sub(longDimSV2, 3)) DESC, mod(intMetric1, 10) diff --git a/compatibility-verifier/sample-test-suite/config/queries/feature-test-2-sql-realtime.queries b/compatibility-verifier/sample-test-suite/config/queries/feature-test-2-sql-realtime.queries index da9c43d7ad..3627205534 100644 --- a/compatibility-verifier/sample-test-suite/config/queries/feature-test-2-sql-realtime.queries +++ b/compatibility-verifier/sample-test-suite/config/queries/feature-test-2-sql-realtime.queries @@ -32,7 +32,7 @@ SELECT sum(intMetric1), sumMV(intDimMV1), min(intMetric1), minMV(intDimMV2), max SELECT count(longDimSV1), countMV(intDimMV1), avg(floatMetric1), avgMV(intDimMV2), minMaxRange(doubleMetric1), minMaxRangeMV(intDimMV2) FROM FeatureTest2 WHERE generationNumber = __GENERATION_NUMBER__ SELECT percentile(longDimSV1, 80), percentileMV(intDimMV1, 90), percentileEst(longDimSV1, 80), percentileEstMV(intDimMV1, 90), percentileTDigest(longDimSV1, 80), percentileTDigestMV(intDimMV1, 90) FROM FeatureTest2 WHERE generationNumber = __GENERATION_NUMBER__ SELECT percentile(longDimSV1, 80.01), percentileMV(intDimMV1, 99.99), percentileEst(longDimSV1, 80.01), percentileEstMV(intDimMV1, 99.99), percentileTDigest(longDimSV1, 80.01), percentileTDigestMV(intDimMV1, 99.99) FROM FeatureTest2 WHERE generationNumber = __GENERATION_NUMBER__ -SELECT distinctCount(longDimSV1), distinctCountMV(intDimMV1), distinctCountHLL(longDimSV1), distinctCountHLLMV(intDimMV1) FROM FeatureTest2 WHERE generationNumber = __GENERATION_NUMBER__ +SELECT distinctCount(longDimSV1), distinctCountMV(intDimMV1), distinctCountHLL(longDimSV1), distinctCountHLLMV(intDimMV1), distinctCountThetaSketch(longDimSV1) FROM FeatureTest2 WHERE generationNumber = __GENERATION_NUMBER__ # Selection & Filtering & Grouping on Aggregation SELECT longDimSV1, intDimMV1, count(*) FROM FeatureTest2 WHERE generationNumber = __GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND longDimSV1 BETWEEN 10 AND 1000 OR (intDimMV1 < 42 AND stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2 NOT IN (6,72))) GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1, intDimMV1 LIMIT 20 @@ -40,7 +40,7 @@ SELECT longDimSV1, intDimMV1, sum(intMetric1), sumMV(intDimMV1), min(intMetric1) SELECT longDimSV1, intDimMV1, count(longDimSV1), countMV(intDimMV1), avg(floatMetric1), avgMV(intDimMV2), minMaxRange(doubleMetric1), minMaxRangeMV(intDimMV2) FROM FeatureTest2 WHERE generationNumber = __GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND longDimSV1 BETWEEN 10 AND 1000 OR (intDimMV1 < 42 AND stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2 NOT IN (6,72))) GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1, intDimMV1 LIMIT 20 SELECT longDimSV1, intDimMV1, percentile(longDimSV1, 80), percentileMV(intDimMV1, 90), percentileEst(longDimSV1, 80), percentileEstMV(intDimMV1, 90), percentileTDigest(longDimSV1, 80), percentileTDigestMV(intDimMV1, 90) FROM FeatureTest2 WHERE generationNumber = __GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND longDimSV1 BETWEEN 10 AND 1000 OR (intDimMV1 < 42 AND stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2 NOT IN (6,72))) GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1, in [...] SELECT longDimSV1, intDimMV1, percentile(longDimSV1, 80.01), percentileMV(intDimMV1, 99.99), percentileEst(longDimSV1, 80.01), percentileEstMV(intDimMV1, 99.99), percentileTDigest(longDimSV1, 80.01), percentileTDigestMV(intDimMV1, 99.99) FROM FeatureTest2 WHERE generationNumber = __GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND longDimSV1 BETWEEN 10 AND 1000 OR (intDimMV1 < 42 AND stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2 NOT IN (6,72))) GROUP BY longDimSV1, intDimMV1 ORDER [...] -SELECT longDimSV1, intDimMV1, distinctCount(longDimSV1), distinctCountMV(intDimMV1), distinctCountHLL(longDimSV1), distinctCountHLLMV(intDimMV1) FROM FeatureTest2 WHERE generationNumber = __GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND longDimSV1 BETWEEN 10 AND 1000 OR (intDimMV1 < 42 AND stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2 NOT IN (6,72))) GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1, intDimMV1 LIMIT 20 +SELECT longDimSV1, intDimMV1, distinctCount(longDimSV1), distinctCountMV(intDimMV1), distinctCountHLL(longDimSV1), distinctCountHLLMV(intDimMV1), distinctCountThetaSketch(longDimSV1) FROM FeatureTest2 WHERE generationNumber = __GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND longDimSV1 BETWEEN 10 AND 1000 OR (intDimMV1 < 42 AND stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2 NOT IN (6,72))) GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1, intDimMV1 LIMIT 20 # Transformation Functions SELECT DISTINCT add(longDimSV1, sub(longDimSV2, 3)), mod(intMetric1, 10), div(doubleMetric1, mult(floatMetric1, 5)) FROM FeatureTest2 WHERE generationNumber = __GENERATION_NUMBER__ ORDER BY add(longDimSV1, sub(longDimSV2, 3)) DESC, mod(intMetric1, 10), div(doubleMetric1, mult(floatMetric1, 5)) diff --git a/compatibility-verifier/sample-test-suite/config/query-results/feature-test-1-rest-sql.results b/compatibility-verifier/sample-test-suite/config/query-results/feature-test-1-rest-sql.results index 83ae247116..aad84fc46e 100644 --- a/compatibility-verifier/sample-test-suite/config/query-results/feature-test-1-rest-sql.results +++ b/compatibility-verifier/sample-test-suite/config/query-results/feature-test-1-rest-sql.results @@ -22,7 +22,7 @@ {"resultTable":{"dataSchema":{"columnDataTypes":["DOUBLE","DOUBLE","DOUBLE","DOUBLE","DOUBLE","DOUBLE"],"columnNames":["sum(intMetric1)","summv(intDimMV1)","min(intMetric1)","minmv(intDimMV2)","max(longDimSV1)","maxmv(intDimMV1)"]},"rows":[[4.294967536E9,-2.147479976E9,0.0,6.0,7611.0,462.0]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":10,"numEntriesScannedPostFilter":40,"numGroupsLi [...] {"resultTable":{"dataSchema":{"columnDataTypes":["LONG","LONG","DOUBLE","DOUBLE","DOUBLE","DOUBLE"],"columnNames":["count(*)","countmv(intDimMV1)","avg(floatMetric1)","avgmv(intDimMV2)","minmaxrange(doubleMetric1)","minmaxrangemv(intDimMV2)"]},"rows":[[10,19,114.09000263214111,1516.9,250.00000000000003,6656.0]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":10,"numEntriesScannedPostFil [...] {"resultTable":{"dataSchema":{"columnDataTypes":["DOUBLE","DOUBLE","LONG","LONG","DOUBLE","DOUBLE"],"columnNames":["percentile(longDimSV1, 80.0)","percentilemv(intDimMV1, 90.0)","percentileest(longDimSV1, 80.0)","percentileestmv(intDimMV1, 90.0)","percentiletdigest(longDimSV1, 80.0)","percentiletdigestmv(intDimMV1, 90.0)"]},"rows":[[7611.0,462.0,7611,462,7611.0,462.0]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegme [...] -{"resultTable":{"dataSchema":{"columnDataTypes":["INT","INT","LONG","LONG"],"columnNames":["distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)"]},"rows":[[6,8,6,8]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":10,"numEntriesScannedPostFilter":20,"numGroupsLimitReached":false,"totalDocs":10,"timeUsedMs":6,"segmentStati [...] +{"resultTable":{"dataSchema":{"columnDataTypes":["INT","INT","LONG","LONG","LONG"],"columnNames":["distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)","distinctcountthetasketch(longDimSV1)"]},"rows":[[6,8,6,8,6]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":10,"numEntriesScannedPostFilter":20,"numGroupsLimitReached":f [...] # Selection {"resultTable":{"dataSchema":{"columnDataTypes":["LONG","STRING","STRING","BYTES"],"columnNames":["longDimSV2","stringDimSV1","textDim1","bytesDimSV1"]},"rows":[[2,"s1-0","Java C++ Python","4877625602"],[2,"s1-0","Java C++ Python","01a0bc"],[21,"s1-2","Java C++ golang","13225573e3f5"],[21,"s1-2","Java C++ golang","deadbeef"],[22,"s1-4","Java C++ golang","deed0507"],[32,"s1-5","golang shell bash",""],[6777,"s1-7","golang Java","d54d0507"],[7621,"s1-6","C++ golang python","deed0507"],[7621 [...] @@ -42,14 +42,14 @@ {"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","DOUBLE","DOUBLE","DOUBLE","DOUBLE","DOUBLE","DOUBLE"],"columnNames":["longDimSV1","intDimMV1","sum(intMetric1)","summv(intDimMV1)","min(intMetric1)","minmv(intDimMV2)","max(longDimSV1)","maxmv(intDimMV1)"]},"rows":[[-9223372036854775808,-2147483648,0.0,-2.147483648E9,0.0,22.0,-9.223372036854776E18,-2.147483648E9],[1,3,20.0,14.0,10.0,6.0,1.0,4.0],[1,4,20.0,14.0,10.0,6.0,1.0,4.0],[11,42,20.0,148.0,10.0,62.0,11.0,42.0],[11,32,20 [...] {"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","LONG","LONG","DOUBLE","DOUBLE","DOUBLE","DOUBLE"],"columnNames":["longDimSV1","intDimMV1","count(*)","countmv(intDimMV1)","avg(floatMetric1)","avgmv(intDimMV2)","minmaxrange(doubleMetric1)","minmaxrangemv(intDimMV2)"]},"rows":[[-9223372036854775808,-2147483648,1,1,0.0,57.0,0.0,70.0],[1,3,2,4,12.100000381469727,6.5,0.0,1.0],[1,4,2,4,12.100000381469727,6.5,0.0,1.0],[11,42,2,4,22.100000381469727,67.0,0.0,10.0],[11,32,2,4,22.1000 [...] {"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","DOUBLE","DOUBLE","LONG","LONG","DOUBLE","DOUBLE"],"columnNames":["longDimSV1","intDimMV1","percentile(longDimSV1, 80.0)","percentilemv(intDimMV1, 90.0)","percentileest(longDimSV1, 80.0)","percentileestmv(intDimMV1, 90.0)","percentiletdigest(longDimSV1, 80.0)","percentiletdigestmv(intDimMV1, 90.0)"]},"rows":[[-9223372036854775808,-2147483648,-9.223372036854776E18,-2.147483648E9,-9223372036854775808,-2147483648,-9.2233720368547 [...] -{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","INT","INT","LONG","LONG"],"columnNames":["longDimSV1","intDimMV1","distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)"]},"rows":[[-9223372036854775808,-2147483648,1,1,1,1],[1,3,1,2,1,2],[1,4,1,2,1,2],[11,42,1,2,1,2],[11,32,1,2,1,2]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatche [...] +{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","INT","INT","LONG","LONG","LONG"],"columnNames":["longDimSV1","intDimMV1","distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)","distinctcountthetasketch(longDimSV1)"]},"rows":[[-9223372036854775808,-2147483648,1,1,1,1,1],[1,3,1,2,1,2,1],[1,4,1,2,1,2,1],[11,42,1,2,1,2,1],[11,32,1,2,1,2,1]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmen [...] # Selection & Filtering & Grouping on Aggregation {"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","LONG"],"columnNames":["longDimSV1","intDimMV1","count(*)"]},"rows":[[-9223372036854775808,-2147483648,1],[11,32,2],[11,42,2],[41,42,1],[41,52,1]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":4,"numEntriesScannedPostFilter":8,"numGroupsLimitReached":false,"totalDocs":10,"timeUsedMs":8,"segmentStatistics":[],"traceInfo":{}, [...] {"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","DOUBLE","DOUBLE","DOUBLE","DOUBLE","DOUBLE","DOUBLE"],"columnNames":["longDimSV1","intDimMV1","sum(intMetric1)","summv(intDimMV1)","min(intMetric1)","minmv(intDimMV2)","max(longDimSV1)","maxmv(intDimMV1)"]},"rows":[[-9223372036854775808,-2147483648,0,-2147483648,0,22,-9223372036854776000,-2147483648],[11,32,20,148,10,62,11,42],[11,42,20,148,10,62,11,42],[41,42,14,94,14,72,41,52],[41,52,14,94,14,72,41,52]]},"exceptions":[],"nu [...] {"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","LONG","LONG","DOUBLE","DOUBLE","DOUBLE","DOUBLE"],"columnNames":["longDimSV1","intDimMV1","count(*)","countmv(intDimMV1)","avg(floatMetric1)","avgmv(intDimMV2)","minmaxrange(doubleMetric1)","minmaxrangemv(intDimMV2)"]},"rows":[[-9223372036854775808,-2147483648,1,1,0,57,0,70],[11,32,2,4,22.100000381469727,67,0,10],[11,42,2,4,22.100000381469727,67,0,10],[41,42,1,2,24.100000381469727,77,0,10],[41,52,1,2,24.100000381469727,77,0,1 [...] {"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","DOUBLE","DOUBLE","LONG","LONG","DOUBLE","DOUBLE"],"columnNames":["longDimSV1","intDimMV1","percentile(longDimSV1, 80.0)","percentilemv(intDimMV1, 90.0)","percentileest(longDimSV1, 80.0)","percentileestmv(intDimMV1, 90.0)","percentiletdigest(longDimSV1, 80.0)","percentiletdigestmv(intDimMV1, 90.0)"]},"rows":[[-9223372036854775808,-2147483648,-9223372036854775808,-2147483648,-9223372036854775808,-2147483648,-9223372036854776000 [...] -{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","INT","INT","LONG","LONG"],"columnNames":["longDimSV1","intDimMV1","distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)"]},"rows":[[-9223372036854775808,-2147483648,1,1,1,1],[11,32,1,2,1,2],[11,42,1,2,1,2],[41,42,1,2,1,2],[41,52,1,2,1,2]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMa [...] +{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","INT","INT","LONG","LONG","LONG"],"columnNames":["longDimSV1","intDimMV1","distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)","distinctcountthetasketch(longDimSV1)"]},"rows":[[-9223372036854775808,-2147483648,1,1,1,1,1],[11,32,1,2,1,2,1],[11,42,1,2,1,2,1],[41,42,1,2,1,2,1],[41,52,1,2,1,2,1]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSe [...] # Transformation Functions {"resultTable":{"dataSchema":{"columnDataTypes":["DOUBLE","DOUBLE","DOUBLE"],"columnNames":["add(longDimSV1,sub(longDimSV2,'3'))","mod(intMetric1,'10')","div(doubleMetric1,mult(floatMetric1,'5'))"]},"rows":[[15229.0,1.0,0.20076306285631254],[15229.0,7.0,0.20076306285631254],[15229.0,7.0,0.20076306285631254],[13540.0,7.0,0.20076306285631254],[60.0,4.0,0.1999999968342762],[29.0,0.0,0.20904977014723267],[29.0,0.0,0.20904977014723267],[0.0,0.0,0.21652891879345226],[0.0,0.0,0.2165289187934522 [...] diff --git a/compatibility-verifier/sample-test-suite/config/query-results/feature-test-2-sql-realtime.results b/compatibility-verifier/sample-test-suite/config/query-results/feature-test-2-sql-realtime.results index 47f7a2805c..849020c104 100644 --- a/compatibility-verifier/sample-test-suite/config/query-results/feature-test-2-sql-realtime.results +++ b/compatibility-verifier/sample-test-suite/config/query-results/feature-test-2-sql-realtime.results @@ -29,7 +29,7 @@ {"resultTable":{"dataSchema":{"columnNames":["count(*)","countmv(intDimMV1)","avg(floatMetric1)","avgmv(intDimMV2)","minmaxrange(doubleMetric1)","minmaxrangemv(intDimMV2)"],"columnDataTypes":["LONG","LONG","DOUBLE","DOUBLE","DOUBLE","DOUBLE"]},"rows":[[66,125,105.11969939145175,1383.2575757575758,250.00000000000003,6656.0]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":66,"numEntriesS [...] {"resultTable":{"dataSchema":{"columnNames":["percentile(longDimSV1, 80.0)","percentilemv(intDimMV1, 90.0)","percentileest(longDimSV1, 80.0)","percentileestmv(intDimMV1, 90.0)","percentiletdigest(longDimSV1, 80.0)","percentiletdigestmv(intDimMV1, 90.0)"],"columnDataTypes":["DOUBLE","DOUBLE","LONG","LONG","DOUBLE","DOUBLE"]},"rows":[[7611.0,462.0,7611,462,7611.0,462.0]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegme [...] {"resultTable":{"dataSchema":{"columnNames":["percentile(longDimSV1, 80.01)","percentilemv(intDimMV1, 99.99)","percentileest(longDimSV1, 80.01)","percentileestmv(intDimMV1, 99.99)","percentiletdigest(longDimSV1, 80.01)","percentiletdigestmv(intDimMV1, 99.99)"],"columnDataTypes":["DOUBLE","DOUBLE","LONG","LONG","DOUBLE","DOUBLE"]},"rows":[[7611.0,462.0,7611,462,7611.0,462.0]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"nu [...] -{"resultTable":{"dataSchema":{"columnNames":["distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)"],"columnDataTypes":["INT","INT","LONG","LONG"]},"rows":[[6,8,6,8]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":66,"numEntriesScannedPostFilter":132,"numGroupsLimitReached":false,"totalDocs":66,"timeUsedMs":5,"offlineThre [...] +{"resultTable":{"dataSchema":{"columnNames":["distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)","distinctcountthetasketch(longDimSV1)"],"columnDataTypes":["INT","INT","LONG","LONG","LONG"]},"rows":[[6,8,6,8,6]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":66,"numEntriesScannedPostFilter":132,"numGroupsLimitReached": [...] # Selection & Filtering & Grouping on Aggregation {"resultTable":{"dataSchema":{"columnNames":["longDimSV1","intDimMV1","count(*)"],"columnDataTypes":["LONG","INT","LONG"]},"rows":[[-9223372036854775808,-2147483648,7],[11,32,14],[11,42,14],[41,42,7],[41,52,7]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":28,"numEntriesScannedPostFilter":56,"numGroupsLimitReached":false,"totalDocs":66,"timeUsedMs":6,"offlineThreadCpuTimeNs":0,"realti [...] @@ -37,7 +37,7 @@ {"resultTable":{"dataSchema":{"columnNames":["longDimSV1","intDimMV1","count(*)","countmv(intDimMV1)","avg(floatMetric1)","avgmv(intDimMV2)","minmaxrange(doubleMetric1)","minmaxrangemv(intDimMV2)"],"columnDataTypes":["LONG","INT","LONG","LONG","DOUBLE","DOUBLE","DOUBLE","DOUBLE"]},"rows":[[-9223372036854775808,-2147483648,7,7,0.0,57.0,0.0,70.0],[11,32,14,28,22.100000381469727,67.0,0.0,10.0],[11,42,14,28,22.100000381469727,67.0,0.0,10.0],[41,42,7,14,24.100000381469727,77.0,0.0,10.0],[41,5 [...] {"resultTable":{"dataSchema":{"columnNames":["longDimSV1","intDimMV1","percentile(longDimSV1, 80.0)","percentilemv(intDimMV1, 90.0)","percentileest(longDimSV1, 80.0)","percentileestmv(intDimMV1, 90.0)","percentiletdigest(longDimSV1, 80.0)","percentiletdigestmv(intDimMV1, 90.0)"],"columnDataTypes":["LONG","INT","DOUBLE","DOUBLE","LONG","LONG","DOUBLE","DOUBLE"]},"rows":[[-9223372036854775808,-2147483648,-9.223372036854776E18,-2.147483648E9,-9223372036854775808,-2147483648,-9.2233720368547 [...] {"resultTable":{"dataSchema":{"columnNames":["longDimSV1","intDimMV1","percentile(longDimSV1, 80.01)","percentilemv(intDimMV1, 99.99)","percentileest(longDimSV1, 80.01)","percentileestmv(intDimMV1, 99.99)","percentiletdigest(longDimSV1, 80.01)","percentiletdigestmv(intDimMV1, 99.99)"],"columnDataTypes":["LONG","INT","DOUBLE","DOUBLE","LONG","LONG","DOUBLE","DOUBLE"]},"rows":[[-9223372036854775808,-2147483648,-9.223372036854776E18,-2.147483648E9,-9223372036854775808,-2147483648,-9.2233720 [...] -{"resultTable":{"dataSchema":{"columnNames":["longDimSV1","intDimMV1","distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)"],"columnDataTypes":["LONG","INT","INT","INT","LONG","LONG"]},"rows":[[-9223372036854775808,-2147483648,1,1,1,1],[11,32,1,2,1,2],[11,42,1,2,1,2],[41,42,1,2,1,2],[41,52,1,2,1,2]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMa [...] +{"resultTable":{"dataSchema":{"columnNames":["longDimSV1","intDimMV1","distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)","distinctcountthetasketch(longDimSV1)"],"columnDataTypes":["LONG","INT","INT","INT","LONG","LONG","LONG"]},"rows":[[-9223372036854775808,-2147483648,1,1,1,1,1],[11,32,1,2,1,2,1],[11,42,1,2,1,2,1],[41,42,1,2,1,2,1],[41,52,1,2,1,2,1]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSe [...] # Transformation Functions {"resultTable":{"dataSchema":{"columnNames":["add(longDimSV1,sub(longDimSV2,'3'))","mod(intMetric1,'10')","div(doubleMetric1,mult(floatMetric1,'5'))"],"columnDataTypes":["DOUBLE","DOUBLE","DOUBLE"]},"rows":[[15229.0,1.0,0.20076306285631254],[15229.0,7.0,0.20076306285631254],[13540.0,7.0,0.20076306285631254],[60.0,4.0,0.1999999968342762],[29.0,0.0,0.20904977014723267],[0.0,0.0,0.21652891879345226],[-9.223372036854776E18,0.0,"Infinity"]]},"exceptions":[],"numServersQueried":1,"numServersRe [...] diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java index 4a9a846acd..9f250aff8a 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java @@ -985,8 +985,8 @@ public class DistinctCountThetaSketchAggregationFunction int numAccumulators = acc1.size(); List<ThetaSketchAccumulator> mergedAccumulators = new ArrayList<>(numAccumulators); for (int i = 0; i < numAccumulators; i++) { - ThetaSketchAccumulator thetaSketchAccumulator1 = acc1.get(i); - ThetaSketchAccumulator thetaSketchAccumulator2 = acc2.get(i); + ThetaSketchAccumulator thetaSketchAccumulator1 = convertSketchAccumulator(acc1.get(i)); + ThetaSketchAccumulator thetaSketchAccumulator2 = convertSketchAccumulator(acc2.get(i)); if (thetaSketchAccumulator1.isEmpty()) { mergedAccumulators.add(thetaSketchAccumulator2); continue; @@ -1025,6 +1025,20 @@ public class DistinctCountThetaSketchAggregationFunction return Math.round(evaluatePostAggregationExpression(_postAggregationExpression, mergedSketches).getEstimate()); } + // This ensures backward compatibility with servers that still return sketches directly. + // The AggregationDataTableReducer casts intermediate results to Objects and although the code compiles, + // types might still be incompatible at runtime due to type erasure. + // Due to performance overheads of redundant casts, this should be removed at some future point. + private ThetaSketchAccumulator convertSketchAccumulator(Object mergeResult) { + if (mergeResult instanceof Sketch) { + Sketch sketch = (Sketch) mergeResult; + ThetaSketchAccumulator accumulator = new ThetaSketchAccumulator(_setOperationBuilder, _accumulatorThreshold); + accumulator.apply(sketch); + return accumulator; + } + return (ThetaSketchAccumulator) mergeResult; + } + /** * Helper method to collect expressions in the filter. */ --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org