Repository: spark Updated Branches: refs/heads/branch-2.0 9a8df0c9a -> d7bf318a0
http://git-wip-us.apache.org/repos/asf/spark/blob/d7bf318a/sql/core/src/test/resources/tpcds/q85.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpcds/q85.sql b/sql/core/src/test/resources/tpcds/q85.sql new file mode 100755 index 0000000..cf718b0 --- /dev/null +++ b/sql/core/src/test/resources/tpcds/q85.sql @@ -0,0 +1,82 @@ +SELECT + substr(r_reason_desc, 1, 20), + avg(ws_quantity), + avg(wr_refunded_cash), + avg(wr_fee) +FROM web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason +WHERE ws_web_page_sk = wp_web_page_sk + AND ws_item_sk = wr_item_sk + AND ws_order_number = wr_order_number + AND ws_sold_date_sk = d_date_sk AND d_year = 2000 + AND cd1.cd_demo_sk = wr_refunded_cdemo_sk + AND cd2.cd_demo_sk = wr_returning_cdemo_sk + AND ca_address_sk = wr_refunded_addr_sk + AND r_reason_sk = wr_reason_sk + AND + ( + ( + cd1.cd_marital_status = 'M' + AND + cd1.cd_marital_status = cd2.cd_marital_status + AND + cd1.cd_education_status = 'Advanced Degree' + AND + cd1.cd_education_status = cd2.cd_education_status + AND + ws_sales_price BETWEEN 100.00 AND 150.00 + ) + OR + ( + cd1.cd_marital_status = 'S' + AND + cd1.cd_marital_status = cd2.cd_marital_status + AND + cd1.cd_education_status = 'College' + AND + cd1.cd_education_status = cd2.cd_education_status + AND + ws_sales_price BETWEEN 50.00 AND 100.00 + ) + OR + ( + cd1.cd_marital_status = 'W' + AND + cd1.cd_marital_status = cd2.cd_marital_status + AND + cd1.cd_education_status = '2 yr Degree' + AND + cd1.cd_education_status = cd2.cd_education_status + AND + ws_sales_price BETWEEN 150.00 AND 200.00 + ) + ) + AND + ( + ( + ca_country = 'United States' + AND + ca_state IN ('IN', 'OH', 'NJ') + AND ws_net_profit BETWEEN 100 AND 200 + ) + OR + ( + ca_country = 'United States' + AND + ca_state IN ('WI', 'CT', 'KY') + AND ws_net_profit BETWEEN 150 AND 300 + ) + OR + ( + ca_country = 'United States' + AND + ca_state IN ('LA', 'IA', 'AR') + AND ws_net_profit BETWEEN 50 AND 250 + ) + ) +GROUP BY r_reason_desc +ORDER BY substr(r_reason_desc, 1, 20) + , avg(ws_quantity) + , avg(wr_refunded_cash) + , avg(wr_fee) +LIMIT 100 http://git-wip-us.apache.org/repos/asf/spark/blob/d7bf318a/sql/core/src/test/resources/tpcds/q86.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpcds/q86.sql b/sql/core/src/test/resources/tpcds/q86.sql new file mode 100755 index 0000000..789a4ab --- /dev/null +++ b/sql/core/src/test/resources/tpcds/q86.sql @@ -0,0 +1,24 @@ +SELECT + sum(ws_net_paid) AS total_sum, + i_category, + i_class, + grouping(i_category) + grouping(i_class) AS lochierarchy, + rank() + OVER ( + PARTITION BY grouping(i_category) + grouping(i_class), + CASE WHEN grouping(i_class) = 0 + THEN i_category END + ORDER BY sum(ws_net_paid) DESC) AS rank_within_parent +FROM + web_sales, date_dim d1, item +WHERE + d1.d_month_seq BETWEEN 1200 AND 1200 + 11 + AND d1.d_date_sk = ws_sold_date_sk + AND i_item_sk = ws_item_sk +GROUP BY ROLLUP (i_category, i_class) +ORDER BY + lochierarchy DESC, + CASE WHEN lochierarchy = 0 + THEN i_category END, + rank_within_parent +LIMIT 100 http://git-wip-us.apache.org/repos/asf/spark/blob/d7bf318a/sql/core/src/test/resources/tpcds/q87.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpcds/q87.sql b/sql/core/src/test/resources/tpcds/q87.sql new file mode 100755 index 0000000..4aaa9f3 --- /dev/null +++ b/sql/core/src/test/resources/tpcds/q87.sql @@ -0,0 +1,28 @@ +SELECT count(*) +FROM ((SELECT DISTINCT + c_last_name, + c_first_name, + d_date +FROM store_sales, date_dim, customer +WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk + AND store_sales.ss_customer_sk = customer.c_customer_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11) + EXCEPT + (SELECT DISTINCT + c_last_name, + c_first_name, + d_date + FROM catalog_sales, date_dim, customer + WHERE catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + AND catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11) + EXCEPT + (SELECT DISTINCT + c_last_name, + c_first_name, + d_date + FROM web_sales, date_dim, customer + WHERE web_sales.ws_sold_date_sk = date_dim.d_date_sk + AND web_sales.ws_bill_customer_sk = customer.c_customer_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11) + ) cool_cust http://git-wip-us.apache.org/repos/asf/spark/blob/d7bf318a/sql/core/src/test/resources/tpcds/q88.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpcds/q88.sql b/sql/core/src/test/resources/tpcds/q88.sql new file mode 100755 index 0000000..25bcd90 --- /dev/null +++ b/sql/core/src/test/resources/tpcds/q88.sql @@ -0,0 +1,122 @@ +SELECT * +FROM + (SELECT count(*) h8_30_to_9 + FROM store_sales, household_demographics, time_dim, store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 8 + AND time_dim.t_minute >= 30 + AND ( + (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) + OR + (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) + OR + (household_demographics.hd_dep_count = 0 AND + household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s1, + (SELECT count(*) h9_to_9_30 + FROM store_sales, household_demographics, time_dim, store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 9 + AND time_dim.t_minute < 30 + AND ( + (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) + OR + (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) + OR + (household_demographics.hd_dep_count = 0 AND + household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s2, + (SELECT count(*) h9_30_to_10 + FROM store_sales, household_demographics, time_dim, store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 9 + AND time_dim.t_minute >= 30 + AND ( + (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) + OR + (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) + OR + (household_demographics.hd_dep_count = 0 AND + household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s3, + (SELECT count(*) h10_to_10_30 + FROM store_sales, household_demographics, time_dim, store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 10 + AND time_dim.t_minute < 30 + AND ( + (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) + OR + (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) + OR + (household_demographics.hd_dep_count = 0 AND + household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s4, + (SELECT count(*) h10_30_to_11 + FROM store_sales, household_demographics, time_dim, store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 10 + AND time_dim.t_minute >= 30 + AND ( + (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) + OR + (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) + OR + (household_demographics.hd_dep_count = 0 AND + household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s5, + (SELECT count(*) h11_to_11_30 + FROM store_sales, household_demographics, time_dim, store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 11 + AND time_dim.t_minute < 30 + AND ( + (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) + OR + (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) + OR + (household_demographics.hd_dep_count = 0 AND + household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s6, + (SELECT count(*) h11_30_to_12 + FROM store_sales, household_demographics, time_dim, store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 11 + AND time_dim.t_minute >= 30 + AND ( + (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) + OR + (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) + OR + (household_demographics.hd_dep_count = 0 AND + household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s7, + (SELECT count(*) h12_to_12_30 + FROM store_sales, household_demographics, time_dim, store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 12 + AND time_dim.t_minute < 30 + AND ( + (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) + OR + (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) + OR + (household_demographics.hd_dep_count = 0 AND + household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s8 http://git-wip-us.apache.org/repos/asf/spark/blob/d7bf318a/sql/core/src/test/resources/tpcds/q89.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpcds/q89.sql b/sql/core/src/test/resources/tpcds/q89.sql new file mode 100755 index 0000000..75408cb --- /dev/null +++ b/sql/core/src/test/resources/tpcds/q89.sql @@ -0,0 +1,30 @@ +SELECT * +FROM ( + SELECT + i_category, + i_class, + i_brand, + s_store_name, + s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) + OVER + (PARTITION BY i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales + FROM item, store_sales, date_dim, store + WHERE ss_item_sk = i_item_sk AND + ss_sold_date_sk = d_date_sk AND + ss_store_sk = s_store_sk AND + d_year IN (1999) AND + ((i_category IN ('Books', 'Electronics', 'Sports') AND + i_class IN ('computers', 'stereo', 'football')) + OR (i_category IN ('Men', 'Jewelry', 'Women') AND + i_class IN ('shirts', 'birdal', 'dresses'))) + GROUP BY i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +WHERE CASE WHEN (avg_monthly_sales <> 0) + THEN (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) + ELSE NULL END > 0.1 +ORDER BY sum_sales - avg_monthly_sales, s_store_name +LIMIT 100 http://git-wip-us.apache.org/repos/asf/spark/blob/d7bf318a/sql/core/src/test/resources/tpcds/q9.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpcds/q9.sql b/sql/core/src/test/resources/tpcds/q9.sql new file mode 100755 index 0000000..de3db9d --- /dev/null +++ b/sql/core/src/test/resources/tpcds/q9.sql @@ -0,0 +1,48 @@ +SELECT + CASE WHEN (SELECT count(*) + FROM store_sales + WHERE ss_quantity BETWEEN 1 AND 20) > 62316685 + THEN (SELECT avg(ss_ext_discount_amt) + FROM store_sales + WHERE ss_quantity BETWEEN 1 AND 20) + ELSE (SELECT avg(ss_net_paid) + FROM store_sales + WHERE ss_quantity BETWEEN 1 AND 20) END bucket1, + CASE WHEN (SELECT count(*) + FROM store_sales + WHERE ss_quantity BETWEEN 21 AND 40) > 19045798 + THEN (SELECT avg(ss_ext_discount_amt) + FROM store_sales + WHERE ss_quantity BETWEEN 21 AND 40) + ELSE (SELECT avg(ss_net_paid) + FROM store_sales + WHERE ss_quantity BETWEEN 21 AND 40) END bucket2, + CASE WHEN (SELECT count(*) + FROM store_sales + WHERE ss_quantity BETWEEN 41 AND 60) > 365541424 + THEN (SELECT avg(ss_ext_discount_amt) + FROM store_sales + WHERE ss_quantity BETWEEN 41 AND 60) + ELSE (SELECT avg(ss_net_paid) + FROM store_sales + WHERE ss_quantity BETWEEN 41 AND 60) END bucket3, + CASE WHEN (SELECT count(*) + FROM store_sales + WHERE ss_quantity BETWEEN 61 AND 80) > 216357808 + THEN (SELECT avg(ss_ext_discount_amt) + FROM store_sales + WHERE ss_quantity BETWEEN 61 AND 80) + ELSE (SELECT avg(ss_net_paid) + FROM store_sales + WHERE ss_quantity BETWEEN 61 AND 80) END bucket4, + CASE WHEN (SELECT count(*) + FROM store_sales + WHERE ss_quantity BETWEEN 81 AND 100) > 184483884 + THEN (SELECT avg(ss_ext_discount_amt) + FROM store_sales + WHERE ss_quantity BETWEEN 81 AND 100) + ELSE (SELECT avg(ss_net_paid) + FROM store_sales + WHERE ss_quantity BETWEEN 81 AND 100) END bucket5 +FROM reason +WHERE r_reason_sk = 1 http://git-wip-us.apache.org/repos/asf/spark/blob/d7bf318a/sql/core/src/test/resources/tpcds/q90.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpcds/q90.sql b/sql/core/src/test/resources/tpcds/q90.sql new file mode 100755 index 0000000..85e35bf --- /dev/null +++ b/sql/core/src/test/resources/tpcds/q90.sql @@ -0,0 +1,19 @@ +SELECT cast(amc AS DECIMAL(15, 4)) / cast(pmc AS DECIMAL(15, 4)) am_pm_ratio +FROM (SELECT count(*) amc +FROM web_sales, household_demographics, time_dim, web_page +WHERE ws_sold_time_sk = time_dim.t_time_sk + AND ws_ship_hdemo_sk = household_demographics.hd_demo_sk + AND ws_web_page_sk = web_page.wp_web_page_sk + AND time_dim.t_hour BETWEEN 8 AND 8 + 1 + AND household_demographics.hd_dep_count = 6 + AND web_page.wp_char_count BETWEEN 5000 AND 5200) at, + (SELECT count(*) pmc + FROM web_sales, household_demographics, time_dim, web_page + WHERE ws_sold_time_sk = time_dim.t_time_sk + AND ws_ship_hdemo_sk = household_demographics.hd_demo_sk + AND ws_web_page_sk = web_page.wp_web_page_sk + AND time_dim.t_hour BETWEEN 19 AND 19 + 1 + AND household_demographics.hd_dep_count = 6 + AND web_page.wp_char_count BETWEEN 5000 AND 5200) pt +ORDER BY am_pm_ratio +LIMIT 100 http://git-wip-us.apache.org/repos/asf/spark/blob/d7bf318a/sql/core/src/test/resources/tpcds/q91.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpcds/q91.sql b/sql/core/src/test/resources/tpcds/q91.sql new file mode 100755 index 0000000..9ca7ce0 --- /dev/null +++ b/sql/core/src/test/resources/tpcds/q91.sql @@ -0,0 +1,23 @@ +SELECT + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +FROM + call_center, catalog_returns, date_dim, customer, customer_address, + customer_demographics, household_demographics +WHERE + cr_call_center_sk = cc_call_center_sk + AND cr_returned_date_sk = d_date_sk + AND cr_returning_customer_sk = c_customer_sk + AND cd_demo_sk = c_current_cdemo_sk + AND hd_demo_sk = c_current_hdemo_sk + AND ca_address_sk = c_current_addr_sk + AND d_year = 1998 + AND d_moy = 11 + AND ((cd_marital_status = 'M' AND cd_education_status = 'Unknown') + OR (cd_marital_status = 'W' AND cd_education_status = 'Advanced Degree')) + AND hd_buy_potential LIKE 'Unknown%' + AND ca_gmt_offset = -7 +GROUP BY cc_call_center_id, cc_name, cc_manager, cd_marital_status, cd_education_status +ORDER BY sum(cr_net_loss) DESC http://git-wip-us.apache.org/repos/asf/spark/blob/d7bf318a/sql/core/src/test/resources/tpcds/q92.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpcds/q92.sql b/sql/core/src/test/resources/tpcds/q92.sql new file mode 100755 index 0000000..99129c3 --- /dev/null +++ b/sql/core/src/test/resources/tpcds/q92.sql @@ -0,0 +1,16 @@ +SELECT sum(ws_ext_discount_amt) AS `Excess Discount Amount ` +FROM web_sales, item, date_dim +WHERE i_manufact_id = 350 + AND i_item_sk = ws_item_sk + AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + INTERVAL 90 days) + AND d_date_sk = ws_sold_date_sk + AND ws_ext_discount_amt > + ( + SELECT 1.3 * avg(ws_ext_discount_amt) + FROM web_sales, date_dim + WHERE ws_item_sk = i_item_sk + AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + INTERVAL 90 days) + AND d_date_sk = ws_sold_date_sk + ) +ORDER BY sum(ws_ext_discount_amt) +LIMIT 100 http://git-wip-us.apache.org/repos/asf/spark/blob/d7bf318a/sql/core/src/test/resources/tpcds/q93.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpcds/q93.sql b/sql/core/src/test/resources/tpcds/q93.sql new file mode 100755 index 0000000..222dc31 --- /dev/null +++ b/sql/core/src/test/resources/tpcds/q93.sql @@ -0,0 +1,19 @@ +SELECT + ss_customer_sk, + sum(act_sales) sumsales +FROM (SELECT + ss_item_sk, + ss_ticket_number, + ss_customer_sk, + CASE WHEN sr_return_quantity IS NOT NULL + THEN (ss_quantity - sr_return_quantity) * ss_sales_price + ELSE (ss_quantity * ss_sales_price) END act_sales +FROM store_sales + LEFT OUTER JOIN store_returns + ON (sr_item_sk = ss_item_sk AND sr_ticket_number = ss_ticket_number) + , + reason +WHERE sr_reason_sk = r_reason_sk AND r_reason_desc = 'reason 28') t +GROUP BY ss_customer_sk +ORDER BY sumsales, ss_customer_sk +LIMIT 100 http://git-wip-us.apache.org/repos/asf/spark/blob/d7bf318a/sql/core/src/test/resources/tpcds/q94.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpcds/q94.sql b/sql/core/src/test/resources/tpcds/q94.sql new file mode 100755 index 0000000..d6de3d7 --- /dev/null +++ b/sql/core/src/test/resources/tpcds/q94.sql @@ -0,0 +1,23 @@ +SELECT + count(DISTINCT ws_order_number) AS `order count `, + sum(ws_ext_ship_cost) AS `total shipping cost `, + sum(ws_net_profit) AS `total net profit ` +FROM + web_sales ws1, date_dim, customer_address, web_site +WHERE + d_date BETWEEN '1999-02-01' AND + (CAST('1999-02-01' AS DATE) + INTERVAL 60 days) + AND ws1.ws_ship_date_sk = d_date_sk + AND ws1.ws_ship_addr_sk = ca_address_sk + AND ca_state = 'IL' + AND ws1.ws_web_site_sk = web_site_sk + AND web_company_name = 'pri' + AND EXISTS(SELECT * + FROM web_sales ws2 + WHERE ws1.ws_order_number = ws2.ws_order_number + AND ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + AND NOT EXISTS(SELECT * + FROM web_returns wr1 + WHERE ws1.ws_order_number = wr1.wr_order_number) +ORDER BY count(DISTINCT ws_order_number) +LIMIT 100 http://git-wip-us.apache.org/repos/asf/spark/blob/d7bf318a/sql/core/src/test/resources/tpcds/q95.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpcds/q95.sql b/sql/core/src/test/resources/tpcds/q95.sql new file mode 100755 index 0000000..df71f00 --- /dev/null +++ b/sql/core/src/test/resources/tpcds/q95.sql @@ -0,0 +1,29 @@ +WITH ws_wh AS +(SELECT + ws1.ws_order_number, + ws1.ws_warehouse_sk wh1, + ws2.ws_warehouse_sk wh2 + FROM web_sales ws1, web_sales ws2 + WHERE ws1.ws_order_number = ws2.ws_order_number + AND ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +SELECT + count(DISTINCT ws_order_number) AS `order count `, + sum(ws_ext_ship_cost) AS `total shipping cost `, + sum(ws_net_profit) AS `total net profit ` +FROM + web_sales ws1, date_dim, customer_address, web_site +WHERE + d_date BETWEEN '1999-02-01' AND + (CAST('1999-02-01' AS DATE) + INTERVAL 60 DAY) + AND ws1.ws_ship_date_sk = d_date_sk + AND ws1.ws_ship_addr_sk = ca_address_sk + AND ca_state = 'IL' + AND ws1.ws_web_site_sk = web_site_sk + AND web_company_name = 'pri' + AND ws1.ws_order_number IN (SELECT ws_order_number + FROM ws_wh) + AND ws1.ws_order_number IN (SELECT wr_order_number + FROM web_returns, ws_wh + WHERE wr_order_number = ws_wh.ws_order_number) +ORDER BY count(DISTINCT ws_order_number) +LIMIT 100 http://git-wip-us.apache.org/repos/asf/spark/blob/d7bf318a/sql/core/src/test/resources/tpcds/q96.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpcds/q96.sql b/sql/core/src/test/resources/tpcds/q96.sql new file mode 100755 index 0000000..7ab17e7 --- /dev/null +++ b/sql/core/src/test/resources/tpcds/q96.sql @@ -0,0 +1,11 @@ +SELECT count(*) +FROM store_sales, household_demographics, time_dim, store +WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 20 + AND time_dim.t_minute >= 30 + AND household_demographics.hd_dep_count = 7 + AND store.s_store_name = 'ese' +ORDER BY count(*) +LIMIT 100 http://git-wip-us.apache.org/repos/asf/spark/blob/d7bf318a/sql/core/src/test/resources/tpcds/q97.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpcds/q97.sql b/sql/core/src/test/resources/tpcds/q97.sql new file mode 100755 index 0000000..e7e0b1a --- /dev/null +++ b/sql/core/src/test/resources/tpcds/q97.sql @@ -0,0 +1,30 @@ +WITH ssci AS ( + SELECT + ss_customer_sk customer_sk, + ss_item_sk item_sk + FROM store_sales, date_dim + WHERE ss_sold_date_sk = d_date_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 + GROUP BY ss_customer_sk, ss_item_sk), + csci AS ( + SELECT + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + FROM catalog_sales, date_dim + WHERE cs_sold_date_sk = d_date_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 + GROUP BY cs_bill_customer_sk, cs_item_sk) +SELECT + sum(CASE WHEN ssci.customer_sk IS NOT NULL AND csci.customer_sk IS NULL + THEN 1 + ELSE 0 END) store_only, + sum(CASE WHEN ssci.customer_sk IS NULL AND csci.customer_sk IS NOT NULL + THEN 1 + ELSE 0 END) catalog_only, + sum(CASE WHEN ssci.customer_sk IS NOT NULL AND csci.customer_sk IS NOT NULL + THEN 1 + ELSE 0 END) store_and_catalog +FROM ssci + FULL OUTER JOIN csci ON (ssci.customer_sk = csci.customer_sk + AND ssci.item_sk = csci.item_sk) +LIMIT 100 http://git-wip-us.apache.org/repos/asf/spark/blob/d7bf318a/sql/core/src/test/resources/tpcds/q98.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpcds/q98.sql b/sql/core/src/test/resources/tpcds/q98.sql new file mode 100755 index 0000000..bb10d4b --- /dev/null +++ b/sql/core/src/test/resources/tpcds/q98.sql @@ -0,0 +1,21 @@ +SELECT + i_item_desc, + i_category, + i_class, + i_current_price, + sum(ss_ext_sales_price) AS itemrevenue, + sum(ss_ext_sales_price) * 100 / sum(sum(ss_ext_sales_price)) + OVER + (PARTITION BY i_class) AS revenueratio +FROM + store_sales, item, date_dim +WHERE + ss_item_sk = i_item_sk + AND i_category IN ('Sports', 'Books', 'Home') + AND ss_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('1999-02-22' AS DATE) + AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days) +GROUP BY + i_item_id, i_item_desc, i_category, i_class, i_current_price +ORDER BY + i_category, i_class, i_item_id, i_item_desc, revenueratio http://git-wip-us.apache.org/repos/asf/spark/blob/d7bf318a/sql/core/src/test/resources/tpcds/q99.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpcds/q99.sql b/sql/core/src/test/resources/tpcds/q99.sql new file mode 100755 index 0000000..f1a3d4d --- /dev/null +++ b/sql/core/src/test/resources/tpcds/q99.sql @@ -0,0 +1,34 @@ +SELECT + substr(w_warehouse_name, 1, 20), + sm_type, + cc_name, + sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk <= 30) + THEN 1 + ELSE 0 END) AS `30 days `, + sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 30) AND + (cs_ship_date_sk - cs_sold_date_sk <= 60) + THEN 1 + ELSE 0 END) AS `31 - 60 days `, + sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 60) AND + (cs_ship_date_sk - cs_sold_date_sk <= 90) + THEN 1 + ELSE 0 END) AS `61 - 90 days `, + sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 90) AND + (cs_ship_date_sk - cs_sold_date_sk <= 120) + THEN 1 + ELSE 0 END) AS `91 - 120 days `, + sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 120) + THEN 1 + ELSE 0 END) AS `>120 days ` +FROM + catalog_sales, warehouse, ship_mode, call_center, date_dim +WHERE + d_month_seq BETWEEN 1200 AND 1200 + 11 + AND cs_ship_date_sk = d_date_sk + AND cs_warehouse_sk = w_warehouse_sk + AND cs_ship_mode_sk = sm_ship_mode_sk + AND cs_call_center_sk = cc_call_center_sk +GROUP BY + substr(w_warehouse_name, 1, 20), sm_type, cc_name +ORDER BY substr(w_warehouse_name, 1, 20), sm_type, cc_name +LIMIT 100 http://git-wip-us.apache.org/repos/asf/spark/blob/d7bf318a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala new file mode 100644 index 0000000..398d8d9 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.benchmark + +import java.io.File + +import org.apache.spark.SparkConf +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation +import org.apache.spark.sql.catalyst.expressions.SubqueryExpression +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.util._ +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.util.Benchmark + +/** + * Benchmark to measure TPCDS query performance. + * To run this: + * spark-submit --class <this class> --jars <spark sql test jar> + */ +object TPCDSQueryBenchmark { + val conf = + new SparkConf() + .setMaster("local[1]") + .setAppName("test-sql-context") + .set("spark.sql.parquet.compression.codec", "snappy") + .set("spark.sql.shuffle.partitions", "4") + .set("spark.driver.memory", "3g") + .set("spark.executor.memory", "3g") + .set("spark.sql.autoBroadcastJoinThreshold", (20 * 1024 * 1024).toString) + + val spark = SparkSession.builder.config(conf).getOrCreate() + + val tables = Seq("catalog_page", "catalog_returns", "customer", "customer_address", + "customer_demographics", "date_dim", "household_demographics", "inventory", "item", + "promotion", "store", "store_returns", "catalog_sales", "web_sales", "store_sales", + "web_returns", "web_site", "reason", "call_center", "warehouse", "ship_mode", "income_band", + "time_dim", "web_page") + + def setupTables(dataLocation: String): Map[String, Long] = { + tables.map { tableName => + spark.read.parquet(s"$dataLocation/$tableName").createOrReplaceTempView(tableName) + tableName -> spark.table(tableName).count() + }.toMap + } + + def tpcdsAll(dataLocation: String, queries: Seq[String]): Unit = { + require(dataLocation.nonEmpty, + "please modify the value of dataLocation to point to your local TPCDS data") + val tableSizes = setupTables(dataLocation) + spark.conf.set(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key, "true") + spark.conf.set(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key, "true") + queries.foreach { name => + val queryString = fileToString(new File(Thread.currentThread().getContextClassLoader + .getResource(s"tpcds/$name.sql").getFile)) + + // This is an indirect hack to estimate the size of each query's input by traversing the + // logical plan and adding up the sizes of all tables that appear in the plan. Note that this + // currently doesn't take WITH subqueries into account which might lead to fairly inaccurate + // per-row processing time for those cases. + val queryRelations = scala.collection.mutable.HashSet[String]() + spark.sql(queryString).queryExecution.logical.map { + case ur @ UnresolvedRelation(t: TableIdentifier, _) => + queryRelations.add(t.table) + case lp: LogicalPlan => + lp.expressions.foreach { _ foreach { + case subquery: SubqueryExpression => + subquery.plan.foreach { + case ur @ UnresolvedRelation(t: TableIdentifier, _) => + queryRelations.add(t.table) + case _ => + } + case _ => + } + } + case _ => + } + val numRows = queryRelations.map(tableSizes.getOrElse(_, 0L)).sum + val benchmark = new Benchmark(s"TPCDS Snappy", numRows, 5) + benchmark.addCase(name) { i => + spark.sql(queryString).collect() + } + benchmark.run() + } + } + + def main(args: Array[String]): Unit = { + + // List of all TPC-DS queries + val tpcdsQueries = Seq( + "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", + "q12", "q13", "q14a", "q14b", "q15", "q16", "q17", "q18", "q19", "q20", + "q21", "q22", "q23a", "q23b", "q24a", "q24b", "q25", "q26", "q27", "q28", "q29", "q30", + "q31", "q32", "q33", "q34", "q35", "q36", "q37", "q38", "q39a", "q39b", "q40", + "q41", "q42", "q43", "q44", "q45", "q46", "q47", "q48", "q49", "q50", + "q51", "q52", "q53", "q54", "q55", "q56", "q57", "q58", "q59", "q60", + "q61", "q62", "q63", "q64", "q65", "q66", "q67", "q68", "q69", "q70", + "q71", "q72", "q73", "q74", "q75", "q76", "q77", "q78", "q79", "q80", + "q81", "q82", "q83", "q84", "q85", "q86", "q87", "q88", "q89", "q90", + "q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99") + + // In order to run this benchmark, please follow the instructions at + // https://github.com/databricks/spark-sql-perf/blob/master/README.md to generate the TPCDS data + // locally (preferably with a scale factor of 5 for benchmarking). Thereafter, the value of + // dataLocation below needs to be set to the location where the generated data is stored. + val dataLocation = "" + + tpcdsAll(dataLocation, queries = tpcdsQueries) + } +} http://git-wip-us.apache.org/repos/asf/spark/blob/d7bf318a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/TPCDSBenchmark.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/TPCDSBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/TPCDSBenchmark.scala deleted file mode 100644 index 228ae6f..0000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/TPCDSBenchmark.scala +++ /dev/null @@ -1,1226 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.execution.datasources.parquet - -import org.apache.spark.SparkConf -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation -import org.apache.spark.util.Benchmark - -/** - * Benchmark to measure TPCDS query performance. - * To run this: - * spark-submit --class <this class> --jars <spark sql test jar> - */ -object TPCDSBenchmark { - val conf = new SparkConf() - conf.set("spark.sql.parquet.compression.codec", "snappy") - conf.set("spark.sql.shuffle.partitions", "4") - conf.set("spark.driver.memory", "3g") - conf.set("spark.executor.memory", "3g") - conf.set("spark.sql.autoBroadcastJoinThreshold", (20 * 1024 * 1024).toString) - conf.setMaster("local[1]") - conf.setAppName("test-sql-context") - - val spark = SparkSession.builder.config(conf).getOrCreate() - - // These queries a subset of the TPCDS benchmark queries and are taken from - // https://github.com/databricks/spark-sql-perf/blob/master/src/main/scala/com/databricks/spark/ - // sql/perf/tpcds/ImpalaKitQueries.scala - val tpcds = Seq( - ("q19", """ - |select - | i_brand_id, - | i_brand, - | i_manufact_id, - | i_manufact, - | sum(ss_ext_sales_price) ext_price - |from - | store_sales - | join item on (store_sales.ss_item_sk = item.i_item_sk) - | join store on (store_sales.ss_store_sk = store.s_store_sk) - | join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk) - | join customer on (store_sales.ss_customer_sk = customer.c_customer_sk) - | join customer_address on - | (customer.c_current_addr_sk = customer_address.ca_address_sk) - |where - | ss_sold_date_sk between 2451484 and 2451513 - | and d_moy = 11 - | and d_year = 1999 - | and i_manager_id = 7 - | and substr(ca_zip, 1, 5) <> substr(s_zip, 1, 5) - |group by - | i_brand, - | i_brand_id, - | i_manufact_id, - | i_manufact - |order by - | ext_price desc, - | i_brand, - | i_brand_id, - | i_manufact_id, - | i_manufact - |limit 100 - """.stripMargin), - - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - TPCDS Snappy (scale = 5): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------- - q19 1710 / 1858 8.7 114.5 1.0X - */ - - ("q27", """ - |select - | i_item_id, - | s_state, - | avg(ss_quantity) agg1, - | avg(ss_list_price) agg2, - | avg(ss_coupon_amt) agg3, - | avg(ss_sales_price) agg4 - |from - | store_sales - | join store on (store_sales.ss_store_sk = store.s_store_sk) - | join customer_demographics on - | (store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk) - | join item on (store_sales.ss_item_sk = item.i_item_sk) - | join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk) - |where - | ss_sold_date_sk between 2450815 and 2451179 -- partition key filter - | and d_year = 1998 - | and cd_gender = 'F' - | and cd_marital_status = 'W' - | and cd_education_status = 'Primary' - | and s_state in ('WI', 'CA', 'TX', 'FL', 'WA', 'TN') - |group by - | i_item_id, - | s_state - |order by - | i_item_id, - | s_state - |limit 100 - """.stripMargin), - - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - TPCDS Snappy (scale = 5): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------- - q27 2016 / 2180 8.2 122.6 1.0X - */ - - ("q3", """ - |select - | dt.d_year, - | item.i_brand_id brand_id, - | item.i_brand brand, - | sum(ss_ext_sales_price) sum_agg - |from - | store_sales - | join item on (store_sales.ss_item_sk = item.i_item_sk) - | join date_dim dt on (dt.d_date_sk = store_sales.ss_sold_date_sk) - |where - | item.i_manufact_id = 436 - | and dt.d_moy = 12 - | and (ss_sold_date_sk between 2451149 and 2451179 - | or ss_sold_date_sk between 2451514 and 2451544 - | or ss_sold_date_sk between 2451880 and 2451910 - | or ss_sold_date_sk between 2452245 and 2452275 - | or ss_sold_date_sk between 2452610 and 2452640) - |group by - | d_year, - | item.i_brand, - | item.i_brand_id - |order by - | d_year, - | sum_agg desc, - | brand_id - |limit 100 - """.stripMargin), - - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - TPCDS Snappy (scale = 5): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------- - q3 1073 / 1140 13.5 73.9 1.0X - */ - - ("q34", """ - |select - | c_last_name, - | c_first_name, - | c_salutation, - | c_preferred_cust_flag, - | ss_ticket_number, - | cnt - |from - | (select - | ss_ticket_number, - | ss_customer_sk, - | count(*) cnt - | from - | store_sales - | join household_demographics on - | (store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk) - | join store on (store_sales.ss_store_sk = store.s_store_sk) - | join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk) - | where - | date_dim.d_year in (1998, 1998 + 1, 1998 + 2) - | and (date_dim.d_dom between 1 and 3 - | or date_dim.d_dom between 25 and 28) - | and (household_demographics.hd_buy_potential = '>10000' - | or household_demographics.hd_buy_potential = 'unknown') - | and household_demographics.hd_vehicle_count > 0 - | and (case when household_demographics.hd_vehicle_count > 0 then - | household_demographics.hd_dep_count / household_demographics.hd_vehicle_count - | else null end) > 1.2 - | and ss_sold_date_sk between 2450816 and 2451910 -- partition key filter - | group by - | ss_ticket_number, - | ss_customer_sk - | ) dn - |join customer on (dn.ss_customer_sk = customer.c_customer_sk) - |where - | cnt between 15 and 20 - |order by - | c_last_name, - | c_first_name, - | c_salutation, - | c_preferred_cust_flag desc, - | ss_ticket_number, - | cnt - |limit 1000 - """.stripMargin), - - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - TPCDS Snappy (scale = 5): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------- - q34 1482 / 1734 10.0 100.4 1.0X - */ - - ("q42", """ - |select - | d_year, - | i_category_id, - | i_category, - | sum(ss_ext_sales_price) as total_price - |from - | store_sales - | join item on (store_sales.ss_item_sk = item.i_item_sk) - | join date_dim dt on (dt.d_date_sk = store_sales.ss_sold_date_sk) - |where - | item.i_manager_id = 1 - | and dt.d_moy = 12 - | and dt.d_year = 1998 - | and ss_sold_date_sk between 2451149 and 2451179 -- partition key filter - |group by - | d_year, - | i_category_id, - | i_category - |order by - | total_price desc, - | d_year, - | i_category_id, - | i_category - |limit 100 - """.stripMargin), - - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - TPCDS Snappy (scale = 5): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------- - q42 1125 / 1357 12.9 77.4 1.0X - */ - - ("q43", """ - |select - | s_store_name, - | s_store_id, - | sum(case when (d_day_name = 'Sunday') then ss_sales_price else null end) sun_sales, - | sum(case when (d_day_name = 'Monday') then ss_sales_price else null end) mon_sales, - | sum(case when (d_day_name = 'Tuesday') then - | ss_sales_price else null end) tue_sales, - | sum(case when (d_day_name = 'Wednesday') then - | ss_sales_price else null end) wed_sales, - | sum(case when (d_day_name = 'Thursday') then - | ss_sales_price else null end) thu_sales, - | sum(case when (d_day_name = 'Friday') then ss_sales_price else null end) fri_sales, - | sum(case when (d_day_name = 'Saturday') then - | ss_sales_price else null end) sat_sales - |from - | store_sales - | join store on (store_sales.ss_store_sk = store.s_store_sk) - | join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk) - |where - | s_gmt_offset = -5 - | and d_year = 1998 - | and ss_sold_date_sk between 2450816 and 2451179 -- partition key filter - |group by - | s_store_name, - | s_store_id - |order by - | s_store_name, - | s_store_id, - | sun_sales, - | mon_sales, - | tue_sales, - | wed_sales, - | thu_sales, - | fri_sales, - | sat_sales - |limit 100 - """.stripMargin), - - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - TPCDS Snappy (scale = 5): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------- - q43 1681 / 1985 8.6 116.1 1.0X - */ - - ("q46", """ - |select - | c_last_name, - | c_first_name, - | ca_city, - | bought_city, - | ss_ticket_number, - | amt, - | profit - |from - | (select - | ss_ticket_number, - | ss_customer_sk, - | ca_city bought_city, - | sum(ss_coupon_amt) amt, - | sum(ss_net_profit) profit - | from - | store_sales - | join store on (store_sales.ss_store_sk = store.s_store_sk) - | join household_demographics on - | (store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk) - | join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk) - | join customer_address on - | (store_sales.ss_addr_sk = customer_address.ca_address_sk) - | where - | store.s_city in ('Midway', 'Concord', 'Spring Hill', 'Brownsville', 'Greenville') - | and (household_demographics.hd_dep_count = 5 - | or household_demographics.hd_vehicle_count = 3) - | and date_dim.d_dow in (6, 0) - | and date_dim.d_year in (1999, 1999 + 1, 1999 + 2) - | group by - | ss_ticket_number, - | ss_customer_sk, - | ss_addr_sk, - | ca_city - | ) dn - | join customer on (dn.ss_customer_sk = customer.c_customer_sk) - | join customer_address current_addr on - | (customer.c_current_addr_sk = current_addr.ca_address_sk) - |where - | current_addr.ca_city <> bought_city - |order by - | c_last_name, - | c_first_name, - | ca_city, - | bought_city, - | ss_ticket_number - |limit 100 - """.stripMargin), - - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - TPCDS Snappy (scale = 5): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------- - q46 2948 / 3218 5.1 196.1 1.0X - */ - - ("q52", """ - |select - | d_year, - | i_brand_id, - | i_brand, - | sum(ss_ext_sales_price) ext_price - |from - | store_sales - | join item on (store_sales.ss_item_sk = item.i_item_sk) - | join date_dim dt on (store_sales.ss_sold_date_sk = dt.d_date_sk) - |where - | i_manager_id = 1 - | and d_moy = 12 - | and d_year = 1998 - | and ss_sold_date_sk between 2451149 and 2451179 -- partition key filter - |group by - | d_year, - | i_brand, - | i_brand_id - |order by - | d_year, - | ext_price desc, - | i_brand_id - |limit 100 - """.stripMargin), - - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - TPCDS Snappy (scale = 5): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------- - q52 1099 / 1228 13.2 75.7 1.0X - */ - - ("q53", """ - |select - | * - |from - | (select - | i_manufact_id, - | sum(ss_sales_price) sum_sales - | from - | store_sales - | join item on (store_sales.ss_item_sk = item.i_item_sk) - | join store on (store_sales.ss_store_sk = store.s_store_sk) - | join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk) - | where - | ss_sold_date_sk between 2451911 and 2452275 -- partition key filter - | and d_month_seq in(1212, 1212 + 1, 1212 + 2, 1212 + 3, 1212 + 4, 1212 + 5, - | 1212 + 6, 1212 + 7, 1212 + 8, 1212 + 9, 1212 + 10, 1212 + 11) - | and ( - | (i_category in('Books', 'Children', 'Electronics') - | and i_class in('personal', 'portable', 'reference', 'self-help') - | and i_brand in('scholaramalgamalg #14', 'scholaramalgamalg #7', - | 'exportiunivamalg #9', 'scholaramalgamalg #9') - | ) - | or - | (i_category in('Women', 'Music', 'Men') - | and i_class in('accessories', 'classical', 'fragrances', 'pants') - | and i_brand in('amalgimporto #1', 'edu packscholar #1', - | 'exportiimporto #1', 'importoamalg #1') - | ) - | ) - | group by - | i_manufact_id, - | d_qoy - | ) tmp1 - |order by - | sum_sales, - | i_manufact_id - |limit 100 - """.stripMargin), - - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - TPCDS Snappy (scale = 5): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------- - q53 968 / 1020 15.0 66.6 1.0X - */ - - ("q55", """ - |select - | i_brand_id, - | i_brand, - | sum(ss_ext_sales_price) ext_price - |from - | store_sales - | join item on (store_sales.ss_item_sk = item.i_item_sk) - | join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk) - |where - | i_manager_id = 36 - | and d_moy = 12 - | and d_year = 2001 - | and ss_sold_date_sk between 2452245 and 2452275 -- partition key filter - |group by - | i_brand, - | i_brand_id - |order by - | ext_price desc, - | i_brand_id - |limit 100 - """.stripMargin), - - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - TPCDS Snappy (scale = 5): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------- - q55 1002 / 1020 14.5 69.0 1.0X - */ - - ("q59", """ - |select - | s_store_name1, - | s_store_id1, - | d_week_seq1, - | sun_sales1 / sun_sales2, - | mon_sales1 / mon_sales2, - | tue_sales1 / tue_sales2, - | wed_sales1 / wed_sales2, - | thu_sales1 / thu_sales2, - | fri_sales1 / fri_sales2, - | sat_sales1 / sat_sales2 - |from - | (select - | s_store_name s_store_name1, - | wss.d_week_seq d_week_seq1, - | s_store_id s_store_id1, - | sun_sales sun_sales1, - | mon_sales mon_sales1, - | tue_sales tue_sales1, - | wed_sales wed_sales1, - | thu_sales thu_sales1, - | fri_sales fri_sales1, - | sat_sales sat_sales1 - | from - | (select - | d_week_seq, - | ss_store_sk, - | sum(case when(d_day_name = 'Sunday') then - | ss_sales_price else null end) sun_sales, - | sum(case when(d_day_name = 'Monday') then - | ss_sales_price else null end) mon_sales, - | sum(case when(d_day_name = 'Tuesday') then - | ss_sales_price else null end) tue_sales, - | sum(case when(d_day_name = 'Wednesday') then - | ss_sales_price else null end) wed_sales, - | sum(case when(d_day_name = 'Thursday') then - | ss_sales_price else null end) thu_sales, - | sum(case when(d_day_name = 'Friday') then - | ss_sales_price else null end) fri_sales, - | sum(case when(d_day_name = 'Saturday') then - | ss_sales_price else null end) sat_sales - | from - | store_sales - | join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk) - | where - | ss_sold_date_sk between 2451088 and 2451452 - | group by - | d_week_seq, - | ss_store_sk - | ) wss - | join store on (wss.ss_store_sk = store.s_store_sk) - | join date_dim d on (wss.d_week_seq = d.d_week_seq) - | where - | d_month_seq between 1185 and 1185 + 11 - | ) y - | join - | (select - | s_store_name s_store_name2, - | wss.d_week_seq d_week_seq2, - | s_store_id s_store_id2, - | sun_sales sun_sales2, - | mon_sales mon_sales2, - | tue_sales tue_sales2, - | wed_sales wed_sales2, - | thu_sales thu_sales2, - | fri_sales fri_sales2, - | sat_sales sat_sales2 - | from - | (select - | d_week_seq, - | ss_store_sk, - | sum(case when(d_day_name = 'Sunday') then - | ss_sales_price else null end) sun_sales, - | sum(case when(d_day_name = 'Monday') then - | ss_sales_price else null end) mon_sales, - | sum(case when(d_day_name = 'Tuesday') then - | ss_sales_price else null end) tue_sales, - | sum(case when(d_day_name = 'Wednesday') then - | ss_sales_price else null end) wed_sales, - | sum(case when(d_day_name = 'Thursday') then - | ss_sales_price else null end) thu_sales, - | sum(case when(d_day_name = 'Friday') then - | ss_sales_price else null end) fri_sales, - | sum(case when(d_day_name = 'Saturday') then - | ss_sales_price else null end) sat_sales - | from - | store_sales - | join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk) - | where - | ss_sold_date_sk between 2451088 and 2451452 - | group by - | d_week_seq, - | ss_store_sk - | ) wss - | join store on (wss.ss_store_sk = store.s_store_sk) - | join date_dim d on (wss.d_week_seq = d.d_week_seq) - | where - | d_month_seq between 1185 + 12 and 1185 + 23 - | ) x - | on (y.s_store_id1 = x.s_store_id2) - |where - | d_week_seq1 = d_week_seq2 - 52 - |order by - | s_store_name1, - | s_store_id1, - | d_week_seq1 - |limit 100 - """.stripMargin), - - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - TPCDS Snappy (scale = 5): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------- - q59 1624 / 1663 17.9 55.8 1.0X - */ - - ("q63", """ - |select - | * - |from - | (select - | i_manager_id, - | sum(ss_sales_price) sum_sales - | from - | store_sales - | join item on (store_sales.ss_item_sk = item.i_item_sk) - | join store on (store_sales.ss_store_sk = store.s_store_sk) - | join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk) - | where - | ss_sold_date_sk between 2451911 and 2452275 -- partition key filter - | and d_month_seq in (1212, 1212 + 1, 1212 + 2, 1212 + 3, 1212 + 4, 1212 + 5, - | 1212 + 6, 1212 + 7, 1212 + 8, 1212 + 9, 1212 + 10, 1212 + 11) - | and ( - | (i_category in('Books', 'Children', 'Electronics') - | and i_class in('personal', 'portable', 'refernece', 'self-help') - | and i_brand in('scholaramalgamalg #14', 'scholaramalgamalg #7', - | 'exportiunivamalg #9', 'scholaramalgamalg #9') - | ) - | or - | (i_category in('Women', 'Music', 'Men') - | and i_class in('accessories', 'classical', 'fragrances', 'pants') - | and i_brand in('amalgimporto #1', 'edu packscholar #1', - | 'exportiimporto #1', 'importoamalg #1') - | ) - | ) - | group by - | i_manager_id, - | d_moy - | ) tmp1 - |order by - | i_manager_id, - | sum_sales - |limit 100 - """.stripMargin), - - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - TPCDS Snappy (scale = 5): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------- - q63 979 / 1006 14.8 67.4 1.0X - */ - - ("q65", """ - |select - | s_store_name, - | i_item_desc, - | sc.revenue, - | i_current_price, - | i_wholesale_cost, - | i_brand - |from - | (select - | ss_store_sk, - | ss_item_sk, - | sum(ss_sales_price) as revenue - | from - | store_sales - | join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk) - | where - | ss_sold_date_sk between 2451911 and 2452275 -- partition key filter - | and d_month_seq between 1212 and 1212 + 11 - | group by - | ss_store_sk, - | ss_item_sk - | ) sc - | join item on (sc.ss_item_sk = item.i_item_sk) - | join store on (sc.ss_store_sk = store.s_store_sk) - | join - | (select - | ss_store_sk, - | avg(revenue) as ave - | from - | (select - | ss_store_sk, - | ss_item_sk, - | sum(ss_sales_price) as revenue - | from - | store_sales - | join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk) - | where - | ss_sold_date_sk between 2451911 and 2452275 -- partition key filter - | and d_month_seq between 1212 and 1212 + 11 - | group by - | ss_store_sk, - | ss_item_sk - | ) sa - | group by - | ss_store_sk - | ) sb on (sc.ss_store_sk = sb.ss_store_sk) -- 676 rows - |where - | sc.revenue <= 0.1 * sb.ave - |order by - | s_store_name, - | i_item_desc - |limit 100 - """.stripMargin), - - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - TPCDS Snappy (scale = 5): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------- - q65 7770 / 8097 3.7 267.9 1.0X - */ - - ("q68", """ - |select - | c_last_name, - | c_first_name, - | ca_city, - | bought_city, - | ss_ticket_number, - | extended_price, - | extended_tax, - | list_price - |from - | (select - | ss_ticket_number, - | ss_customer_sk, - | ca_city bought_city, - | sum(ss_ext_sales_price) extended_price, - | sum(ss_ext_list_price) list_price, - | sum(ss_ext_tax) extended_tax - | from - | store_sales - | join store on (store_sales.ss_store_sk = store.s_store_sk) - | join household_demographics on - | (store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk) - | join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk) - | join customer_address on - | (store_sales.ss_addr_sk = customer_address.ca_address_sk) - | where - | store.s_city in('Midway', 'Fairview') - | --and date_dim.d_dom between 1 and 2 - | --and date_dim.d_year in(1999, 1999 + 1, 1999 + 2) - | -- and ss_date between '1999-01-01' and '2001-12-31' - | -- and dayofmonth(ss_date) in (1,2) - | and (household_demographics.hd_dep_count = 5 - | or household_demographics.hd_vehicle_count = 3) - | and d_date between '1999-01-01' and '1999-03-31' - | and ss_sold_date_sk between 2451180 and 2451269 - | -- partition key filter (3 months) - | group by - | ss_ticket_number, - | ss_customer_sk, - | ss_addr_sk, - | ca_city - | ) dn - | join customer on (dn.ss_customer_sk = customer.c_customer_sk) - | join customer_address current_addr on - | (customer.c_current_addr_sk = current_addr.ca_address_sk) - |where - | current_addr.ca_city <> bought_city - |order by - | c_last_name, - | ss_ticket_number - |limit 100 - """.stripMargin), - - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - TPCDS Snappy (scale = 5): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------- - q68 3105 / 3405 4.8 206.5 1.0X - */ - - ("q7", """ - |select - | i_item_id, - | avg(ss_quantity) agg1, - | avg(ss_list_price) agg2, - | avg(ss_coupon_amt) agg3, - | avg(ss_sales_price) agg4 - |from - | store_sales - | join customer_demographics on - | (store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk) - | join item on (store_sales.ss_item_sk = item.i_item_sk) - | join promotion on (store_sales.ss_promo_sk = promotion.p_promo_sk) - | join date_dim on (ss_sold_date_sk = d_date_sk) - |where - | cd_gender = 'F' - | and cd_marital_status = 'W' - | and cd_education_status = 'Primary' - | and (p_channel_email = 'N' - | or p_channel_event = 'N') - | and d_year = 1998 - | and ss_sold_date_sk between 2450815 and 2451179 -- partition key filter - |group by - | i_item_id - |order by - | i_item_id - |limit 100 - """.stripMargin), - - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - TPCDS Snappy (scale = 5): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------- - q7 2042 / 2333 8.1 124.2 1.0X - */ - - ("q73", """ - |select - | c_last_name, - | c_first_name, - | c_salutation, - | c_preferred_cust_flag, - | ss_ticket_number, - | cnt - |from - | (select - | ss_ticket_number, - | ss_customer_sk, - | count(*) cnt - | from - | store_sales - | join household_demographics on - | (store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk) - | join store on (store_sales.ss_store_sk = store.s_store_sk) - | -- join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk) - | where - | store.s_county in - | ('Williamson County','Franklin Parish','Bronx County','Orange County') - | -- and date_dim.d_dom between 1 and 2 - | -- and date_dim.d_year in(1998, 1998 + 1, 1998 + 2) - | -- and ss_date between '1999-01-01' and '2001-12-02' - | -- and dayofmonth(ss_date) in (1,2) - | -- partition key filter - | -- and ss_sold_date_sk in (2450816, 2450846, 2450847, 2450874, 2450875, 2450905, - | -- 2450906, 2450935, 2450936, 2450966, 2450967, - | -- 2450996, 2450997, 2451027, 2451028, 2451058, 2451059, - | -- 2451088, 2451089, 2451119, 2451120, 2451149, - | -- 2451150, 2451180, 2451181, 2451211, 2451212, 2451239, - | -- 2451240, 2451270, 2451271, 2451300, 2451301, - | -- 2451331, 2451332, 2451361, 2451362, 2451392, 2451393, - | -- 2451423, 2451424, 2451453, 2451454, 2451484, - | -- 2451485, 2451514, 2451515, 2451545, 2451546, 2451576, - | -- 2451577, 2451605, 2451606, 2451636, 2451637, - | -- 2451666, 2451667, 2451697, 2451698, 2451727, 2451728, - | -- 2451758, 2451759, 2451789, 2451790, 2451819, - | -- 2451820, 2451850, 2451851, 2451880, 2451881) - | and (household_demographics.hd_buy_potential = '>10000' - | or household_demographics.hd_buy_potential = 'unknown') - | and household_demographics.hd_vehicle_count > 0 - | and case when household_demographics.hd_vehicle_count > 0 then - | household_demographics.hd_dep_count / household_demographics.hd_vehicle_count - | else null end > 1 - | and ss_sold_date_sk between 2451180 and 2451269 - | -- partition key filter (3 months) - | group by - | ss_ticket_number, - | ss_customer_sk - | ) dj - | join customer on (dj.ss_customer_sk = customer.c_customer_sk) - |where - | cnt between 1 and 5 - |order by - | cnt desc - |limit 1000 - """.stripMargin), - - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - TPCDS Snappy (scale = 5): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------- - q73 1124 / 1221 13.1 76.5 1.0X - */ - - ("q79", """ - |select - | c_last_name, - | c_first_name, - | substr(s_city, 1, 30) as city, - | ss_ticket_number, - | amt, - | profit - |from - | (select - | ss_ticket_number, - | ss_customer_sk, - | s_city, - | sum(ss_coupon_amt) amt, - | sum(ss_net_profit) profit - | from - | store_sales - | join household_demographics on - | (store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk) - | join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk) - | join store on (store_sales.ss_store_sk = store.s_store_sk) - | where - | store.s_number_employees between 200 and 295 - | and (household_demographics.hd_dep_count = 8 - | or household_demographics.hd_vehicle_count > 0) - | and date_dim.d_dow = 1 - | and date_dim.d_year in (1998, 1998 + 1, 1998 + 2) - | -- and ss_date between '1998-01-01' and '2000-12-25' - | -- 156 days - | and d_date between '1999-01-01' and '1999-03-31' - | and ss_sold_date_sk between 2451180 and 2451269 -- partition key filter - | group by - | ss_ticket_number, - | ss_customer_sk, - | ss_addr_sk, - | s_city - | ) ms - | join customer on (ms.ss_customer_sk = customer.c_customer_sk) - |order by - | c_last_name, - | c_first_name, - | city, - | profit - |limit 100 - """.stripMargin), - - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - TPCDS Snappy (scale = 5): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------- - q79 2029 / 2488 7.3 137.5 1.0X - */ - - ("q8", - """ - |select s_store_name - | ,sum(ss_net_profit) - | from store_sales - | ,date_dim - | ,store, - | (select distinct a01.ca_zip - | from - | (SELECT substr(ca_zip,1,5) ca_zip - | FROM customer_address - | WHERE substr(ca_zip,1,5) IN ('89436', '30868', '65085', '22977', '83927', '77557', - | '58429', '40697', '80614', '10502', '32779', - | '91137', '61265', '98294', '17921', '18427', '21203', '59362', '87291', '84093', - | '21505', '17184', '10866', '67898', '25797', - | '28055', '18377', '80332', '74535', '21757', '29742', '90885', '29898', '17819', - | '40811', '25990', '47513', '89531', '91068', - | '10391', '18846', '99223', '82637', '41368', '83658', '86199', '81625', '26696', - | '89338', '88425', '32200', '81427', '19053', - | '77471', '36610', '99823', '43276', '41249', '48584', '83550', '82276', '18842', - | '78890', '14090', '38123', '40936', '34425', - | '19850', '43286', '80072', '79188', '54191', '11395', '50497', '84861', '90733', - | '21068', '57666', '37119', '25004', '57835', - | '70067', '62878', '95806', '19303', '18840', '19124', '29785', '16737', '16022', - | '49613', '89977', '68310', '60069', '98360', - | '48649', '39050', '41793', '25002', '27413', '39736', '47208', '16515', '94808', - | '57648', '15009', '80015', '42961', '63982', - | '21744', '71853', '81087', '67468', '34175', '64008', '20261', '11201', '51799', - | '48043', '45645', '61163', '48375', '36447', - | '57042', '21218', '41100', '89951', '22745', '35851', '83326', '61125', '78298', - | '80752', '49858', '52940', '96976', '63792', - | '11376', '53582', '18717', '90226', '50530', '94203', '99447', '27670', '96577', - | '57856', '56372', '16165', '23427', '54561', - | '28806', '44439', '22926', '30123', '61451', '92397', '56979', '92309', '70873', - | '13355', '21801', '46346', '37562', '56458', - | '28286', '47306', '99555', '69399', '26234', '47546', '49661', '88601', '35943', - | '39936', '25632', '24611', '44166', '56648', - | '30379', '59785', '11110', '14329', '93815', '52226', '71381', '13842', '25612', - | '63294', '14664', '21077', '82626', '18799', - | '60915', '81020', '56447', '76619', '11433', '13414', '42548', '92713', '70467', - | '30884', '47484', '16072', '38936', '13036', - | '88376', '45539', '35901', '19506', '65690', '73957', '71850', '49231', '14276', - | '20005', '18384', '76615', '11635', '38177', - | '55607', '41369', '95447', '58581', '58149', '91946', '33790', '76232', '75692', - | '95464', '22246', '51061', '56692', '53121', - | '77209', '15482', '10688', '14868', '45907', '73520', '72666', '25734', '17959', - | '24677', '66446', '94627', '53535', '15560', - | '41967', '69297', '11929', '59403', '33283', '52232', '57350', '43933', '40921', - | '36635', '10827', '71286', '19736', '80619', - | '25251', '95042', '15526', '36496', '55854', '49124', '81980', '35375', '49157', - | '63512', '28944', '14946', '36503', '54010', - | '18767', '23969', '43905', '66979', '33113', '21286', '58471', '59080', '13395', - | '79144', '70373', '67031', '38360', '26705', - | '50906', '52406', '26066', '73146', '15884', '31897', '30045', '61068', '45550', - | '92454', '13376', '14354', '19770', '22928', - | '97790', '50723', '46081', '30202', '14410', '20223', '88500', '67298', '13261', - | '14172', '81410', '93578', '83583', '46047', - | '94167', '82564', '21156', '15799', '86709', '37931', '74703', '83103', '23054', - | '70470', '72008', '35709', '91911', '69998', - | '20961', '70070', '63197', '54853', '88191', '91830', '49521', '19454', '81450', - | '89091', '62378', '31904', '61869', '51744', - | '36580', '85778', '36871', '48121', '28810', '83712', '45486', '67393', '26935', - | '42393', '20132', '55349', '86057', '21309', - | '80218', '10094', '11357', '48819', '39734', '40758', '30432', '21204', '29467', - | '30214', '61024', '55307', '74621', '11622', - | '68908', '33032', '52868', '99194', '99900', '84936', '69036', '99149', '45013', - | '32895', '59004', '32322', '14933', '32936', - | '33562', '72550', '27385', '58049', '58200', '16808', '21360', '32961', '18586', - | '79307', '15492')) a01 - | inner join - | (select ca_zip - | from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt - | FROM customer_address, customer - | WHERE ca_address_sk = c_current_addr_sk and - | c_preferred_cust_flag='Y' - | group by ca_zip - | having count(*) > 10)A1 - | ) b11 - | on (a01.ca_zip = b11.ca_zip )) A2 - | where ss_store_sk = s_store_sk - | and ss_sold_date_sk = d_date_sk - | and ss_sold_date_sk between 2451271 and 2451361 - | and d_qoy = 2 and d_year = 1999 - | and (substr(s_zip,1,2) = substr(a2.ca_zip,1,2)) - | group by s_store_name - | order by s_store_name - |limit 100 - """.stripMargin), - - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - TPCDS Snappy (scale = 5): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------- - q8 1737 / 2197 8.7 115.6 1.0X - */ - - ("q82", """ - |select - | i_item_id, - | i_item_desc, - | i_current_price - |from - | store_sales - | join item on (store_sales.ss_item_sk = item.i_item_sk) - | join inventory on (item.i_item_sk = inventory.inv_item_sk) - | join date_dim on (inventory.inv_date_sk = date_dim.d_date_sk) - |where - | i_current_price between 30 and 30 + 30 - | and i_manufact_id in (437, 129, 727, 663) - | and inv_quantity_on_hand between 100 and 500 - |group by - | i_item_id, - | i_item_desc, - | i_current_price - |order by - | i_item_id - |limit 100 - """.stripMargin), - - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - TPCDS Snappy (scale = 5): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------- - q82 9399 / 10245 6.8 147.2 1.0X - */ - - ("q89", """ - |select - | * - |from - | (select - | i_category, - | i_class, - | i_brand, - | s_store_name, - | s_company_name, - | d_moy, - | sum(ss_sales_price) sum_sales - | from - | store_sales - | join item on (store_sales.ss_item_sk = item.i_item_sk) - | join store on (store_sales.ss_store_sk = store.s_store_sk) - | join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk) - | where - | ss_sold_date_sk between 2451545 and 2451910 -- partition key filter - | and d_year in (2000) - | and ((i_category in('Home', 'Books', 'Electronics') - | and i_class in('wallpaper', 'parenting', 'musical')) - | or (i_category in('Shoes', 'Jewelry', 'Men') - | and i_class in('womens', 'birdal', 'pants')) - | ) - | group by - | i_category, - | i_class, - | i_brand, - | s_store_name, - | s_company_name, - | d_moy - | ) tmp1 - |order by - | sum_sales, - | s_store_name - |limit 100 - """.stripMargin), - - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - TPCDS Snappy (scale = 5): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------- - q89 1122 / 1274 12.9 77.2 1.0X - */ - - ("q98", """ - |select - | i_item_desc, - | i_category, - | i_class, - | i_current_price, - | sum(ss_ext_sales_price) as itemrevenue - |from - | store_sales - | join item on (store_sales.ss_item_sk = item.i_item_sk) - | join date_dim on (store_sales.ss_sold_date_sk = date_dim.d_date_sk) - |where - | ss_sold_date_sk between 2451911 and 2451941 - | -- partition key filter (1 calendar month) - | and d_date between '2001-01-01' and '2001-01-31' - | and i_category in('Jewelry', 'Sports', 'Books') - |group by - | i_item_id, - | i_item_desc, - | i_category, - | i_class, - | i_current_price - |order by - | i_category, - | i_class, - | i_item_id, - | i_item_desc - | -- revenueratio - |limit 1000 - """.stripMargin), - - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - TPCDS Snappy (scale = 5): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------- - q98 1235 / 1542 11.8 85.0 1.0X - */ - - ("ss_max", """ - |select - | count(*) as total, - | max(ss_sold_date_sk) as max_ss_sold_date_sk, - | max(ss_sold_time_sk) as max_ss_sold_time_sk, - | max(ss_item_sk) as max_ss_item_sk, - | max(ss_customer_sk) as max_ss_customer_sk, - | max(ss_cdemo_sk) as max_ss_cdemo_sk, - | max(ss_hdemo_sk) as max_ss_hdemo_sk, - | max(ss_addr_sk) as max_ss_addr_sk, - | max(ss_store_sk) as max_ss_store_sk, - | max(ss_promo_sk) as max_ss_promo_sk - |from store_sales - """.stripMargin) - - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - TPCDS Snappy (scale = 5): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------- - ss_max 2305 / 2731 6.2 160.0 1.0X - */ - - ).toArray - - val tables = Seq("customer", "customer_address", "customer_demographics", "date_dim", - "household_demographics", "inventory", "item", "promotion", "store", "catalog_sales", - "web_sales", "store_sales") - - def setupTables(dataLocation: String): Map[String, Long] = { - tables.map { tableName => - spark.read.parquet(s"$dataLocation/$tableName").createOrReplaceTempView(tableName) - tableName -> spark.table(tableName).count() - }.toMap - } - - def tpcdsAll(dataLocation: String): Unit = { - require(dataLocation.nonEmpty, - "please modify the value of dataLocation to point to your local TPCDS data") - val tableSizes = setupTables(dataLocation) - spark.conf.set(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key, "true") - spark.conf.set(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key, "true") - tpcds.filter(q => q._1 != "").foreach { - case (name: String, query: String) => - val numRows = spark.sql(query).queryExecution.logical.map { - case ur@UnresolvedRelation(t: TableIdentifier, _) => - tableSizes.getOrElse(t.table, throw new RuntimeException(s"${t.table} not found.")) - case _ => 0L - }.sum - val benchmark = new Benchmark("TPCDS Snappy (scale = 5)", numRows, 5) - benchmark.addCase(name) { i => - spark.sql(query).collect() - } - benchmark.run() - } - } - - def main(args: Array[String]): Unit = { - - // In order to run this benchmark, please follow the instructions at - // https://github.com/databricks/spark-sql-perf/blob/master/README.md to generate the TPCDS data - // locally (preferably with a scale factor of 5 for benchmarking). Thereafter, the value of - // dataLocation below needs to be set to the location where the generated data is stored. - val dataLocation = "" - - tpcdsAll(dataLocation) - } -} --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
