Repository: spark
Updated Branches:
  refs/heads/master dcac8e6f4 -> a78d6ce37


http://git-wip-us.apache.org/repos/asf/spark/blob/a78d6ce3/sql/core/src/test/resources/tpcds/q85.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds/q85.sql 
b/sql/core/src/test/resources/tpcds/q85.sql
new file mode 100755
index 0000000..cf718b0
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds/q85.sql
@@ -0,0 +1,82 @@
+SELECT
+  substr(r_reason_desc, 1, 20),
+  avg(ws_quantity),
+  avg(wr_refunded_cash),
+  avg(wr_fee)
+FROM web_sales, web_returns, web_page, customer_demographics cd1,
+  customer_demographics cd2, customer_address, date_dim, reason
+WHERE ws_web_page_sk = wp_web_page_sk
+  AND ws_item_sk = wr_item_sk
+  AND ws_order_number = wr_order_number
+  AND ws_sold_date_sk = d_date_sk AND d_year = 2000
+  AND cd1.cd_demo_sk = wr_refunded_cdemo_sk
+  AND cd2.cd_demo_sk = wr_returning_cdemo_sk
+  AND ca_address_sk = wr_refunded_addr_sk
+  AND r_reason_sk = wr_reason_sk
+  AND
+  (
+    (
+      cd1.cd_marital_status = 'M'
+        AND
+        cd1.cd_marital_status = cd2.cd_marital_status
+        AND
+        cd1.cd_education_status = 'Advanced Degree'
+        AND
+        cd1.cd_education_status = cd2.cd_education_status
+        AND
+        ws_sales_price BETWEEN 100.00 AND 150.00
+    )
+      OR
+      (
+        cd1.cd_marital_status = 'S'
+          AND
+          cd1.cd_marital_status = cd2.cd_marital_status
+          AND
+          cd1.cd_education_status = 'College'
+          AND
+          cd1.cd_education_status = cd2.cd_education_status
+          AND
+          ws_sales_price BETWEEN 50.00 AND 100.00
+      )
+      OR
+      (
+        cd1.cd_marital_status = 'W'
+          AND
+          cd1.cd_marital_status = cd2.cd_marital_status
+          AND
+          cd1.cd_education_status = '2 yr Degree'
+          AND
+          cd1.cd_education_status = cd2.cd_education_status
+          AND
+          ws_sales_price BETWEEN 150.00 AND 200.00
+      )
+  )
+  AND
+  (
+    (
+      ca_country = 'United States'
+        AND
+        ca_state IN ('IN', 'OH', 'NJ')
+        AND ws_net_profit BETWEEN 100 AND 200
+    )
+      OR
+      (
+        ca_country = 'United States'
+          AND
+          ca_state IN ('WI', 'CT', 'KY')
+          AND ws_net_profit BETWEEN 150 AND 300
+      )
+      OR
+      (
+        ca_country = 'United States'
+          AND
+          ca_state IN ('LA', 'IA', 'AR')
+          AND ws_net_profit BETWEEN 50 AND 250
+      )
+  )
+GROUP BY r_reason_desc
+ORDER BY substr(r_reason_desc, 1, 20)
+  , avg(ws_quantity)
+  , avg(wr_refunded_cash)
+  , avg(wr_fee)
+LIMIT 100

http://git-wip-us.apache.org/repos/asf/spark/blob/a78d6ce3/sql/core/src/test/resources/tpcds/q86.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds/q86.sql 
b/sql/core/src/test/resources/tpcds/q86.sql
new file mode 100755
index 0000000..789a4ab
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds/q86.sql
@@ -0,0 +1,24 @@
+SELECT
+  sum(ws_net_paid) AS total_sum,
+  i_category,
+  i_class,
+  grouping(i_category) + grouping(i_class) AS lochierarchy,
+  rank()
+  OVER (
+    PARTITION BY grouping(i_category) + grouping(i_class),
+      CASE WHEN grouping(i_class) = 0
+        THEN i_category END
+    ORDER BY sum(ws_net_paid) DESC) AS rank_within_parent
+FROM
+  web_sales, date_dim d1, item
+WHERE
+  d1.d_month_seq BETWEEN 1200 AND 1200 + 11
+    AND d1.d_date_sk = ws_sold_date_sk
+    AND i_item_sk = ws_item_sk
+GROUP BY ROLLUP (i_category, i_class)
+ORDER BY
+  lochierarchy DESC,
+  CASE WHEN lochierarchy = 0
+    THEN i_category END,
+  rank_within_parent
+LIMIT 100

http://git-wip-us.apache.org/repos/asf/spark/blob/a78d6ce3/sql/core/src/test/resources/tpcds/q87.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds/q87.sql 
b/sql/core/src/test/resources/tpcds/q87.sql
new file mode 100755
index 0000000..4aaa9f3
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds/q87.sql
@@ -0,0 +1,28 @@
+SELECT count(*)
+FROM ((SELECT DISTINCT
+  c_last_name,
+  c_first_name,
+  d_date
+FROM store_sales, date_dim, customer
+WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk
+  AND store_sales.ss_customer_sk = customer.c_customer_sk
+  AND d_month_seq BETWEEN 1200 AND 1200 + 11)
+      EXCEPT
+      (SELECT DISTINCT
+        c_last_name,
+        c_first_name,
+        d_date
+      FROM catalog_sales, date_dim, customer
+      WHERE catalog_sales.cs_sold_date_sk = date_dim.d_date_sk
+        AND catalog_sales.cs_bill_customer_sk = customer.c_customer_sk
+        AND d_month_seq BETWEEN 1200 AND 1200 + 11)
+      EXCEPT
+      (SELECT DISTINCT
+        c_last_name,
+        c_first_name,
+        d_date
+      FROM web_sales, date_dim, customer
+      WHERE web_sales.ws_sold_date_sk = date_dim.d_date_sk
+        AND web_sales.ws_bill_customer_sk = customer.c_customer_sk
+        AND d_month_seq BETWEEN 1200 AND 1200 + 11)
+     ) cool_cust

http://git-wip-us.apache.org/repos/asf/spark/blob/a78d6ce3/sql/core/src/test/resources/tpcds/q88.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds/q88.sql 
b/sql/core/src/test/resources/tpcds/q88.sql
new file mode 100755
index 0000000..25bcd90
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds/q88.sql
@@ -0,0 +1,122 @@
+SELECT *
+FROM
+  (SELECT count(*) h8_30_to_9
+  FROM store_sales, household_demographics, time_dim, store
+  WHERE ss_sold_time_sk = time_dim.t_time_sk
+    AND ss_hdemo_sk = household_demographics.hd_demo_sk
+    AND ss_store_sk = s_store_sk
+    AND time_dim.t_hour = 8
+    AND time_dim.t_minute >= 30
+    AND (
+    (household_demographics.hd_dep_count = 4 AND 
household_demographics.hd_vehicle_count <= 4 + 2)
+      OR
+      (household_demographics.hd_dep_count = 2 AND 
household_demographics.hd_vehicle_count <= 2 + 2)
+      OR
+      (household_demographics.hd_dep_count = 0 AND
+        household_demographics.hd_vehicle_count <= 0 + 2))
+    AND store.s_store_name = 'ese') s1,
+  (SELECT count(*) h9_to_9_30
+  FROM store_sales, household_demographics, time_dim, store
+  WHERE ss_sold_time_sk = time_dim.t_time_sk
+    AND ss_hdemo_sk = household_demographics.hd_demo_sk
+    AND ss_store_sk = s_store_sk
+    AND time_dim.t_hour = 9
+    AND time_dim.t_minute < 30
+    AND (
+    (household_demographics.hd_dep_count = 4 AND 
household_demographics.hd_vehicle_count <= 4 + 2)
+      OR
+      (household_demographics.hd_dep_count = 2 AND 
household_demographics.hd_vehicle_count <= 2 + 2)
+      OR
+      (household_demographics.hd_dep_count = 0 AND
+        household_demographics.hd_vehicle_count <= 0 + 2))
+    AND store.s_store_name = 'ese') s2,
+  (SELECT count(*) h9_30_to_10
+  FROM store_sales, household_demographics, time_dim, store
+  WHERE ss_sold_time_sk = time_dim.t_time_sk
+    AND ss_hdemo_sk = household_demographics.hd_demo_sk
+    AND ss_store_sk = s_store_sk
+    AND time_dim.t_hour = 9
+    AND time_dim.t_minute >= 30
+    AND (
+    (household_demographics.hd_dep_count = 4 AND 
household_demographics.hd_vehicle_count <= 4 + 2)
+      OR
+      (household_demographics.hd_dep_count = 2 AND 
household_demographics.hd_vehicle_count <= 2 + 2)
+      OR
+      (household_demographics.hd_dep_count = 0 AND
+        household_demographics.hd_vehicle_count <= 0 + 2))
+    AND store.s_store_name = 'ese') s3,
+  (SELECT count(*) h10_to_10_30
+  FROM store_sales, household_demographics, time_dim, store
+  WHERE ss_sold_time_sk = time_dim.t_time_sk
+    AND ss_hdemo_sk = household_demographics.hd_demo_sk
+    AND ss_store_sk = s_store_sk
+    AND time_dim.t_hour = 10
+    AND time_dim.t_minute < 30
+    AND (
+    (household_demographics.hd_dep_count = 4 AND 
household_demographics.hd_vehicle_count <= 4 + 2)
+      OR
+      (household_demographics.hd_dep_count = 2 AND 
household_demographics.hd_vehicle_count <= 2 + 2)
+      OR
+      (household_demographics.hd_dep_count = 0 AND
+        household_demographics.hd_vehicle_count <= 0 + 2))
+    AND store.s_store_name = 'ese') s4,
+  (SELECT count(*) h10_30_to_11
+  FROM store_sales, household_demographics, time_dim, store
+  WHERE ss_sold_time_sk = time_dim.t_time_sk
+    AND ss_hdemo_sk = household_demographics.hd_demo_sk
+    AND ss_store_sk = s_store_sk
+    AND time_dim.t_hour = 10
+    AND time_dim.t_minute >= 30
+    AND (
+    (household_demographics.hd_dep_count = 4 AND 
household_demographics.hd_vehicle_count <= 4 + 2)
+      OR
+      (household_demographics.hd_dep_count = 2 AND 
household_demographics.hd_vehicle_count <= 2 + 2)
+      OR
+      (household_demographics.hd_dep_count = 0 AND
+        household_demographics.hd_vehicle_count <= 0 + 2))
+    AND store.s_store_name = 'ese') s5,
+  (SELECT count(*) h11_to_11_30
+  FROM store_sales, household_demographics, time_dim, store
+  WHERE ss_sold_time_sk = time_dim.t_time_sk
+    AND ss_hdemo_sk = household_demographics.hd_demo_sk
+    AND ss_store_sk = s_store_sk
+    AND time_dim.t_hour = 11
+    AND time_dim.t_minute < 30
+    AND (
+    (household_demographics.hd_dep_count = 4 AND 
household_demographics.hd_vehicle_count <= 4 + 2)
+      OR
+      (household_demographics.hd_dep_count = 2 AND 
household_demographics.hd_vehicle_count <= 2 + 2)
+      OR
+      (household_demographics.hd_dep_count = 0 AND
+        household_demographics.hd_vehicle_count <= 0 + 2))
+    AND store.s_store_name = 'ese') s6,
+  (SELECT count(*) h11_30_to_12
+  FROM store_sales, household_demographics, time_dim, store
+  WHERE ss_sold_time_sk = time_dim.t_time_sk
+    AND ss_hdemo_sk = household_demographics.hd_demo_sk
+    AND ss_store_sk = s_store_sk
+    AND time_dim.t_hour = 11
+    AND time_dim.t_minute >= 30
+    AND (
+    (household_demographics.hd_dep_count = 4 AND 
household_demographics.hd_vehicle_count <= 4 + 2)
+      OR
+      (household_demographics.hd_dep_count = 2 AND 
household_demographics.hd_vehicle_count <= 2 + 2)
+      OR
+      (household_demographics.hd_dep_count = 0 AND
+        household_demographics.hd_vehicle_count <= 0 + 2))
+    AND store.s_store_name = 'ese') s7,
+  (SELECT count(*) h12_to_12_30
+  FROM store_sales, household_demographics, time_dim, store
+  WHERE ss_sold_time_sk = time_dim.t_time_sk
+    AND ss_hdemo_sk = household_demographics.hd_demo_sk
+    AND ss_store_sk = s_store_sk
+    AND time_dim.t_hour = 12
+    AND time_dim.t_minute < 30
+    AND (
+    (household_demographics.hd_dep_count = 4 AND 
household_demographics.hd_vehicle_count <= 4 + 2)
+      OR
+      (household_demographics.hd_dep_count = 2 AND 
household_demographics.hd_vehicle_count <= 2 + 2)
+      OR
+      (household_demographics.hd_dep_count = 0 AND
+        household_demographics.hd_vehicle_count <= 0 + 2))
+    AND store.s_store_name = 'ese') s8

http://git-wip-us.apache.org/repos/asf/spark/blob/a78d6ce3/sql/core/src/test/resources/tpcds/q89.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds/q89.sql 
b/sql/core/src/test/resources/tpcds/q89.sql
new file mode 100755
index 0000000..75408cb
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds/q89.sql
@@ -0,0 +1,30 @@
+SELECT *
+FROM (
+       SELECT
+         i_category,
+         i_class,
+         i_brand,
+         s_store_name,
+         s_company_name,
+         d_moy,
+         sum(ss_sales_price) sum_sales,
+         avg(sum(ss_sales_price))
+         OVER
+         (PARTITION BY i_category, i_brand, s_store_name, s_company_name)
+         avg_monthly_sales
+       FROM item, store_sales, date_dim, store
+       WHERE ss_item_sk = i_item_sk AND
+         ss_sold_date_sk = d_date_sk AND
+         ss_store_sk = s_store_sk AND
+         d_year IN (1999) AND
+         ((i_category IN ('Books', 'Electronics', 'Sports') AND
+           i_class IN ('computers', 'stereo', 'football'))
+           OR (i_category IN ('Men', 'Jewelry', 'Women') AND
+           i_class IN ('shirts', 'birdal', 'dresses')))
+       GROUP BY i_category, i_class, i_brand,
+         s_store_name, s_company_name, d_moy) tmp1
+WHERE CASE WHEN (avg_monthly_sales <> 0)
+  THEN (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales)
+      ELSE NULL END > 0.1
+ORDER BY sum_sales - avg_monthly_sales, s_store_name
+LIMIT 100

http://git-wip-us.apache.org/repos/asf/spark/blob/a78d6ce3/sql/core/src/test/resources/tpcds/q9.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds/q9.sql 
b/sql/core/src/test/resources/tpcds/q9.sql
new file mode 100755
index 0000000..de3db9d
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds/q9.sql
@@ -0,0 +1,48 @@
+SELECT
+  CASE WHEN (SELECT count(*)
+  FROM store_sales
+  WHERE ss_quantity BETWEEN 1 AND 20) > 62316685
+    THEN (SELECT avg(ss_ext_discount_amt)
+    FROM store_sales
+    WHERE ss_quantity BETWEEN 1 AND 20)
+  ELSE (SELECT avg(ss_net_paid)
+  FROM store_sales
+  WHERE ss_quantity BETWEEN 1 AND 20) END bucket1,
+  CASE WHEN (SELECT count(*)
+  FROM store_sales
+  WHERE ss_quantity BETWEEN 21 AND 40) > 19045798
+    THEN (SELECT avg(ss_ext_discount_amt)
+    FROM store_sales
+    WHERE ss_quantity BETWEEN 21 AND 40)
+  ELSE (SELECT avg(ss_net_paid)
+  FROM store_sales
+  WHERE ss_quantity BETWEEN 21 AND 40) END bucket2,
+  CASE WHEN (SELECT count(*)
+  FROM store_sales
+  WHERE ss_quantity BETWEEN 41 AND 60) > 365541424
+    THEN (SELECT avg(ss_ext_discount_amt)
+    FROM store_sales
+    WHERE ss_quantity BETWEEN 41 AND 60)
+  ELSE (SELECT avg(ss_net_paid)
+  FROM store_sales
+  WHERE ss_quantity BETWEEN 41 AND 60) END bucket3,
+  CASE WHEN (SELECT count(*)
+  FROM store_sales
+  WHERE ss_quantity BETWEEN 61 AND 80) > 216357808
+    THEN (SELECT avg(ss_ext_discount_amt)
+    FROM store_sales
+    WHERE ss_quantity BETWEEN 61 AND 80)
+  ELSE (SELECT avg(ss_net_paid)
+  FROM store_sales
+  WHERE ss_quantity BETWEEN 61 AND 80) END bucket4,
+  CASE WHEN (SELECT count(*)
+  FROM store_sales
+  WHERE ss_quantity BETWEEN 81 AND 100) > 184483884
+    THEN (SELECT avg(ss_ext_discount_amt)
+    FROM store_sales
+    WHERE ss_quantity BETWEEN 81 AND 100)
+  ELSE (SELECT avg(ss_net_paid)
+  FROM store_sales
+  WHERE ss_quantity BETWEEN 81 AND 100) END bucket5
+FROM reason
+WHERE r_reason_sk = 1

http://git-wip-us.apache.org/repos/asf/spark/blob/a78d6ce3/sql/core/src/test/resources/tpcds/q90.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds/q90.sql 
b/sql/core/src/test/resources/tpcds/q90.sql
new file mode 100755
index 0000000..85e35bf
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds/q90.sql
@@ -0,0 +1,19 @@
+SELECT cast(amc AS DECIMAL(15, 4)) / cast(pmc AS DECIMAL(15, 4)) am_pm_ratio
+FROM (SELECT count(*) amc
+FROM web_sales, household_demographics, time_dim, web_page
+WHERE ws_sold_time_sk = time_dim.t_time_sk
+  AND ws_ship_hdemo_sk = household_demographics.hd_demo_sk
+  AND ws_web_page_sk = web_page.wp_web_page_sk
+  AND time_dim.t_hour BETWEEN 8 AND 8 + 1
+  AND household_demographics.hd_dep_count = 6
+  AND web_page.wp_char_count BETWEEN 5000 AND 5200) at,
+  (SELECT count(*) pmc
+  FROM web_sales, household_demographics, time_dim, web_page
+  WHERE ws_sold_time_sk = time_dim.t_time_sk
+    AND ws_ship_hdemo_sk = household_demographics.hd_demo_sk
+    AND ws_web_page_sk = web_page.wp_web_page_sk
+    AND time_dim.t_hour BETWEEN 19 AND 19 + 1
+    AND household_demographics.hd_dep_count = 6
+    AND web_page.wp_char_count BETWEEN 5000 AND 5200) pt
+ORDER BY am_pm_ratio
+LIMIT 100

http://git-wip-us.apache.org/repos/asf/spark/blob/a78d6ce3/sql/core/src/test/resources/tpcds/q91.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds/q91.sql 
b/sql/core/src/test/resources/tpcds/q91.sql
new file mode 100755
index 0000000..9ca7ce0
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds/q91.sql
@@ -0,0 +1,23 @@
+SELECT
+  cc_call_center_id Call_Center,
+  cc_name Call_Center_Name,
+  cc_manager Manager,
+  sum(cr_net_loss) Returns_Loss
+FROM
+  call_center, catalog_returns, date_dim, customer, customer_address,
+  customer_demographics, household_demographics
+WHERE
+  cr_call_center_sk = cc_call_center_sk
+    AND cr_returned_date_sk = d_date_sk
+    AND cr_returning_customer_sk = c_customer_sk
+    AND cd_demo_sk = c_current_cdemo_sk
+    AND hd_demo_sk = c_current_hdemo_sk
+    AND ca_address_sk = c_current_addr_sk
+    AND d_year = 1998
+    AND d_moy = 11
+    AND ((cd_marital_status = 'M' AND cd_education_status = 'Unknown')
+    OR (cd_marital_status = 'W' AND cd_education_status = 'Advanced Degree'))
+    AND hd_buy_potential LIKE 'Unknown%'
+    AND ca_gmt_offset = -7
+GROUP BY cc_call_center_id, cc_name, cc_manager, cd_marital_status, 
cd_education_status
+ORDER BY sum(cr_net_loss) DESC

http://git-wip-us.apache.org/repos/asf/spark/blob/a78d6ce3/sql/core/src/test/resources/tpcds/q92.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds/q92.sql 
b/sql/core/src/test/resources/tpcds/q92.sql
new file mode 100755
index 0000000..99129c3
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds/q92.sql
@@ -0,0 +1,16 @@
+SELECT sum(ws_ext_discount_amt) AS `Excess Discount Amount `
+FROM web_sales, item, date_dim
+WHERE i_manufact_id = 350
+  AND i_item_sk = ws_item_sk
+  AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + INTERVAL 
90 days)
+  AND d_date_sk = ws_sold_date_sk
+  AND ws_ext_discount_amt >
+  (
+    SELECT 1.3 * avg(ws_ext_discount_amt)
+    FROM web_sales, date_dim
+    WHERE ws_item_sk = i_item_sk
+      AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + 
INTERVAL 90 days)
+      AND d_date_sk = ws_sold_date_sk
+  )
+ORDER BY sum(ws_ext_discount_amt)
+LIMIT 100

http://git-wip-us.apache.org/repos/asf/spark/blob/a78d6ce3/sql/core/src/test/resources/tpcds/q93.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds/q93.sql 
b/sql/core/src/test/resources/tpcds/q93.sql
new file mode 100755
index 0000000..222dc31
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds/q93.sql
@@ -0,0 +1,19 @@
+SELECT
+  ss_customer_sk,
+  sum(act_sales) sumsales
+FROM (SELECT
+  ss_item_sk,
+  ss_ticket_number,
+  ss_customer_sk,
+  CASE WHEN sr_return_quantity IS NOT NULL
+    THEN (ss_quantity - sr_return_quantity) * ss_sales_price
+  ELSE (ss_quantity * ss_sales_price) END act_sales
+FROM store_sales
+  LEFT OUTER JOIN store_returns
+    ON (sr_item_sk = ss_item_sk AND sr_ticket_number = ss_ticket_number)
+  ,
+  reason
+WHERE sr_reason_sk = r_reason_sk AND r_reason_desc = 'reason 28') t
+GROUP BY ss_customer_sk
+ORDER BY sumsales, ss_customer_sk
+LIMIT 100

http://git-wip-us.apache.org/repos/asf/spark/blob/a78d6ce3/sql/core/src/test/resources/tpcds/q94.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds/q94.sql 
b/sql/core/src/test/resources/tpcds/q94.sql
new file mode 100755
index 0000000..d6de3d7
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds/q94.sql
@@ -0,0 +1,23 @@
+SELECT
+  count(DISTINCT ws_order_number) AS `order count `,
+  sum(ws_ext_ship_cost) AS `total shipping cost `,
+  sum(ws_net_profit) AS `total net profit `
+FROM
+  web_sales ws1, date_dim, customer_address, web_site
+WHERE
+  d_date BETWEEN '1999-02-01' AND
+  (CAST('1999-02-01' AS DATE) + INTERVAL 60 days)
+    AND ws1.ws_ship_date_sk = d_date_sk
+    AND ws1.ws_ship_addr_sk = ca_address_sk
+    AND ca_state = 'IL'
+    AND ws1.ws_web_site_sk = web_site_sk
+    AND web_company_name = 'pri'
+    AND EXISTS(SELECT *
+               FROM web_sales ws2
+               WHERE ws1.ws_order_number = ws2.ws_order_number
+                 AND ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk)
+    AND NOT EXISTS(SELECT *
+                   FROM web_returns wr1
+                   WHERE ws1.ws_order_number = wr1.wr_order_number)
+ORDER BY count(DISTINCT ws_order_number)
+LIMIT 100

http://git-wip-us.apache.org/repos/asf/spark/blob/a78d6ce3/sql/core/src/test/resources/tpcds/q95.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds/q95.sql 
b/sql/core/src/test/resources/tpcds/q95.sql
new file mode 100755
index 0000000..df71f00
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds/q95.sql
@@ -0,0 +1,29 @@
+WITH ws_wh AS
+(SELECT
+    ws1.ws_order_number,
+    ws1.ws_warehouse_sk wh1,
+    ws2.ws_warehouse_sk wh2
+  FROM web_sales ws1, web_sales ws2
+  WHERE ws1.ws_order_number = ws2.ws_order_number
+    AND ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk)
+SELECT
+  count(DISTINCT ws_order_number) AS `order count `,
+  sum(ws_ext_ship_cost) AS `total shipping cost `,
+  sum(ws_net_profit) AS `total net profit `
+FROM
+  web_sales ws1, date_dim, customer_address, web_site
+WHERE
+  d_date BETWEEN '1999-02-01' AND
+  (CAST('1999-02-01' AS DATE) + INTERVAL 60 DAY)
+    AND ws1.ws_ship_date_sk = d_date_sk
+    AND ws1.ws_ship_addr_sk = ca_address_sk
+    AND ca_state = 'IL'
+    AND ws1.ws_web_site_sk = web_site_sk
+    AND web_company_name = 'pri'
+    AND ws1.ws_order_number IN (SELECT ws_order_number
+  FROM ws_wh)
+    AND ws1.ws_order_number IN (SELECT wr_order_number
+  FROM web_returns, ws_wh
+  WHERE wr_order_number = ws_wh.ws_order_number)
+ORDER BY count(DISTINCT ws_order_number)
+LIMIT 100

http://git-wip-us.apache.org/repos/asf/spark/blob/a78d6ce3/sql/core/src/test/resources/tpcds/q96.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds/q96.sql 
b/sql/core/src/test/resources/tpcds/q96.sql
new file mode 100755
index 0000000..7ab17e7
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds/q96.sql
@@ -0,0 +1,11 @@
+SELECT count(*)
+FROM store_sales, household_demographics, time_dim, store
+WHERE ss_sold_time_sk = time_dim.t_time_sk
+  AND ss_hdemo_sk = household_demographics.hd_demo_sk
+  AND ss_store_sk = s_store_sk
+  AND time_dim.t_hour = 20
+  AND time_dim.t_minute >= 30
+  AND household_demographics.hd_dep_count = 7
+  AND store.s_store_name = 'ese'
+ORDER BY count(*)
+LIMIT 100

http://git-wip-us.apache.org/repos/asf/spark/blob/a78d6ce3/sql/core/src/test/resources/tpcds/q97.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds/q97.sql 
b/sql/core/src/test/resources/tpcds/q97.sql
new file mode 100755
index 0000000..e7e0b1a
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds/q97.sql
@@ -0,0 +1,30 @@
+WITH ssci AS (
+  SELECT
+    ss_customer_sk customer_sk,
+    ss_item_sk item_sk
+  FROM store_sales, date_dim
+  WHERE ss_sold_date_sk = d_date_sk
+    AND d_month_seq BETWEEN 1200 AND 1200 + 11
+  GROUP BY ss_customer_sk, ss_item_sk),
+    csci AS (
+    SELECT
+      cs_bill_customer_sk customer_sk,
+      cs_item_sk item_sk
+    FROM catalog_sales, date_dim
+    WHERE cs_sold_date_sk = d_date_sk
+      AND d_month_seq BETWEEN 1200 AND 1200 + 11
+    GROUP BY cs_bill_customer_sk, cs_item_sk)
+SELECT
+  sum(CASE WHEN ssci.customer_sk IS NOT NULL AND csci.customer_sk IS NULL
+    THEN 1
+      ELSE 0 END) store_only,
+  sum(CASE WHEN ssci.customer_sk IS NULL AND csci.customer_sk IS NOT NULL
+    THEN 1
+      ELSE 0 END) catalog_only,
+  sum(CASE WHEN ssci.customer_sk IS NOT NULL AND csci.customer_sk IS NOT NULL
+    THEN 1
+      ELSE 0 END) store_and_catalog
+FROM ssci
+  FULL OUTER JOIN csci ON (ssci.customer_sk = csci.customer_sk
+    AND ssci.item_sk = csci.item_sk)
+LIMIT 100

http://git-wip-us.apache.org/repos/asf/spark/blob/a78d6ce3/sql/core/src/test/resources/tpcds/q98.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds/q98.sql 
b/sql/core/src/test/resources/tpcds/q98.sql
new file mode 100755
index 0000000..bb10d4b
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds/q98.sql
@@ -0,0 +1,21 @@
+SELECT
+  i_item_desc,
+  i_category,
+  i_class,
+  i_current_price,
+  sum(ss_ext_sales_price) AS itemrevenue,
+  sum(ss_ext_sales_price) * 100 / sum(sum(ss_ext_sales_price))
+  OVER
+  (PARTITION BY i_class) AS revenueratio
+FROM
+  store_sales, item, date_dim
+WHERE
+  ss_item_sk = i_item_sk
+    AND i_category IN ('Sports', 'Books', 'Home')
+    AND ss_sold_date_sk = d_date_sk
+    AND d_date BETWEEN cast('1999-02-22' AS DATE)
+  AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days)
+GROUP BY
+  i_item_id, i_item_desc, i_category, i_class, i_current_price
+ORDER BY
+  i_category, i_class, i_item_id, i_item_desc, revenueratio

http://git-wip-us.apache.org/repos/asf/spark/blob/a78d6ce3/sql/core/src/test/resources/tpcds/q99.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds/q99.sql 
b/sql/core/src/test/resources/tpcds/q99.sql
new file mode 100755
index 0000000..f1a3d4d
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds/q99.sql
@@ -0,0 +1,34 @@
+SELECT
+  substr(w_warehouse_name, 1, 20),
+  sm_type,
+  cc_name,
+  sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk <= 30)
+    THEN 1
+      ELSE 0 END)  AS `30 days `,
+  sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 30) AND
+    (cs_ship_date_sk - cs_sold_date_sk <= 60)
+    THEN 1
+      ELSE 0 END)  AS `31 - 60 days `,
+  sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 60) AND
+    (cs_ship_date_sk - cs_sold_date_sk <= 90)
+    THEN 1
+      ELSE 0 END)  AS `61 - 90 days `,
+  sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 90) AND
+    (cs_ship_date_sk - cs_sold_date_sk <= 120)
+    THEN 1
+      ELSE 0 END)  AS `91 - 120 days `,
+  sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 120)
+    THEN 1
+      ELSE 0 END)  AS `>120 days `
+FROM
+  catalog_sales, warehouse, ship_mode, call_center, date_dim
+WHERE
+  d_month_seq BETWEEN 1200 AND 1200 + 11
+    AND cs_ship_date_sk = d_date_sk
+    AND cs_warehouse_sk = w_warehouse_sk
+    AND cs_ship_mode_sk = sm_ship_mode_sk
+    AND cs_call_center_sk = cc_call_center_sk
+GROUP BY
+  substr(w_warehouse_name, 1, 20), sm_type, cc_name
+ORDER BY substr(w_warehouse_name, 1, 20), sm_type, cc_name
+LIMIT 100

http://git-wip-us.apache.org/repos/asf/spark/blob/a78d6ce3/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala
new file mode 100644
index 0000000..398d8d9
--- /dev/null
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.benchmark
+
+import java.io.File
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
+import org.apache.spark.sql.catalyst.expressions.SubqueryExpression
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.util._
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.util.Benchmark
+
+/**
+ * Benchmark to measure TPCDS query performance.
+ * To run this:
+ *  spark-submit --class <this class> --jars <spark sql test jar>
+ */
+object TPCDSQueryBenchmark {
+  val conf =
+    new SparkConf()
+      .setMaster("local[1]")
+      .setAppName("test-sql-context")
+      .set("spark.sql.parquet.compression.codec", "snappy")
+      .set("spark.sql.shuffle.partitions", "4")
+      .set("spark.driver.memory", "3g")
+      .set("spark.executor.memory", "3g")
+      .set("spark.sql.autoBroadcastJoinThreshold", (20 * 1024 * 1024).toString)
+
+  val spark = SparkSession.builder.config(conf).getOrCreate()
+
+  val tables = Seq("catalog_page", "catalog_returns", "customer", 
"customer_address",
+    "customer_demographics", "date_dim", "household_demographics", 
"inventory", "item",
+    "promotion", "store", "store_returns", "catalog_sales", "web_sales", 
"store_sales",
+    "web_returns", "web_site", "reason", "call_center", "warehouse", 
"ship_mode", "income_band",
+    "time_dim", "web_page")
+
+  def setupTables(dataLocation: String): Map[String, Long] = {
+    tables.map { tableName =>
+      
spark.read.parquet(s"$dataLocation/$tableName").createOrReplaceTempView(tableName)
+      tableName -> spark.table(tableName).count()
+    }.toMap
+  }
+
+  def tpcdsAll(dataLocation: String, queries: Seq[String]): Unit = {
+    require(dataLocation.nonEmpty,
+      "please modify the value of dataLocation to point to your local TPCDS 
data")
+    val tableSizes = setupTables(dataLocation)
+    spark.conf.set(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key, "true")
+    spark.conf.set(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key, "true")
+    queries.foreach { name =>
+      val queryString = fileToString(new 
File(Thread.currentThread().getContextClassLoader
+        .getResource(s"tpcds/$name.sql").getFile))
+
+      // This is an indirect hack to estimate the size of each query's input 
by traversing the
+      // logical plan and adding up the sizes of all tables that appear in the 
plan. Note that this
+      // currently doesn't take WITH subqueries into account which might lead 
to fairly inaccurate
+      // per-row processing time for those cases.
+      val queryRelations = scala.collection.mutable.HashSet[String]()
+      spark.sql(queryString).queryExecution.logical.map {
+        case ur @ UnresolvedRelation(t: TableIdentifier, _) =>
+          queryRelations.add(t.table)
+        case lp: LogicalPlan =>
+          lp.expressions.foreach { _ foreach {
+            case subquery: SubqueryExpression =>
+              subquery.plan.foreach {
+                case ur @ UnresolvedRelation(t: TableIdentifier, _) =>
+                  queryRelations.add(t.table)
+                case _ =>
+              }
+            case _ =>
+          }
+        }
+        case _ =>
+      }
+      val numRows = queryRelations.map(tableSizes.getOrElse(_, 0L)).sum
+      val benchmark = new Benchmark(s"TPCDS Snappy", numRows, 5)
+      benchmark.addCase(name) { i =>
+        spark.sql(queryString).collect()
+      }
+      benchmark.run()
+    }
+  }
+
+  def main(args: Array[String]): Unit = {
+
+    // List of all TPC-DS queries
+    val tpcdsQueries = Seq(
+      "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11",
+      "q12", "q13", "q14a", "q14b", "q15", "q16", "q17", "q18", "q19", "q20",
+      "q21", "q22", "q23a", "q23b", "q24a", "q24b", "q25", "q26", "q27", 
"q28", "q29", "q30",
+      "q31", "q32", "q33", "q34", "q35", "q36", "q37", "q38", "q39a", "q39b", 
"q40",
+      "q41", "q42", "q43", "q44", "q45", "q46", "q47", "q48", "q49", "q50",
+      "q51", "q52", "q53", "q54", "q55", "q56", "q57", "q58", "q59", "q60",
+      "q61", "q62", "q63", "q64", "q65", "q66", "q67", "q68", "q69", "q70",
+      "q71", "q72", "q73", "q74", "q75", "q76", "q77", "q78", "q79", "q80",
+      "q81", "q82", "q83", "q84", "q85", "q86", "q87", "q88", "q89", "q90",
+      "q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99")
+
+    // In order to run this benchmark, please follow the instructions at
+    // https://github.com/databricks/spark-sql-perf/blob/master/README.md to 
generate the TPCDS data
+    // locally (preferably with a scale factor of 5 for benchmarking). 
Thereafter, the value of
+    // dataLocation below needs to be set to the location where the generated 
data is stored.
+    val dataLocation = ""
+
+    tpcdsAll(dataLocation, queries = tpcdsQueries)
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/a78d6ce3/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/TPCDSBenchmark.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/TPCDSBenchmark.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/TPCDSBenchmark.scala
deleted file mode 100644
index 228ae6f..0000000
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/TPCDSBenchmark.scala
+++ /dev/null
@@ -1,1226 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.parquet
-
-import org.apache.spark.SparkConf
-import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
-import org.apache.spark.util.Benchmark
-
-/**
- * Benchmark to measure TPCDS query performance.
- * To run this:
- *  spark-submit --class <this class> --jars <spark sql test jar>
- */
-object TPCDSBenchmark {
-  val conf = new SparkConf()
-  conf.set("spark.sql.parquet.compression.codec", "snappy")
-  conf.set("spark.sql.shuffle.partitions", "4")
-  conf.set("spark.driver.memory", "3g")
-  conf.set("spark.executor.memory", "3g")
-  conf.set("spark.sql.autoBroadcastJoinThreshold", (20 * 1024 * 1024).toString)
-  conf.setMaster("local[1]")
-  conf.setAppName("test-sql-context")
-
-  val spark = SparkSession.builder.config(conf).getOrCreate()
-
-  // These queries a subset of the TPCDS benchmark queries and are taken from
-  // 
https://github.com/databricks/spark-sql-perf/blob/master/src/main/scala/com/databricks/spark/
-  // sql/perf/tpcds/ImpalaKitQueries.scala
-  val tpcds = Seq(
-    ("q19", """
-              |select
-              |  i_brand_id,
-              |  i_brand,
-              |  i_manufact_id,
-              |  i_manufact,
-              |  sum(ss_ext_sales_price) ext_price
-              |from
-              |  store_sales
-              |  join item on (store_sales.ss_item_sk = item.i_item_sk)
-              |  join store on (store_sales.ss_store_sk = store.s_store_sk)
-              |  join date_dim on (store_sales.ss_sold_date_sk = 
date_dim.d_date_sk)
-              |  join customer on (store_sales.ss_customer_sk = 
customer.c_customer_sk)
-              |  join customer_address on
-              |    (customer.c_current_addr_sk = 
customer_address.ca_address_sk)
-              |where
-              |  ss_sold_date_sk between 2451484 and 2451513
-              |  and d_moy = 11
-              |  and d_year = 1999
-              |  and i_manager_id = 7
-              |  and substr(ca_zip, 1, 5) <> substr(s_zip, 1, 5)
-              |group by
-              |  i_brand,
-              |  i_brand_id,
-              |  i_manufact_id,
-              |  i_manufact
-              |order by
-              |  ext_price desc,
-              |  i_brand,
-              |  i_brand_id,
-              |  i_manufact_id,
-              |  i_manufact
-              |limit 100
-            """.stripMargin),
-
-    /*
-    Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4
-    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-    TPCDS Snappy (scale = 5):           Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
-    
-------------------------------------------------------------------------------------------
-    q19                                      1710 / 1858          8.7         
114.5       1.0X
-     */
-
-    ("q27", """
-              |select
-              |  i_item_id,
-              |  s_state,
-              |  avg(ss_quantity) agg1,
-              |  avg(ss_list_price) agg2,
-              |  avg(ss_coupon_amt) agg3,
-              |  avg(ss_sales_price) agg4
-              |from
-              |  store_sales
-              |  join store on (store_sales.ss_store_sk = store.s_store_sk)
-              |  join customer_demographics on
-              |    (store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk)
-              |  join item on (store_sales.ss_item_sk = item.i_item_sk)
-              |  join date_dim on (store_sales.ss_sold_date_sk = 
date_dim.d_date_sk)
-              |where
-              |  ss_sold_date_sk between 2450815 and 2451179  -- partition key 
filter
-              |  and d_year = 1998
-              |  and cd_gender = 'F'
-              |  and cd_marital_status = 'W'
-              |  and cd_education_status = 'Primary'
-              |  and s_state in ('WI', 'CA', 'TX', 'FL', 'WA', 'TN')
-              |group by
-              |  i_item_id,
-              |  s_state
-              |order by
-              |  i_item_id,
-              |  s_state
-              |limit 100
-            """.stripMargin),
-
-    /*
-    Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4
-    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-    TPCDS Snappy (scale = 5):           Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
-    
-------------------------------------------------------------------------------------------
-    q27                                      2016 / 2180          8.2         
122.6       1.0X
-     */
-
-    ("q3", """
-             |select
-             |  dt.d_year,
-             |  item.i_brand_id brand_id,
-             |  item.i_brand brand,
-             |  sum(ss_ext_sales_price) sum_agg
-             |from
-             |  store_sales
-             |  join item on (store_sales.ss_item_sk = item.i_item_sk)
-             |  join date_dim dt on (dt.d_date_sk = 
store_sales.ss_sold_date_sk)
-             |where
-             |  item.i_manufact_id = 436
-             |  and dt.d_moy = 12
-             |  and (ss_sold_date_sk between 2451149 and 2451179
-             |    or ss_sold_date_sk between 2451514 and 2451544
-             |    or ss_sold_date_sk between 2451880 and 2451910
-             |    or ss_sold_date_sk between 2452245 and 2452275
-             |    or ss_sold_date_sk between 2452610 and 2452640)
-             |group by
-             |  d_year,
-             |  item.i_brand,
-             |  item.i_brand_id
-             |order by
-             |  d_year,
-             |  sum_agg desc,
-             |  brand_id
-             |limit 100
-           """.stripMargin),
-
-    /*
-    Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4
-    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-    TPCDS Snappy (scale = 5):           Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
-    
-------------------------------------------------------------------------------------------
-    q3                                       1073 / 1140         13.5          
73.9       1.0X
-     */
-
-    ("q34", """
-              |select
-              |  c_last_name,
-              |  c_first_name,
-              |  c_salutation,
-              |  c_preferred_cust_flag,
-              |  ss_ticket_number,
-              |  cnt
-              |from
-              |  (select
-              |    ss_ticket_number,
-              |    ss_customer_sk,
-              |    count(*) cnt
-              |  from
-              |    store_sales
-              |    join household_demographics on
-              |      (store_sales.ss_hdemo_sk = 
household_demographics.hd_demo_sk)
-              |    join store on (store_sales.ss_store_sk = store.s_store_sk)
-              |    join date_dim on (store_sales.ss_sold_date_sk = 
date_dim.d_date_sk)
-              |  where
-              |    date_dim.d_year in (1998, 1998 + 1, 1998 + 2)
-              |    and (date_dim.d_dom between 1 and 3
-              |      or date_dim.d_dom between 25 and 28)
-              |    and (household_demographics.hd_buy_potential = '>10000'
-              |      or household_demographics.hd_buy_potential = 'unknown')
-              |    and household_demographics.hd_vehicle_count > 0
-              |    and (case when household_demographics.hd_vehicle_count > 0 
then
-              |        household_demographics.hd_dep_count / 
household_demographics.hd_vehicle_count
-              |      else null end) > 1.2
-              |    and ss_sold_date_sk between 2450816 and 2451910 -- 
partition key filter
-              |  group by
-              |    ss_ticket_number,
-              |    ss_customer_sk
-              |  ) dn
-              |join customer on (dn.ss_customer_sk = customer.c_customer_sk)
-              |where
-              |  cnt between 15 and 20
-              |order by
-              |  c_last_name,
-              |  c_first_name,
-              |  c_salutation,
-              |  c_preferred_cust_flag desc,
-              |  ss_ticket_number,
-              |  cnt
-              |limit 1000
-            """.stripMargin),
-
-    /*
-    Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4
-    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-    TPCDS Snappy (scale = 5):           Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
-    
-------------------------------------------------------------------------------------------
-    q34                                      1482 / 1734         10.0         
100.4       1.0X
-     */
-
-    ("q42", """
-              |select
-              |  d_year,
-              |  i_category_id,
-              |  i_category,
-              |  sum(ss_ext_sales_price) as total_price
-              |from
-              |  store_sales
-              |  join item on (store_sales.ss_item_sk = item.i_item_sk)
-              |  join date_dim dt on (dt.d_date_sk = 
store_sales.ss_sold_date_sk)
-              |where
-              |  item.i_manager_id = 1
-              |  and dt.d_moy = 12
-              |  and dt.d_year = 1998
-              |  and ss_sold_date_sk between 2451149 and 2451179  -- partition 
key filter
-              |group by
-              |  d_year,
-              |  i_category_id,
-              |  i_category
-              |order by
-              |  total_price desc,
-              |  d_year,
-              |  i_category_id,
-              |  i_category
-              |limit 100
-            """.stripMargin),
-
-    /*
-    Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4
-    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-    TPCDS Snappy (scale = 5):           Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
-    
-------------------------------------------------------------------------------------------
-    q42                                      1125 / 1357         12.9          
77.4       1.0X
-     */
-
-    ("q43", """
-              |select
-              |  s_store_name,
-              |  s_store_id,
-              |  sum(case when (d_day_name = 'Sunday') then ss_sales_price 
else null end) sun_sales,
-              |  sum(case when (d_day_name = 'Monday') then ss_sales_price 
else null end) mon_sales,
-              |  sum(case when (d_day_name = 'Tuesday') then
-              |    ss_sales_price else null end) tue_sales,
-              |  sum(case when (d_day_name = 'Wednesday') then
-              |    ss_sales_price else null end) wed_sales,
-              |  sum(case when (d_day_name = 'Thursday') then
-              |    ss_sales_price else null end) thu_sales,
-              |  sum(case when (d_day_name = 'Friday') then ss_sales_price 
else null end) fri_sales,
-              |  sum(case when (d_day_name = 'Saturday') then
-              |    ss_sales_price else null end) sat_sales
-              |from
-              |  store_sales
-              |  join store on (store_sales.ss_store_sk = store.s_store_sk)
-              |  join date_dim on (store_sales.ss_sold_date_sk = 
date_dim.d_date_sk)
-              |where
-              |  s_gmt_offset = -5
-              |  and d_year = 1998
-              |  and ss_sold_date_sk between 2450816 and 2451179  -- partition 
key filter
-              |group by
-              |  s_store_name,
-              |  s_store_id
-              |order by
-              |  s_store_name,
-              |  s_store_id,
-              |  sun_sales,
-              |  mon_sales,
-              |  tue_sales,
-              |  wed_sales,
-              |  thu_sales,
-              |  fri_sales,
-              |  sat_sales
-              |limit 100
-            """.stripMargin),
-
-    /*
-    Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4
-    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-    TPCDS Snappy (scale = 5):           Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
-    
-------------------------------------------------------------------------------------------
-    q43                                      1681 / 1985          8.6         
116.1       1.0X
-     */
-
-    ("q46", """
-              |select
-              |  c_last_name,
-              |  c_first_name,
-              |  ca_city,
-              |  bought_city,
-              |  ss_ticket_number,
-              |  amt,
-              |  profit
-              |from
-              |  (select
-              |    ss_ticket_number,
-              |    ss_customer_sk,
-              |    ca_city bought_city,
-              |    sum(ss_coupon_amt) amt,
-              |    sum(ss_net_profit) profit
-              |  from
-              |    store_sales
-              |    join store on (store_sales.ss_store_sk = store.s_store_sk)
-              |    join household_demographics on
-              |      (store_sales.ss_hdemo_sk = 
household_demographics.hd_demo_sk)
-              |    join date_dim on (store_sales.ss_sold_date_sk = 
date_dim.d_date_sk)
-              |    join customer_address on
-              |      (store_sales.ss_addr_sk = customer_address.ca_address_sk)
-              |  where
-              |    store.s_city in ('Midway', 'Concord', 'Spring Hill', 
'Brownsville', 'Greenville')
-              |    and (household_demographics.hd_dep_count = 5
-              |      or household_demographics.hd_vehicle_count = 3)
-              |    and date_dim.d_dow in (6, 0)
-              |    and date_dim.d_year in (1999, 1999 + 1, 1999 + 2)
-              |      group by
-              |    ss_ticket_number,
-              |    ss_customer_sk,
-              |    ss_addr_sk,
-              |    ca_city
-              |  ) dn
-              |  join customer on (dn.ss_customer_sk = customer.c_customer_sk)
-              |  join customer_address current_addr on
-              |    (customer.c_current_addr_sk = current_addr.ca_address_sk)
-              |where
-              |  current_addr.ca_city <> bought_city
-              |order by
-              |  c_last_name,
-              |  c_first_name,
-              |  ca_city,
-              |  bought_city,
-              |  ss_ticket_number
-              |limit 100
-            """.stripMargin),
-
-    /*
-    Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4
-    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-    TPCDS Snappy (scale = 5):           Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
-    
-------------------------------------------------------------------------------------------
-    q46                                      2948 / 3218          5.1         
196.1       1.0X
-     */
-
-    ("q52", """
-              |select
-              |  d_year,
-              |  i_brand_id,
-              |  i_brand,
-              |  sum(ss_ext_sales_price) ext_price
-              |from
-              |  store_sales
-              |  join item on (store_sales.ss_item_sk = item.i_item_sk)
-              |  join date_dim dt on (store_sales.ss_sold_date_sk = 
dt.d_date_sk)
-              |where
-              |  i_manager_id = 1
-              |  and d_moy = 12
-              |  and d_year = 1998
-              |  and ss_sold_date_sk between 2451149 and 2451179 -- partition 
key filter
-              |group by
-              |  d_year,
-              |  i_brand,
-              |  i_brand_id
-              |order by
-              |  d_year,
-              |  ext_price desc,
-              |  i_brand_id
-              |limit 100
-            """.stripMargin),
-
-    /*
-    Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4
-    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-    TPCDS Snappy (scale = 5):           Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
-    
-------------------------------------------------------------------------------------------
-    q52                                      1099 / 1228         13.2          
75.7       1.0X
-     */
-
-    ("q53", """
-              |select
-              |  *
-              |from
-              |  (select
-              |    i_manufact_id,
-              |    sum(ss_sales_price) sum_sales
-              |  from
-              |    store_sales
-              |    join item on (store_sales.ss_item_sk = item.i_item_sk)
-              |    join store on (store_sales.ss_store_sk = store.s_store_sk)
-              |    join date_dim on (store_sales.ss_sold_date_sk = 
date_dim.d_date_sk)
-              |  where
-              |    ss_sold_date_sk between 2451911 and 2452275 -- partition 
key filter
-              |    and d_month_seq in(1212, 1212 + 1, 1212 + 2, 1212 + 3, 1212 
+ 4, 1212 + 5,
-              |      1212 + 6, 1212 + 7, 1212 + 8, 1212 + 9, 1212 + 10, 1212 + 
11)
-              |    and (
-              |         (i_category in('Books', 'Children', 'Electronics')
-              |           and i_class in('personal', 'portable', 'reference', 
'self-help')
-              |           and i_brand in('scholaramalgamalg #14', 
'scholaramalgamalg #7',
-              |             'exportiunivamalg #9', 'scholaramalgamalg #9')
-              |         )
-              |         or
-              |         (i_category in('Women', 'Music', 'Men')
-              |           and i_class in('accessories', 'classical', 
'fragrances', 'pants')
-              |           and i_brand in('amalgimporto #1', 'edu packscholar 
#1',
-              |             'exportiimporto #1', 'importoamalg #1')
-              |         )
-              |       )
-              |  group by
-              |    i_manufact_id,
-              |    d_qoy
-              |  ) tmp1
-              |order by
-              |  sum_sales,
-              |  i_manufact_id
-              |limit 100
-            """.stripMargin),
-
-    /*
-    Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4
-    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-    TPCDS Snappy (scale = 5):           Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
-    
-------------------------------------------------------------------------------------------
-    q53                                       968 / 1020         15.0          
66.6       1.0X
-     */
-
-    ("q55", """
-              |select
-              |  i_brand_id,
-              |  i_brand,
-              |  sum(ss_ext_sales_price) ext_price
-              |from
-              |  store_sales
-              |  join item on (store_sales.ss_item_sk = item.i_item_sk)
-              |  join date_dim on (store_sales.ss_sold_date_sk = 
date_dim.d_date_sk)
-              |where
-              |  i_manager_id = 36
-              |  and d_moy = 12
-              |  and d_year = 2001
-              |  and ss_sold_date_sk between 2452245 and 2452275 -- partition 
key filter
-              |group by
-              |  i_brand,
-              |  i_brand_id
-              |order by
-              |  ext_price desc,
-              |  i_brand_id
-              |limit 100
-            """.stripMargin),
-
-    /*
-    Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4
-    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-    TPCDS Snappy (scale = 5):           Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
-    
-------------------------------------------------------------------------------------------
-    q55                                      1002 / 1020         14.5          
69.0       1.0X
-     */
-
-    ("q59", """
-              |select
-              |  s_store_name1,
-              |  s_store_id1,
-              |  d_week_seq1,
-              |  sun_sales1 / sun_sales2,
-              |  mon_sales1 / mon_sales2,
-              |  tue_sales1 / tue_sales2,
-              |  wed_sales1 / wed_sales2,
-              |  thu_sales1 / thu_sales2,
-              |  fri_sales1 / fri_sales2,
-              |  sat_sales1 / sat_sales2
-              |from
-              |  (select
-              |    s_store_name s_store_name1,
-              |    wss.d_week_seq d_week_seq1,
-              |    s_store_id s_store_id1,
-              |    sun_sales sun_sales1,
-              |    mon_sales mon_sales1,
-              |    tue_sales tue_sales1,
-              |    wed_sales wed_sales1,
-              |    thu_sales thu_sales1,
-              |    fri_sales fri_sales1,
-              |    sat_sales sat_sales1
-              |  from
-              |    (select
-              |      d_week_seq,
-              |      ss_store_sk,
-              |      sum(case when(d_day_name = 'Sunday') then
-              |        ss_sales_price else null end) sun_sales,
-              |      sum(case when(d_day_name = 'Monday') then
-              |        ss_sales_price else null end) mon_sales,
-              |      sum(case when(d_day_name = 'Tuesday') then
-              |        ss_sales_price else null end) tue_sales,
-              |      sum(case when(d_day_name = 'Wednesday') then
-              |        ss_sales_price else null end) wed_sales,
-              |      sum(case when(d_day_name = 'Thursday') then
-              |        ss_sales_price else null end) thu_sales,
-              |      sum(case when(d_day_name = 'Friday') then
-              |        ss_sales_price else null end) fri_sales,
-              |      sum(case when(d_day_name = 'Saturday') then
-              |        ss_sales_price else null end) sat_sales
-              |    from
-              |      store_sales
-              |      join date_dim on (store_sales.ss_sold_date_sk = 
date_dim.d_date_sk)
-              |    where
-              |      ss_sold_date_sk between 2451088 and 2451452
-              |    group by
-              |      d_week_seq,
-              |      ss_store_sk
-              |    ) wss
-              |    join store on (wss.ss_store_sk = store.s_store_sk)
-              |    join date_dim d on (wss.d_week_seq = d.d_week_seq)
-              |  where
-              |    d_month_seq between 1185 and 1185 + 11
-              |  ) y
-              |  join
-              |  (select
-              |    s_store_name s_store_name2,
-              |    wss.d_week_seq d_week_seq2,
-              |    s_store_id s_store_id2,
-              |    sun_sales sun_sales2,
-              |    mon_sales mon_sales2,
-              |    tue_sales tue_sales2,
-              |    wed_sales wed_sales2,
-              |    thu_sales thu_sales2,
-              |    fri_sales fri_sales2,
-              |    sat_sales sat_sales2
-              |  from
-              |    (select
-              |      d_week_seq,
-              |      ss_store_sk,
-              |      sum(case when(d_day_name = 'Sunday') then
-              |        ss_sales_price else null end) sun_sales,
-              |      sum(case when(d_day_name = 'Monday') then
-              |        ss_sales_price else null end) mon_sales,
-              |      sum(case when(d_day_name = 'Tuesday') then
-              |        ss_sales_price else null end) tue_sales,
-              |      sum(case when(d_day_name = 'Wednesday') then
-              |        ss_sales_price else null end) wed_sales,
-              |      sum(case when(d_day_name = 'Thursday') then
-              |        ss_sales_price else null end) thu_sales,
-              |      sum(case when(d_day_name = 'Friday') then
-              |        ss_sales_price else null end) fri_sales,
-              |      sum(case when(d_day_name = 'Saturday') then
-              |        ss_sales_price else null end) sat_sales
-              |    from
-              |      store_sales
-              |      join date_dim on (store_sales.ss_sold_date_sk = 
date_dim.d_date_sk)
-              |    where
-              |      ss_sold_date_sk between 2451088 and 2451452
-              |    group by
-              |      d_week_seq,
-              |      ss_store_sk
-              |    ) wss
-              |    join store on (wss.ss_store_sk = store.s_store_sk)
-              |    join date_dim d on (wss.d_week_seq = d.d_week_seq)
-              |  where
-              |    d_month_seq between 1185 + 12 and 1185 + 23
-              |  ) x
-              |  on (y.s_store_id1 = x.s_store_id2)
-              |where
-              |  d_week_seq1 = d_week_seq2 - 52
-              |order by
-              |  s_store_name1,
-              |  s_store_id1,
-              |  d_week_seq1
-              |limit 100
-            """.stripMargin),
-
-    /*
-    Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4
-    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-    TPCDS Snappy (scale = 5):           Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
-    
-------------------------------------------------------------------------------------------
-    q59                                      1624 / 1663         17.9          
55.8       1.0X
-     */
-
-    ("q63", """
-              |select
-              |  *
-              |from
-              |  (select
-              |    i_manager_id,
-              |    sum(ss_sales_price) sum_sales
-              |  from
-              |    store_sales
-              |    join item on (store_sales.ss_item_sk = item.i_item_sk)
-              |    join store on (store_sales.ss_store_sk = store.s_store_sk)
-              |    join date_dim on (store_sales.ss_sold_date_sk = 
date_dim.d_date_sk)
-              |  where
-              |    ss_sold_date_sk between 2451911 and 2452275  -- partition 
key filter
-              |    and d_month_seq in (1212, 1212 + 1, 1212 + 2, 1212 + 3, 
1212 + 4, 1212 + 5,
-              |      1212 + 6, 1212 + 7, 1212 + 8, 1212 + 9, 1212 + 10, 1212 + 
11)
-              |    and (
-              |          (i_category in('Books', 'Children', 'Electronics')
-              |            and i_class in('personal', 'portable', 'refernece', 
'self-help')
-              |            and i_brand in('scholaramalgamalg #14', 
'scholaramalgamalg #7',
-              |              'exportiunivamalg #9', 'scholaramalgamalg #9')
-              |          )
-              |          or
-              |          (i_category in('Women', 'Music', 'Men')
-              |            and i_class in('accessories', 'classical', 
'fragrances', 'pants')
-              |            and i_brand in('amalgimporto #1', 'edu packscholar 
#1',
-              |              'exportiimporto #1', 'importoamalg #1')
-              |          )
-              |        )
-              |  group by
-              |    i_manager_id,
-              |    d_moy
-              |  ) tmp1
-              |order by
-              |  i_manager_id,
-              |  sum_sales
-              |limit 100
-            """.stripMargin),
-
-    /*
-    Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4
-    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-    TPCDS Snappy (scale = 5):           Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
-    
-------------------------------------------------------------------------------------------
-    q63                                       979 / 1006         14.8          
67.4       1.0X
-     */
-
-    ("q65", """
-              |select
-              |  s_store_name,
-              |  i_item_desc,
-              |  sc.revenue,
-              |  i_current_price,
-              |  i_wholesale_cost,
-              |  i_brand
-              |from
-              |  (select
-              |    ss_store_sk,
-              |    ss_item_sk,
-              |    sum(ss_sales_price) as revenue
-              |  from
-              |    store_sales
-              |    join date_dim on (store_sales.ss_sold_date_sk = 
date_dim.d_date_sk)
-              |  where
-              |    ss_sold_date_sk between 2451911 and 2452275  -- partition 
key filter
-              |    and d_month_seq between 1212 and 1212 + 11
-              |  group by
-              |    ss_store_sk,
-              |    ss_item_sk
-              |  ) sc
-              |  join item on (sc.ss_item_sk = item.i_item_sk)
-              |  join store on (sc.ss_store_sk = store.s_store_sk)
-              |  join
-              |  (select
-              |    ss_store_sk,
-              |    avg(revenue) as ave
-              |  from
-              |    (select
-              |      ss_store_sk,
-              |      ss_item_sk,
-              |      sum(ss_sales_price) as revenue
-              |    from
-              |      store_sales
-              |      join date_dim on (store_sales.ss_sold_date_sk = 
date_dim.d_date_sk)
-              |    where
-              |      ss_sold_date_sk between 2451911 and 2452275  -- partition 
key filter
-              |      and d_month_seq between 1212 and 1212 + 11
-              |    group by
-              |      ss_store_sk,
-              |      ss_item_sk
-              |    ) sa
-              |  group by
-              |    ss_store_sk
-              |  ) sb on (sc.ss_store_sk = sb.ss_store_sk) -- 676 rows
-              |where
-              |  sc.revenue <= 0.1 * sb.ave
-              |order by
-              |  s_store_name,
-              |  i_item_desc
-              |limit 100
-            """.stripMargin),
-
-    /*
-    Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4
-    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-    TPCDS Snappy (scale = 5):           Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
-    
-------------------------------------------------------------------------------------------
-    q65                                      7770 / 8097          3.7         
267.9       1.0X
-     */
-
-    ("q68", """
-              |select
-              |  c_last_name,
-              |  c_first_name,
-              |  ca_city,
-              |  bought_city,
-              |  ss_ticket_number,
-              |  extended_price,
-              |  extended_tax,
-              |  list_price
-              |from
-              |  (select
-              |    ss_ticket_number,
-              |    ss_customer_sk,
-              |    ca_city bought_city,
-              |    sum(ss_ext_sales_price) extended_price,
-              |    sum(ss_ext_list_price) list_price,
-              |    sum(ss_ext_tax) extended_tax
-              |  from
-              |    store_sales
-              |    join store on (store_sales.ss_store_sk = store.s_store_sk)
-              |    join household_demographics on
-              |      (store_sales.ss_hdemo_sk = 
household_demographics.hd_demo_sk)
-              |    join date_dim on (store_sales.ss_sold_date_sk = 
date_dim.d_date_sk)
-              |    join customer_address on
-              |      (store_sales.ss_addr_sk = customer_address.ca_address_sk)
-              |  where
-              |    store.s_city in('Midway', 'Fairview')
-              |    --and date_dim.d_dom between 1 and 2
-              |    --and date_dim.d_year in(1999, 1999 + 1, 1999 + 2)
-              |    -- and ss_date between '1999-01-01' and '2001-12-31'
-              |    -- and dayofmonth(ss_date) in (1,2)
-              |        and (household_demographics.hd_dep_count = 5
-              |      or household_demographics.hd_vehicle_count = 3)
-              |    and d_date between '1999-01-01' and '1999-03-31'
-              |    and ss_sold_date_sk between 2451180 and 2451269
-              |    -- partition key filter (3 months)
-              |  group by
-              |    ss_ticket_number,
-              |    ss_customer_sk,
-              |    ss_addr_sk,
-              |    ca_city
-              |  ) dn
-              |  join customer on (dn.ss_customer_sk = customer.c_customer_sk)
-              |  join customer_address current_addr on
-              |    (customer.c_current_addr_sk = current_addr.ca_address_sk)
-              |where
-              |  current_addr.ca_city <> bought_city
-              |order by
-              |  c_last_name,
-              |  ss_ticket_number
-              |limit 100
-            """.stripMargin),
-
-    /*
-    Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4
-    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-    TPCDS Snappy (scale = 5):           Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
-    
-------------------------------------------------------------------------------------------
-    q68                                      3105 / 3405          4.8         
206.5       1.0X
-     */
-
-    ("q7", """
-             |select
-             |  i_item_id,
-             |  avg(ss_quantity) agg1,
-             |  avg(ss_list_price) agg2,
-             |  avg(ss_coupon_amt) agg3,
-             |  avg(ss_sales_price) agg4
-             |from
-             |  store_sales
-             |  join customer_demographics on
-             |    (store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk)
-             |  join item on (store_sales.ss_item_sk = item.i_item_sk)
-             |  join promotion on (store_sales.ss_promo_sk = 
promotion.p_promo_sk)
-             |  join date_dim on (ss_sold_date_sk = d_date_sk)
-             |where
-             |  cd_gender = 'F'
-             |  and cd_marital_status = 'W'
-             |  and cd_education_status = 'Primary'
-             |  and (p_channel_email = 'N'
-             |    or p_channel_event = 'N')
-             |  and d_year = 1998
-             |  and ss_sold_date_sk between 2450815 and 2451179 -- partition 
key filter
-             |group by
-             |  i_item_id
-             |order by
-             |  i_item_id
-             |limit 100
-           """.stripMargin),
-
-    /*
-    Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4
-    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-    TPCDS Snappy (scale = 5):           Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
-    
-------------------------------------------------------------------------------------------
-    q7                                       2042 / 2333          8.1         
124.2       1.0X
-     */
-
-    ("q73", """
-              |select
-              |  c_last_name,
-              |  c_first_name,
-              |  c_salutation,
-              |  c_preferred_cust_flag,
-              |  ss_ticket_number,
-              |  cnt
-              |from
-              |  (select
-              |    ss_ticket_number,
-              |    ss_customer_sk,
-              |    count(*) cnt
-              |  from
-              |    store_sales
-              |    join household_demographics on
-              |      (store_sales.ss_hdemo_sk = 
household_demographics.hd_demo_sk)
-              |    join store on (store_sales.ss_store_sk = store.s_store_sk)
-              |    -- join date_dim on (store_sales.ss_sold_date_sk = 
date_dim.d_date_sk)
-              |  where
-              |    store.s_county in
-              |      ('Williamson County','Franklin Parish','Bronx 
County','Orange County')
-              |    -- and date_dim.d_dom between 1 and 2
-              |    -- and date_dim.d_year in(1998, 1998 + 1, 1998 + 2)
-              |    -- and ss_date between '1999-01-01' and '2001-12-02'
-              |    -- and dayofmonth(ss_date) in (1,2)
-              |    -- partition key filter
-              |    -- and ss_sold_date_sk in (2450816, 2450846, 2450847, 
2450874, 2450875, 2450905,
-              |    --                         2450906, 2450935, 2450936, 
2450966, 2450967,
-              |    --                         2450996, 2450997, 2451027, 
2451028, 2451058, 2451059,
-              |    --                         2451088, 2451089, 2451119, 
2451120, 2451149,
-              |    --                         2451150, 2451180, 2451181, 
2451211, 2451212, 2451239,
-              |    --                         2451240, 2451270, 2451271, 
2451300, 2451301,
-              |    --                         2451331, 2451332, 2451361, 
2451362, 2451392, 2451393,
-              |    --                         2451423, 2451424, 2451453, 
2451454, 2451484,
-              |    --                         2451485, 2451514, 2451515, 
2451545, 2451546, 2451576,
-              |    --                         2451577, 2451605, 2451606, 
2451636, 2451637,
-              |    --                         2451666, 2451667, 2451697, 
2451698, 2451727, 2451728,
-              |    --                         2451758, 2451759, 2451789, 
2451790, 2451819,
-              |    --                         2451820, 2451850, 2451851, 
2451880, 2451881)
-              |    and (household_demographics.hd_buy_potential = '>10000'
-              |      or household_demographics.hd_buy_potential = 'unknown')
-              |    and household_demographics.hd_vehicle_count > 0
-              |    and case when household_demographics.hd_vehicle_count > 0 
then
-              |        household_demographics.hd_dep_count / 
household_demographics.hd_vehicle_count
-              |      else null end > 1
-              |    and ss_sold_date_sk between 2451180 and 2451269
-              |    -- partition key filter (3 months)
-              |  group by
-              |    ss_ticket_number,
-              |    ss_customer_sk
-              |  ) dj
-              |  join customer on (dj.ss_customer_sk = customer.c_customer_sk)
-              |where
-              |  cnt between 1 and 5
-              |order by
-              |  cnt desc
-              |limit 1000
-            """.stripMargin),
-
-    /*
-    Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4
-    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-    TPCDS Snappy (scale = 5):           Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
-    
-------------------------------------------------------------------------------------------
-    q73                                      1124 / 1221         13.1          
76.5       1.0X
-     */
-
-    ("q79", """
-              |select
-              |  c_last_name,
-              |  c_first_name,
-              |  substr(s_city, 1, 30) as city,
-              |  ss_ticket_number,
-              |  amt,
-              |  profit
-              |from
-              |  (select
-              |    ss_ticket_number,
-              |    ss_customer_sk,
-              |    s_city,
-              |    sum(ss_coupon_amt) amt,
-              |    sum(ss_net_profit) profit
-              |  from
-              |    store_sales
-              |    join household_demographics on
-              |      (store_sales.ss_hdemo_sk = 
household_demographics.hd_demo_sk)
-              |    join date_dim on (store_sales.ss_sold_date_sk = 
date_dim.d_date_sk)
-              |    join store on (store_sales.ss_store_sk = store.s_store_sk)
-              |  where
-              |    store.s_number_employees between 200 and 295
-              |    and (household_demographics.hd_dep_count = 8
-              |      or household_demographics.hd_vehicle_count > 0)
-              |    and date_dim.d_dow = 1
-              |    and date_dim.d_year in (1998, 1998 + 1, 1998 + 2)
-              |    -- and ss_date between '1998-01-01' and '2000-12-25'
-              |    -- 156 days
-              |  and d_date between '1999-01-01' and '1999-03-31'
-              |  and ss_sold_date_sk between 2451180 and 2451269  -- partition 
key filter
-              |  group by
-              |    ss_ticket_number,
-              |    ss_customer_sk,
-              |    ss_addr_sk,
-              |    s_city
-              |  ) ms
-              |  join customer on (ms.ss_customer_sk = customer.c_customer_sk)
-              |order by
-              |  c_last_name,
-              |  c_first_name,
-              |  city,
-              |  profit
-              |limit 100
-            """.stripMargin),
-
-    /*
-    Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4
-    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-    TPCDS Snappy (scale = 5):           Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
-    
-------------------------------------------------------------------------------------------
-    q79                                      2029 / 2488          7.3         
137.5       1.0X
-     */
-
-      ("q8",
-        """
-          |select  s_store_name
-          |      ,sum(ss_net_profit)
-          | from store_sales
-          |     ,date_dim
-          |     ,store,
-          |     (select distinct a01.ca_zip
-          |     from
-          |     (SELECT substr(ca_zip,1,5) ca_zip
-          |      FROM customer_address
-          |      WHERE substr(ca_zip,1,5) IN ('89436', '30868', '65085', 
'22977', '83927', '77557',
-          |      '58429', '40697', '80614', '10502', '32779',
-          |      '91137', '61265', '98294', '17921', '18427', '21203', 
'59362', '87291', '84093',
-          |      '21505', '17184', '10866', '67898', '25797',
-          |      '28055', '18377', '80332', '74535', '21757', '29742', 
'90885', '29898', '17819',
-          |      '40811', '25990', '47513', '89531', '91068',
-          |      '10391', '18846', '99223', '82637', '41368', '83658', 
'86199', '81625', '26696',
-          |      '89338', '88425', '32200', '81427', '19053',
-          |      '77471', '36610', '99823', '43276', '41249', '48584', 
'83550', '82276', '18842',
-          |      '78890', '14090', '38123', '40936', '34425',
-          |      '19850', '43286', '80072', '79188', '54191', '11395', 
'50497', '84861', '90733',
-          |      '21068', '57666', '37119', '25004', '57835',
-          |      '70067', '62878', '95806', '19303', '18840', '19124', 
'29785', '16737', '16022',
-          |      '49613', '89977', '68310', '60069', '98360',
-          |      '48649', '39050', '41793', '25002', '27413', '39736', 
'47208', '16515', '94808',
-          |      '57648', '15009', '80015', '42961', '63982',
-          |      '21744', '71853', '81087', '67468', '34175', '64008', 
'20261', '11201', '51799',
-          |      '48043', '45645', '61163', '48375', '36447',
-          |      '57042', '21218', '41100', '89951', '22745', '35851', 
'83326', '61125', '78298',
-          |      '80752', '49858', '52940', '96976', '63792',
-          |      '11376', '53582', '18717', '90226', '50530', '94203', 
'99447', '27670', '96577',
-          |      '57856', '56372', '16165', '23427', '54561',
-          |      '28806', '44439', '22926', '30123', '61451', '92397', 
'56979', '92309', '70873',
-          |      '13355', '21801', '46346', '37562', '56458',
-          |      '28286', '47306', '99555', '69399', '26234', '47546', 
'49661', '88601', '35943',
-          |      '39936', '25632', '24611', '44166', '56648',
-          |      '30379', '59785', '11110', '14329', '93815', '52226', 
'71381', '13842', '25612',
-          |      '63294', '14664', '21077', '82626', '18799',
-          |      '60915', '81020', '56447', '76619', '11433', '13414', 
'42548', '92713', '70467',
-          |      '30884', '47484', '16072', '38936', '13036',
-          |      '88376', '45539', '35901', '19506', '65690', '73957', 
'71850', '49231', '14276',
-          |      '20005', '18384', '76615', '11635', '38177',
-          |      '55607', '41369', '95447', '58581', '58149', '91946', 
'33790', '76232', '75692',
-          |      '95464', '22246', '51061', '56692', '53121',
-          |      '77209', '15482', '10688', '14868', '45907', '73520', 
'72666', '25734', '17959',
-          |      '24677', '66446', '94627', '53535', '15560',
-          |      '41967', '69297', '11929', '59403', '33283', '52232', 
'57350', '43933', '40921',
-          |      '36635', '10827', '71286', '19736', '80619',
-          |      '25251', '95042', '15526', '36496', '55854', '49124', 
'81980', '35375', '49157',
-          |      '63512', '28944', '14946', '36503', '54010',
-          |      '18767', '23969', '43905', '66979', '33113', '21286', 
'58471', '59080', '13395',
-          |      '79144', '70373', '67031', '38360', '26705',
-          |      '50906', '52406', '26066', '73146', '15884', '31897', 
'30045', '61068', '45550',
-          |      '92454', '13376', '14354', '19770', '22928',
-          |      '97790', '50723', '46081', '30202', '14410', '20223', 
'88500', '67298', '13261',
-          |      '14172', '81410', '93578', '83583', '46047',
-          |      '94167', '82564', '21156', '15799', '86709', '37931', 
'74703', '83103', '23054',
-          |      '70470', '72008', '35709', '91911', '69998',
-          |      '20961', '70070', '63197', '54853', '88191', '91830', 
'49521', '19454', '81450',
-          |      '89091', '62378', '31904', '61869', '51744',
-          |      '36580', '85778', '36871', '48121', '28810', '83712', 
'45486', '67393', '26935',
-          |      '42393', '20132', '55349', '86057', '21309',
-          |      '80218', '10094', '11357', '48819', '39734', '40758', 
'30432', '21204', '29467',
-          |      '30214', '61024', '55307', '74621', '11622',
-          |      '68908', '33032', '52868', '99194', '99900', '84936', 
'69036', '99149', '45013',
-          |      '32895', '59004', '32322', '14933', '32936',
-          |      '33562', '72550', '27385', '58049', '58200', '16808', 
'21360', '32961', '18586',
-          |      '79307', '15492')) a01
-          |     inner join
-          |     (select ca_zip
-          |      from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt
-          |            FROM customer_address, customer
-          |            WHERE ca_address_sk = c_current_addr_sk and
-          |                  c_preferred_cust_flag='Y'
-          |            group by ca_zip
-          |            having count(*) > 10)A1
-          |      ) b11
-          |      on (a01.ca_zip = b11.ca_zip )) A2
-          | where ss_store_sk = s_store_sk
-          |  and ss_sold_date_sk = d_date_sk
-          |  and ss_sold_date_sk between 2451271 and 2451361
-          |  and d_qoy = 2 and d_year = 1999
-          |  and (substr(s_zip,1,2) = substr(a2.ca_zip,1,2))
-          | group by s_store_name
-          | order by s_store_name
-          |limit 100
-        """.stripMargin),
-
-    /*
-    Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4
-    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-    TPCDS Snappy (scale = 5):           Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
-    
-------------------------------------------------------------------------------------------
-    q8                                       1737 / 2197          8.7         
115.6       1.0X
-     */
-
-      ("q82", """
-                |select
-                |  i_item_id,
-                |  i_item_desc,
-                |  i_current_price
-                |from
-                |  store_sales
-                |  join item on (store_sales.ss_item_sk = item.i_item_sk)
-                |  join inventory on (item.i_item_sk = inventory.inv_item_sk)
-                |  join date_dim on (inventory.inv_date_sk = 
date_dim.d_date_sk)
-                |where
-                |  i_current_price between 30 and 30 + 30
-                |  and i_manufact_id in (437, 129, 727, 663)
-                |  and inv_quantity_on_hand between 100 and 500
-                |group by
-                |  i_item_id,
-                |  i_item_desc,
-                |  i_current_price
-                |order by
-                |  i_item_id
-                |limit 100
-              """.stripMargin),
-
-    /*
-    Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4
-    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-    TPCDS Snappy (scale = 5):           Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
-    
-------------------------------------------------------------------------------------------
-    q82                                     9399 / 10245          6.8         
147.2       1.0X
-     */
-
-    ("q89", """
-              |select
-              |  *
-              |from
-              |  (select
-              |    i_category,
-              |    i_class,
-              |    i_brand,
-              |    s_store_name,
-              |    s_company_name,
-              |    d_moy,
-              |    sum(ss_sales_price) sum_sales
-              |  from
-              |    store_sales
-              |    join item on (store_sales.ss_item_sk = item.i_item_sk)
-              |    join store on (store_sales.ss_store_sk = store.s_store_sk)
-              |    join date_dim on (store_sales.ss_sold_date_sk = 
date_dim.d_date_sk)
-              |  where
-              |    ss_sold_date_sk between 2451545 and 2451910  -- partition 
key filter
-              |    and d_year in (2000)
-              |    and ((i_category in('Home', 'Books', 'Electronics')
-              |          and i_class in('wallpaper', 'parenting', 'musical'))
-              |        or (i_category in('Shoes', 'Jewelry', 'Men')
-              |            and i_class in('womens', 'birdal', 'pants'))
-              |        )
-              |  group by
-              |    i_category,
-              |    i_class,
-              |    i_brand,
-              |    s_store_name,
-              |    s_company_name,
-              |    d_moy
-              |  ) tmp1
-              |order by
-              |  sum_sales,
-              |  s_store_name
-              |limit 100
-            """.stripMargin),
-
-    /*
-    Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4
-    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-    TPCDS Snappy (scale = 5):           Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
-    
-------------------------------------------------------------------------------------------
-    q89                                      1122 / 1274         12.9          
77.2       1.0X
-     */
-
-    ("q98", """
-              |select
-              |  i_item_desc,
-              |  i_category,
-              |  i_class,
-              |  i_current_price,
-              |  sum(ss_ext_sales_price) as itemrevenue
-              |from
-              |  store_sales
-              |  join item on (store_sales.ss_item_sk = item.i_item_sk)
-              |  join date_dim on (store_sales.ss_sold_date_sk = 
date_dim.d_date_sk)
-              |where
-              |  ss_sold_date_sk between 2451911 and 2451941
-              |  -- partition key filter (1 calendar month)
-              |  and d_date between '2001-01-01' and '2001-01-31'
-              |  and i_category in('Jewelry', 'Sports', 'Books')
-              |group by
-              |  i_item_id,
-              |  i_item_desc,
-              |  i_category,
-              |  i_class,
-              |  i_current_price
-              |order by
-              |  i_category,
-              |  i_class,
-              |  i_item_id,
-              |  i_item_desc
-              |  -- revenueratio
-              |limit 1000
-            """.stripMargin),
-
-    /*
-    Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4
-    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-    TPCDS Snappy (scale = 5):           Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
-    
-------------------------------------------------------------------------------------------
-    q98                                      1235 / 1542         11.8          
85.0       1.0X
-     */
-
-    ("ss_max", """
-                 |select
-                 |  count(*) as total,
-                 |  max(ss_sold_date_sk) as max_ss_sold_date_sk,
-                 |  max(ss_sold_time_sk) as max_ss_sold_time_sk,
-                 |  max(ss_item_sk) as max_ss_item_sk,
-                 |  max(ss_customer_sk) as max_ss_customer_sk,
-                 |  max(ss_cdemo_sk) as max_ss_cdemo_sk,
-                 |  max(ss_hdemo_sk) as max_ss_hdemo_sk,
-                 |  max(ss_addr_sk) as max_ss_addr_sk,
-                 |  max(ss_store_sk) as max_ss_store_sk,
-                 |  max(ss_promo_sk) as max_ss_promo_sk
-                 |from store_sales
-               """.stripMargin)
-
-    /*
-    Java HotSpot(TM) 64-Bit Server VM 1.8.0_73-b02 on Mac OS X 10.11.4
-    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-    TPCDS Snappy (scale = 5):           Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
-    
-------------------------------------------------------------------------------------------
-    ss_max                                   2305 / 2731          6.2         
160.0       1.0X
-     */
-
-  ).toArray
-
-  val tables = Seq("customer", "customer_address", "customer_demographics", 
"date_dim",
-    "household_demographics", "inventory", "item", "promotion", "store", 
"catalog_sales",
-    "web_sales", "store_sales")
-
-  def setupTables(dataLocation: String): Map[String, Long] = {
-    tables.map { tableName =>
-      
spark.read.parquet(s"$dataLocation/$tableName").createOrReplaceTempView(tableName)
-      tableName -> spark.table(tableName).count()
-    }.toMap
-  }
-
-  def tpcdsAll(dataLocation: String): Unit = {
-    require(dataLocation.nonEmpty,
-      "please modify the value of dataLocation to point to your local TPCDS 
data")
-    val tableSizes = setupTables(dataLocation)
-    spark.conf.set(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key, "true")
-    spark.conf.set(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key, "true")
-    tpcds.filter(q => q._1 != "").foreach {
-      case (name: String, query: String) =>
-        val numRows = spark.sql(query).queryExecution.logical.map {
-          case ur@UnresolvedRelation(t: TableIdentifier, _) =>
-            tableSizes.getOrElse(t.table, throw new 
RuntimeException(s"${t.table} not found."))
-          case _ => 0L
-        }.sum
-        val benchmark = new Benchmark("TPCDS Snappy (scale = 5)", numRows, 5)
-        benchmark.addCase(name) { i =>
-          spark.sql(query).collect()
-        }
-        benchmark.run()
-    }
-  }
-
-  def main(args: Array[String]): Unit = {
-
-    // In order to run this benchmark, please follow the instructions at
-    // https://github.com/databricks/spark-sql-perf/blob/master/README.md to 
generate the TPCDS data
-    // locally (preferably with a scale factor of 5 for benchmarking). 
Thereafter, the value of
-    // dataLocation below needs to be set to the location where the generated 
data is stored.
-    val dataLocation = ""
-
-    tpcdsAll(dataLocation)
-  }
-}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to