This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 7083ec0  [SPARK-28215][SQL][R] as_tibble was removed from Arrow R API
7083ec0 is described below

commit 7083ec051ed47b9c6500f2107650814f0ff9206f
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Mon Jul 1 13:21:06 2019 +0900

    [SPARK-28215][SQL][R] as_tibble was removed from Arrow R API
    
    ## What changes were proposed in this pull request?
    
    The new Arrow R API removed `as_tibble` as of
    https://github.com/apache/arrow/commit/2ef96c8623cbad1770f82e97df733bd881ab967b.
    As a result, Arrow optimization for DataFrame in R no longer works.
    
    This can be tested as below, after installing the latest Arrow:
    
    ```
    ./bin/sparkR --conf spark.sql.execution.arrow.sparkr.enabled=true
    ```
    
    ```
    > collect(createDataFrame(mtcars))
    ```
    
    Before this PR:
    ```
    > collect(createDataFrame(mtcars))
     Error in get("as_tibble", envir = asNamespace("arrow")) :
       object 'as_tibble' not found
    ```
    
    After:
    ```
    > collect(createDataFrame(mtcars))
        mpg cyl  disp  hp drat    wt  qsec vs am gear carb
    1  21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
    2  21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
    3  22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
    ...
    ```
    
    ## How was this patch tested?
    
    Manual test.
    
    Closes #25012 from viirya/SPARK-28215.
    
    Authored-by: Liang-Chi Hsieh <[email protected]>
    Signed-off-by: HyukjinKwon <[email protected]>
---
 R/pkg/R/DataFrame.R   | 10 ++++++++--
 R/pkg/R/deserialize.R | 13 ++++++++++---
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 439cad0..6f3c7c1 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1203,7 +1203,8 @@ setMethod("collect",
               requireNamespace1 <- requireNamespace
               if (requireNamespace1("arrow", quietly = TRUE)) {
                 read_arrow <- get("read_arrow", envir = asNamespace("arrow"), 
inherits = FALSE)
-                as_tibble <- get("as_tibble", envir = asNamespace("arrow"))
+                # Arrow drops `as_tibble` since 0.14.0, see ARROW-5190.
+                useAsTibble <- exists("as_tibble", envir = 
asNamespace("arrow"))
 
                 portAuth <- callJMethod(x@sdf, "collectAsArrowToR")
                 port <- portAuth[[1]]
@@ -1213,7 +1214,12 @@ setMethod("collect",
                 output <- tryCatch({
                   doServerAuth(conn, authSecret)
                   arrowTable <- read_arrow(readRaw(conn))
-                  as.data.frame(as_tibble(arrowTable), stringsAsFactors = 
stringsAsFactors)
+                  if (useAsTibble) {
+                    as_tibble <- get("as_tibble", envir = asNamespace("arrow"))
+                    as.data.frame(as_tibble(arrowTable), stringsAsFactors = 
stringsAsFactors)
+                  } else {
+                    as.data.frame(arrowTable, stringsAsFactors = 
stringsAsFactors)
+                  }
                 }, finally = {
                   close(conn)
                 })
diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R
index 191c51e..b38d245 100644
--- a/R/pkg/R/deserialize.R
+++ b/R/pkg/R/deserialize.R
@@ -237,7 +237,9 @@ readDeserializeInArrow <- function(inputCon) {
   if (requireNamespace1("arrow", quietly = TRUE)) {
     RecordBatchStreamReader <- get(
       "RecordBatchStreamReader", envir = asNamespace("arrow"), inherits = 
FALSE)
-    as_tibble <- get("as_tibble", envir = asNamespace("arrow"))
+    # Arrow drops `as_tibble` since 0.14.0, see ARROW-5190.
+    useAsTibble <- exists("as_tibble", envir = asNamespace("arrow"))
+
 
     # Currently, there looks no way to read batch by batch by socket 
connection in R side,
     # See ARROW-4512. Therefore, it reads the whole Arrow streaming-formatted 
binary at once
@@ -246,8 +248,13 @@ readDeserializeInArrow <- function(inputCon) {
     arrowData <- readBin(inputCon, raw(), as.integer(dataLen), endian = "big")
     batches <- RecordBatchStreamReader(arrowData)$batches()
 
-    # Read all groupped batches. Tibble -> data.frame is cheap.
-    lapply(batches, function(batch) as.data.frame(as_tibble(batch)))
+    if (useAsTibble) {
+      as_tibble <- get("as_tibble", envir = asNamespace("arrow"))
+      # Read all groupped batches. Tibble -> data.frame is cheap.
+      lapply(batches, function(batch) as.data.frame(as_tibble(batch)))
+    } else {
+      lapply(batches, function(batch) as.data.frame(batch))
+    }
   } else {
     stop("'arrow' package should be installed.")
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to