Repository: spark Updated Branches: refs/heads/master ae2370e72 -> 26e760581
[SPARK-9871] [SPARKR] Add expression functions into SparkR which have a variable parameter ### Summary - Add `lit` function - Add `concat`, `greatest`, `least` functions I think we need to improve the `collect` function in order to implement the `struct` function, since `collect` doesn't work with arguments that include a nested `list` variable. It seems that a list against `struct` still has `jobj` classes, so it would be better to solve this problem in another issue. ### JIRA [[SPARK-9871] Add expression functions into SparkR which have a variable parameter - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9871) Author: Yu ISHIKAWA <[email protected]> Closes #8194 from yu-iskw/SPARK-9856. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/26e76058 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/26e76058 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/26e76058 Branch: refs/heads/master Commit: 26e760581fdf7ca913da93fa80e73b7ddabcedf6 Parents: ae2370e Author: Yu ISHIKAWA <[email protected]> Authored: Sun Aug 16 23:33:20 2015 -0700 Committer: Shivaram Venkataraman <[email protected]> Committed: Sun Aug 16 23:33:20 2015 -0700 ---------------------------------------------------------------------- R/pkg/NAMESPACE | 4 ++++ R/pkg/R/functions.R | 42 +++++++++++++++++++++++++++++++++++ R/pkg/R/generics.R | 16 +++++++++++++ R/pkg/inst/tests/test_sparkSQL.R | 13 +++++++++++ 4 files changed, 75 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/26e76058/R/pkg/NAMESPACE ---------------------------------------------------------------------- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index b2d92bd..fd9dfdf 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -98,6 +98,7 @@ exportMethods("abs", "contains", "cos", "cosh", + "concat", "countDistinct", "desc", "endsWith", @@ -106,10 +107,13 @@ 
exportMethods("abs", "floor", "getField", "getItem", + "greatest", "hypot", "isNotNull", "isNull", + "lit", "last", + "least", "like", "log", "log10", http://git-wip-us.apache.org/repos/asf/spark/blob/26e76058/R/pkg/R/functions.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index a15d2d5..6eef4d6 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -67,6 +67,14 @@ createFunctions <- function() { createFunctions() +#' @rdname functions +#' @return Creates a Column class of literal value. +setMethod("lit", signature("ANY"), + function(x) { + jc <- callJStatic("org.apache.spark.sql.functions", "lit", ifelse(class(x) == "Column", x@jc, x)) + column(jc) + }) + #' Approx Count Distinct #' #' @rdname functions @@ -94,6 +102,40 @@ setMethod("countDistinct", }) #' @rdname functions +#' @return Concatenates multiple input string columns together into a single string column. +setMethod("concat", + signature(x = "Column"), + function(x, ...) { + jcols <- lapply(list(x, ...), function(x) { x@jc }) + jc <- callJStatic("org.apache.spark.sql.functions", "concat", listToSeq(jcols)) + column(jc) + }) + +#' @rdname functions +#' @return Returns the greatest value of the list of column names, skipping null values. +#' This function takes at least 2 parameters. It will return null if all parameters are null. +setMethod("greatest", + signature(x = "Column"), + function(x, ...) { + stopifnot(length(list(...)) > 0) + jcols <- lapply(list(x, ...), function(x) { x@jc }) + jc <- callJStatic("org.apache.spark.sql.functions", "greatest", listToSeq(jcols)) + column(jc) + }) + +#' @rdname functions +#' @return Returns the least value of the list of column names, skipping null values. +#' This function takes at least 2 parameters. It will return null iff all parameters are null. +setMethod("least", + signature(x = "Column"), + function(x, ...) 
{ + stopifnot(length(list(...)) > 0) + jcols <- lapply(list(x, ...), function(x) { x@jc }) + jc <- callJStatic("org.apache.spark.sql.functions", "least", listToSeq(jcols)) + column(jc) + }) + +#' @rdname functions #' @aliases ceil setMethod("ceiling", signature(x = "Column"), http://git-wip-us.apache.org/repos/asf/spark/blob/26e76058/R/pkg/R/generics.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index f11e7fc..5c1cc98 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -684,6 +684,10 @@ setGeneric("ceil", function(x) { standardGeneric("ceil") }) #' @rdname functions #' @export +setGeneric("concat", function(x, ...) { standardGeneric("concat") }) + +#' @rdname functions +#' @export setGeneric("crc32", function(x) { standardGeneric("crc32") }) #' @rdname functions @@ -704,6 +708,10 @@ setGeneric("explode", function(x) { standardGeneric("explode") }) #' @rdname functions #' @export +setGeneric("greatest", function(x, ...) { standardGeneric("greatest") }) + +#' @rdname functions +#' @export setGeneric("hex", function(x) { standardGeneric("hex") }) #' @rdname functions @@ -724,10 +732,18 @@ setGeneric("last_day", function(x) { standardGeneric("last_day") }) #' @rdname functions #' @export +setGeneric("least", function(x, ...) 
{ standardGeneric("least") }) + +#' @rdname functions +#' @export setGeneric("levenshtein", function(y, x) { standardGeneric("levenshtein") }) #' @rdname functions #' @export +setGeneric("lit", function(x) { standardGeneric("lit") }) + +#' @rdname functions +#' @export setGeneric("lower", function(x) { standardGeneric("lower") }) #' @rdname functions http://git-wip-us.apache.org/repos/asf/spark/blob/26e76058/R/pkg/inst/tests/test_sparkSQL.R ---------------------------------------------------------------------- diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index c77f633..83caba8 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -580,6 +580,11 @@ test_that("select with column", { df2 <- select(df, df$age) expect_equal(columns(df2), c("age")) expect_equal(count(df2), 3) + + df3 <- select(df, lit("x")) + expect_equal(columns(df3), c("x")) + expect_equal(count(df3), 3) + expect_equal(collect(select(df3, "x"))[[1, 1]], "x") }) test_that("selectExpr() on a DataFrame", { @@ -712,6 +717,14 @@ test_that("string operators", { expect_equal(count(where(df, startsWith(df$name, "A"))), 1) expect_equal(first(select(df, substr(df$name, 1, 2)))[[1]], "Mi") expect_equal(collect(select(df, cast(df$age, "string")))[[2, 1]], "30") + expect_equal(collect(select(df, concat(df$name, lit(":"), df$age)))[[2, 1]], "Andy:30") +}) + +test_that("greatest() and least() on a DataFrame", { + l <- list(list(a = 1, b = 2), list(a = 3, b = 4)) + df <- createDataFrame(sqlContext, l) + expect_equal(collect(select(df, greatest(df$a, df$b)))[, 1], c(2, 4)) + expect_equal(collect(select(df, least(df$a, df$b)))[, 1], c(1, 3)) }) test_that("group by", { --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
