Repository: spark
Updated Branches:
  refs/heads/branch-1.5 945da3534 -> b42e13dca


[SPARK-8246] [SQL] Implement get_json_object

This is based on #7485 , thanks to NathanHowell

Tests were copied from Hive, but do not seem to be super comprehensive. I've 
generally replicated Hive's unusual behavior rather than following a JSONPath 
reference, except for one case (as noted in the comments). I don't know if 
there is a way of fully replicating Hive's behavior without a slower TreeNode 
implementation, so I've erred on the side of performance instead.

Author: Davies Liu <[email protected]>
Author: Yin Huai <[email protected]>
Author: Nathan Howell <[email protected]>

Closes #7901 from davies/get_json_object and squashes the following commits:

3ace9b9 [Davies Liu] Merge branch 'get_json_object' of github.com:davies/spark 
into get_json_object
98766fc [Davies Liu] Merge branch 'master' of github.com:apache/spark into 
get_json_object
a7dc6d0 [Davies Liu] Update JsonExpressionsSuite.scala
c818519 [Yin Huai] new results.
18ce26b [Davies Liu] fix tests
6ac29fb [Yin Huai] Golden files.
25eebef [Davies Liu] use HiveQuerySuite
e0ac6ec [Yin Huai] Golden answer files.
940c060 [Davies Liu] tweat code style
44084c5 [Davies Liu] Merge branch 'master' of github.com:apache/spark into 
get_json_object
9192d09 [Nathan Howell] Match Hive’s behavior for unwrapping arrays of one 
element
8dab647 [Nathan Howell] [SPARK-8246] [SQL] Implement get_json_object

(cherry picked from commit 73dedb589d06f7c7a525cc4f07721a77f480c434)
Signed-off-by: Davies Liu <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b42e13dc
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b42e13dc
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b42e13dc

Branch: refs/heads/branch-1.5
Commit: b42e13dca38c6e9ff9cf879bcb52efa681437120
Parents: 945da35
Author: Davies Liu <[email protected]>
Authored: Tue Aug 4 09:07:09 2015 -0700
Committer: Davies Liu <[email protected]>
Committed: Tue Aug 4 09:07:19 2015 -0700

----------------------------------------------------------------------
 .../catalyst/analysis/FunctionRegistry.scala    |   1 +
 .../catalyst/expressions/jsonFunctions.scala    | 309 +++++++++++++++++++
 .../expressions/JsonExpressionsSuite.scala      | 202 ++++++++++++
 .../apache/spark/sql/JsonFunctionsSuite.scala   |  32 ++
 .../hive/execution/HiveCompatibilitySuite.scala |   3 +
 .../apache/spark/sql/hive/test/TestHive.scala   |   6 +-
 ...object #1-0-f01b340b5662c45bb5f1e3b7c6900e1f |   1 +
 ...bject #10-0-f3f47d06d7c51d493d68112b0bd6c1fc |   1 +
 ..._object #2-0-e84c2f8136919830fd665a278e4158a |   1 +
 ...object #3-0-bf140c65c31f8d892ec23e41e16e58bb |   1 +
 ...object #4-0-f0bd902edc1990c9a6c65a6bb672c4d5 |   1 +
 ..._object #5-0-3c09f4316a1533049aee8af749cdcab |   1 +
 ...object #6-0-8334d1ddbe0f41fc7b80d4e6b45409da |   1 +
 ...object #7-0-40d7dff94b26a2e3f4ab71baee3d3ce0 |   1 +
 ...object #8-0-180b4b6fdb26011fec05a7ca99fd9844 |   1 +
 ...object #9-0-47c451a969d856f008f4d6b3d378d94b |   1 +
 .../sql/hive/execution/HiveQuerySuite.scala     |  51 +++
 17 files changed, 613 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/b42e13dc/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 6140d1b..43e3e9b 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -179,6 +179,7 @@ object FunctionRegistry {
     expression[Decode]("decode"),
     expression[FindInSet]("find_in_set"),
     expression[FormatNumber]("format_number"),
+    expression[GetJsonObject]("get_json_object"),
     expression[InitCap]("initcap"),
     expression[Lower]("lcase"),
     expression[Lower]("lower"),

http://git-wip-us.apache.org/repos/asf/spark/blob/b42e13dc/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonFunctions.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonFunctions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonFunctions.scala
new file mode 100644
index 0000000..23bfa18
--- /dev/null
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonFunctions.scala
@@ -0,0 +1,309 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions
+
+import java.io.{StringWriter, ByteArrayOutputStream}
+
+import com.fasterxml.jackson.core._
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.types.{StringType, DataType}
+import org.apache.spark.unsafe.types.UTF8String
+
+import scala.util.parsing.combinator.RegexParsers
+
+private[this] sealed trait PathInstruction
+private[this] object PathInstruction {
+  private[expressions] case object Subscript extends PathInstruction
+  private[expressions] case object Wildcard extends PathInstruction
+  private[expressions] case object Key extends PathInstruction
+  private[expressions] case class Index(index: Long) extends PathInstruction
+  private[expressions] case class Named(name: String) extends PathInstruction
+}
+
+private[this] sealed trait WriteStyle
+private[this] object WriteStyle {
+  private[expressions] case object RawStyle extends WriteStyle
+  private[expressions] case object QuotedStyle extends WriteStyle
+  private[expressions] case object FlattenStyle extends WriteStyle
+}
+
+private[this] object JsonPathParser extends RegexParsers {
+  import PathInstruction._
+
+  def root: Parser[Char] = '$'
+
+  def long: Parser[Long] = "\\d+".r ^? {
+    case x => x.toLong
+  }
+
+  // parse `[*]` and `[123]` subscripts
+  def subscript: Parser[List[PathInstruction]] =
+    for {
+      operand <- '[' ~> ('*' ^^^ Wildcard | long ^^ Index) <~ ']'
+    } yield {
+      Subscript :: operand :: Nil
+    }
+
+  // parse `.name` or `['name']` child expressions
+  def named: Parser[List[PathInstruction]] =
+    for {
+      name <- '.' ~> "[^\\.\\[]+".r | "[\\'" ~> "[^\\'\\?]+" <~ "\\']"
+    } yield {
+      Key :: Named(name) :: Nil
+    }
+
+  // child wildcards: `..`, `.*` or `['*']`
+  def wildcard: Parser[List[PathInstruction]] =
+    (".*" | "['*']") ^^^ List(Wildcard)
+
+  def node: Parser[List[PathInstruction]] =
+    wildcard |
+      named |
+      subscript
+
+  val expression: Parser[List[PathInstruction]] = {
+    phrase(root ~> rep(node) ^^ (x => x.flatten))
+  }
+
+  def parse(str: String): Option[List[PathInstruction]] = {
+    this.parseAll(expression, str) match {
+      case Success(result, _) =>
+        Some(result)
+
+      case NoSuccess(msg, next) =>
+        None
+    }
+  }
+}
+
+private[this] object GetJsonObject {
+  private val jsonFactory = new JsonFactory()
+
+  // Enabled for Hive compatibility
+  jsonFactory.enable(JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS)
+}
+
+/**
+ * Extracts json object from a json string based on json path specified, and 
returns json string
+ * of the extracted json object. It will return null if the input json string 
is invalid.
+ */
+case class GetJsonObject(json: Expression, path: Expression)
+  extends BinaryExpression with ExpectsInputTypes with CodegenFallback {
+
+  import GetJsonObject._
+  import PathInstruction._
+  import WriteStyle._
+  import com.fasterxml.jackson.core.JsonToken._
+
+  override def left: Expression = json
+  override def right: Expression = path
+  override def inputTypes: Seq[DataType] = Seq(StringType, StringType)
+  override def dataType: DataType = StringType
+  override def prettyName: String = "get_json_object"
+
+  @transient private lazy val parsedPath = 
parsePath(path.eval().asInstanceOf[UTF8String])
+
+  override def eval(input: InternalRow): Any = {
+    val jsonStr = json.eval(input).asInstanceOf[UTF8String]
+    if (jsonStr == null) {
+      return null
+    }
+
+    val parsed = if (path.foldable) {
+      parsedPath
+    } else {
+      parsePath(path.eval(input).asInstanceOf[UTF8String])
+    }
+
+    if (parsed.isDefined) {
+      try {
+        val parser = jsonFactory.createParser(jsonStr.getBytes)
+        val output = new ByteArrayOutputStream()
+        val generator = jsonFactory.createGenerator(output, JsonEncoding.UTF8)
+        parser.nextToken()
+        val matched = evaluatePath(parser, generator, RawStyle, parsed.get)
+        generator.close()
+        if (matched) {
+          UTF8String.fromBytes(output.toByteArray)
+        } else {
+          null
+        }
+      } catch {
+        case _: JsonProcessingException => null
+      }
+    } else {
+      null
+    }
+  }
+
+  private def parsePath(path: UTF8String): Option[List[PathInstruction]] = {
+    if (path != null) {
+      JsonPathParser.parse(path.toString)
+    } else {
+      None
+    }
+  }
+
+  // advance to the desired array index, assumes to start at the START_ARRAY 
token
+  private def arrayIndex(p: JsonParser, f: () => Boolean): Long => Boolean = {
+    case _ if p.getCurrentToken == END_ARRAY =>
+      // terminate, nothing has been written
+      false
+
+    case 0 =>
+      // we've reached the desired index
+      val dirty = f()
+
+      while (p.nextToken() != END_ARRAY) {
+        // advance the token stream to the end of the array
+        p.skipChildren()
+      }
+
+      dirty
+
+    case i if i > 0 =>
+      // skip this token and evaluate the next
+      p.skipChildren()
+      p.nextToken()
+      arrayIndex(p, f)(i - 1)
+  }
+
+  /**
+   * Evaluate a list of JsonPath instructions, returning a bool that indicates 
if any leaf nodes
+   * have been written to the generator
+   */
+  private def evaluatePath(
+      p: JsonParser,
+      g: JsonGenerator,
+      style: WriteStyle,
+      path: List[PathInstruction]): Boolean = {
+    (p.getCurrentToken, path) match {
+      case (VALUE_STRING, Nil) if style == RawStyle =>
+        // there is no array wildcard or slice parent, emit this string 
without quotes
+        if (p.hasTextCharacters) {
+          g.writeRaw(p.getTextCharacters, p.getTextOffset, p.getTextLength)
+        } else {
+          g.writeRaw(p.getText)
+        }
+        true
+
+      case (START_ARRAY, Nil) if style == FlattenStyle =>
+        // flatten this array into the parent
+        var dirty = false
+        while (p.nextToken() != END_ARRAY) {
+          dirty |= evaluatePath(p, g, style, Nil)
+        }
+        dirty
+
+      case (_, Nil) =>
+        // general case: just copy the child tree verbatim
+        g.copyCurrentStructure(p)
+        true
+
+      case (START_OBJECT, Key :: xs) =>
+        var dirty = false
+        while (p.nextToken() != END_OBJECT) {
+          if (dirty) {
+            // once a match has been found we can skip other fields
+            p.skipChildren()
+          } else {
+            dirty = evaluatePath(p, g, style, xs)
+          }
+        }
+        dirty
+
+      case (START_ARRAY, Subscript :: Wildcard :: Subscript :: Wildcard :: xs) 
=>
+        // special handling for the non-structure preserving double wildcard 
behavior in Hive
+        var dirty = false
+        g.writeStartArray()
+        while (p.nextToken() != END_ARRAY) {
+          dirty |= evaluatePath(p, g, FlattenStyle, xs)
+        }
+        g.writeEndArray()
+        dirty
+
+      case (START_ARRAY, Subscript :: Wildcard :: xs) if style != QuotedStyle 
=>
+        // retain Flatten, otherwise use Quoted... cannot use Raw within an 
array
+        val nextStyle = style match {
+          case RawStyle => QuotedStyle
+          case FlattenStyle => FlattenStyle
+          case QuotedStyle => throw new IllegalStateException()
+        }
+
+        // temporarily buffer child matches, the emitted json will need to be
+        // modified slightly if there is only a single element written
+        val buffer = new StringWriter()
+        val flattenGenerator = jsonFactory.createGenerator(buffer)
+        flattenGenerator.writeStartArray()
+
+        var dirty = 0
+        while (p.nextToken() != END_ARRAY) {
+          // track the number of array elements and only emit an outer array if
+          // we've written more than one element, this matches Hive's behavior
+          dirty += (if (evaluatePath(p, flattenGenerator, nextStyle, xs)) 1 
else 0)
+        }
+        flattenGenerator.writeEndArray()
+        flattenGenerator.close()
+
+        val buf = buffer.getBuffer
+        if (dirty > 1) {
+          g.writeRawValue(buf.toString)
+        } else if (dirty == 1) {
+          // remove outer array tokens
+          g.writeRawValue(buf.substring(1, buf.length()-1))
+        } // else do not write anything
+
+        dirty > 0
+
+      case (START_ARRAY, Subscript :: Wildcard :: xs) =>
+        var dirty = false
+        g.writeStartArray()
+        while (p.nextToken() != END_ARRAY) {
+          // wildcards can have multiple matches, continually update the dirty 
count
+          dirty |= evaluatePath(p, g, QuotedStyle, xs)
+        }
+        g.writeEndArray()
+
+        dirty
+
+      case (START_ARRAY, Subscript :: Index(idx) :: (xs@Subscript :: Wildcard 
:: _)) =>
+        p.nextToken()
+        // we're going to have 1 or more results, switch to QuotedStyle
+        arrayIndex(p, () => evaluatePath(p, g, QuotedStyle, xs))(idx)
+
+      case (START_ARRAY, Subscript :: Index(idx) :: xs) =>
+        p.nextToken()
+        arrayIndex(p, () => evaluatePath(p, g, style, xs))(idx)
+
+      case (FIELD_NAME, Named(name) :: xs) if p.getCurrentName == name =>
+        // exact field match
+        p.nextToken()
+        evaluatePath(p, g, style, xs)
+
+      case (FIELD_NAME, Wildcard :: xs) =>
+        // wildcard field match
+        p.nextToken()
+        evaluatePath(p, g, style, xs)
+
+      case _ =>
+        p.skipChildren()
+        false
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/b42e13dc/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
new file mode 100644
index 0000000..4addbaf
--- /dev/null
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
@@ -0,0 +1,202 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions
+
+import org.apache.spark.SparkFunSuite
+
+class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
+  val json =
+    """
+      
|{"store":{"fruit":[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}],
+      |"basket":[[1,2,{"b":"y","a":"x"}],[3,4],[5,6]],"book":[{"author":"Nigel 
Rees",
+      |"title":"Sayings of the Century","category":"reference","price":8.95},
+      |{"author":"Herman Melville","title":"Moby 
Dick","category":"fiction","price":8.99,
+      |"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","title":"The Lord 
of the Rings",
+      
|"category":"fiction","reader":[{"age":25,"name":"bob"},{"age":26,"name":"jack"}],
+      
|"price":22.99,"isbn":"0-395-19395-8"}],"bicycle":{"price":19.95,"color":"red"}},
+      |"email":"amy@only_for_json_udf_test.net","owner":"amy","zip 
code":"94025",
+      |"fb:testid":"1234"}
+      |""".stripMargin
+
+  test("$.store.bicycle") {
+    checkEvaluation(
+      GetJsonObject(Literal(json), Literal("$.store.bicycle")),
+      """{"price":19.95,"color":"red"}""")
+  }
+
+  test("$.store.book") {
+    checkEvaluation(
+      GetJsonObject(Literal(json), Literal("$.store.book")),
+      """[{"author":"Nigel Rees","title":"Sayings of the 
Century","category":"reference",
+        |"price":8.95},{"author":"Herman Melville","title":"Moby 
Dick","category":"fiction",
+        |"price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. 
Tolkien","title":
+        |"The Lord of the 
Rings","category":"fiction","reader":[{"age":25,"name":"bob"},
+        |{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}]
+        |""".stripMargin.replace("\n", ""))
+  }
+
+  test("$.store.book[0]") {
+    checkEvaluation(
+      GetJsonObject(Literal(json), Literal("$.store.book[0]")),
+      """{"author":"Nigel Rees","title":"Sayings of the Century",
+        |"category":"reference","price":8.95}""".stripMargin.replace("\n", ""))
+  }
+
+  test("$.store.book[*]") {
+    checkEvaluation(
+      GetJsonObject(Literal(json), Literal("$.store.book[*]")),
+      """[{"author":"Nigel Rees","title":"Sayings of the 
Century","category":"reference",
+        |"price":8.95},{"author":"Herman Melville","title":"Moby 
Dick","category":"fiction",
+        |"price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. 
Tolkien","title":
+        |"The Lord of the 
Rings","category":"fiction","reader":[{"age":25,"name":"bob"},
+        |{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}]
+        |""".stripMargin.replace("\n", ""))
+  }
+
+  test("$") {
+    checkEvaluation(
+      GetJsonObject(Literal(json), Literal("$")),
+      json.replace("\n", ""))
+  }
+
+  test("$.store.book[0].category") {
+    checkEvaluation(
+      GetJsonObject(Literal(json), Literal("$.store.book[0].category")),
+      "reference")
+  }
+
+  test("$.store.book[*].category") {
+    checkEvaluation(
+      GetJsonObject(Literal(json), Literal("$.store.book[*].category")),
+      """["reference","fiction","fiction"]""")
+  }
+
+  test("$.store.book[*].isbn") {
+    checkEvaluation(
+      GetJsonObject(Literal(json), Literal("$.store.book[*].isbn")),
+      """["0-553-21311-3","0-395-19395-8"]""")
+  }
+
+  test("$.store.book[*].reader") {
+    checkEvaluation(
+      GetJsonObject(Literal(json), Literal("$.store.book[*].reader")),
+      """[{"age":25,"name":"bob"},{"age":26,"name":"jack"}]""")
+  }
+
+  test("$.store.basket[0][1]") {
+    checkEvaluation(
+      GetJsonObject(Literal(json), Literal("$.store.basket[0][1]")),
+      "2")
+  }
+
+  test("$.store.basket[*]") {
+    checkEvaluation(
+      GetJsonObject(Literal(json), Literal("$.store.basket[*]")),
+      """[[1,2,{"b":"y","a":"x"}],[3,4],[5,6]]""")
+  }
+
+  test("$.store.basket[*][0]") {
+    checkEvaluation(
+      GetJsonObject(Literal(json), Literal("$.store.basket[*][0]")),
+      "[1,3,5]")
+  }
+
+  test("$.store.basket[0][*]") {
+    checkEvaluation(
+      GetJsonObject(Literal(json), Literal("$.store.basket[0][*]")),
+      """[1,2,{"b":"y","a":"x"}]""")
+  }
+
+  test("$.store.basket[*][*]") {
+    checkEvaluation(
+      GetJsonObject(Literal(json), Literal("$.store.basket[*][*]")),
+      """[1,2,{"b":"y","a":"x"},3,4,5,6]""")
+  }
+
+  test("$.store.basket[0][2].b") {
+    checkEvaluation(
+      GetJsonObject(Literal(json), Literal("$.store.basket[0][2].b")),
+      "y")
+  }
+
+  test("$.store.basket[0][*].b") {
+    checkEvaluation(
+      GetJsonObject(Literal(json), Literal("$.store.basket[0][*].b")),
+      """["y"]""")
+  }
+
+  test("$.zip code") {
+    checkEvaluation(
+      GetJsonObject(Literal(json), Literal("$.zip code")),
+      "94025")
+  }
+
+  test("$.fb:testid") {
+    checkEvaluation(
+      GetJsonObject(Literal(json), Literal("$.fb:testid")),
+      "1234")
+  }
+
+  test("preserve newlines") {
+    checkEvaluation(
+      GetJsonObject(Literal("""{"a":"b\nc"}"""), Literal("$.a")),
+      "b\nc")
+  }
+
+  test("escape") {
+    checkEvaluation(
+      GetJsonObject(Literal("""{"a":"b\"c"}"""), Literal("$.a")),
+      "b\"c")
+  }
+
+  test("$.non_exist_key") {
+    checkEvaluation(
+      GetJsonObject(Literal(json), Literal("$.non_exist_key")),
+      null)
+  }
+
+  test("$..no_recursive") {
+    checkEvaluation(
+      GetJsonObject(Literal(json), Literal("$..no_recursive")),
+      null)
+  }
+
+  test("$.store.book[10]") {
+    checkEvaluation(
+      GetJsonObject(Literal(json), Literal("$.store.book[10]")),
+      null)
+  }
+
+  test("$.store.book[0].non_exist_key") {
+    checkEvaluation(
+      GetJsonObject(Literal(json), Literal("$.store.book[0].non_exist_key")),
+      null)
+  }
+
+  test("$.store.basket[*].non_exist_key") {
+    checkEvaluation(
+      GetJsonObject(Literal(json), Literal("$.store.basket[*].non_exist_key")),
+      null)
+  }
+
+  test("non foldable literal") {
+    checkEvaluation(
+      GetJsonObject(NonFoldableLiteral(json), 
NonFoldableLiteral("$.fb:testid")),
+      "1234")
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/b42e13dc/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
new file mode 100644
index 0000000..71c26a6
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+class JsonFunctionsSuite extends QueryTest {
+
+  private lazy val ctx = org.apache.spark.sql.test.TestSQLContext
+  import ctx.implicits._
+
+  test("function get_json_object") {
+    val df: DataFrame = Seq(("""{"name": "alice", "age": 5}""", "")).toDF("a", 
"b")
+    checkAnswer(
+      df.selectExpr("get_json_object(a, '$.name')", "get_json_object(a, 
'$.age')"),
+      Row("alice", "5"))
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/b42e13dc/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
 
b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index c46a4a4..d4fc6c2 100644
--- 
a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ 
b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -266,6 +266,9 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with 
BeforeAndAfter {
     // Hive returns string from UTC formatted timestamp, spark returns 
timestamp type
     "date_udf",
 
+    // Can't compare the result that have newline in it
+    "udf_get_json_object",
+
     // Unlike Hive, we do support log base in (0, 1.0], therefore disable this
     "udf7",
 

http://git-wip-us.apache.org/repos/asf/spark/blob/b42e13dc/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index 8d0bf46..167086d 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -374,7 +374,11 @@ class TestHiveContext(sc: SparkContext) extends 
HiveContext(sc) {
         INSERT OVERWRITE TABLE episodes_part PARTITION (doctor_pt=1)
         SELECT title, air_date, doctor FROM episodes
       """.cmd
-      )
+      ),
+    TestTable("src_json",
+      s"""CREATE TABLE src_json (json STRING) STORED AS TEXTFILE
+       """.stripMargin.cmd,
+      s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/json.txt")}' INTO 
TABLE src_json".cmd)
   )
 
   hiveQTestUtilTables.foreach(registerTestTable)

http://git-wip-us.apache.org/repos/asf/spark/blob/b42e13dc/sql/hive/src/test/resources/golden/get_json_object
 #1-0-f01b340b5662c45bb5f1e3b7c6900e1f
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/get_json_object 
#1-0-f01b340b5662c45bb5f1e3b7c6900e1f 
b/sql/hive/src/test/resources/golden/get_json_object 
#1-0-f01b340b5662c45bb5f1e3b7c6900e1f
new file mode 100644
index 0000000..1dcda43
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/get_json_object 
#1-0-f01b340b5662c45bb5f1e3b7c6900e1f  
@@ -0,0 +1 @@
+{"store":{"fruit":[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}],"basket":[[1,2,{"b":"y","a":"x"}],[3,4],[5,6]],"book":[{"author":"Nigel
 Rees","title":"Sayings of the 
Century","category":"reference","price":8.95},{"author":"Herman 
Melville","title":"Moby 
Dick","category":"fiction","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. 
R. R. Tolkien","title":"The Lord of the 
Rings","category":"fiction","reader":[{"age":25,"name":"bob"},{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}],"bicycle":{"price":19.95,"color":"red"}},"email":"amy@only_for_json_udf_test.net","owner":"amy","zip
 code":"94025","fb:testid":"1234"}

http://git-wip-us.apache.org/repos/asf/spark/blob/b42e13dc/sql/hive/src/test/resources/golden/get_json_object
 #10-0-f3f47d06d7c51d493d68112b0bd6c1fc
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/get_json_object 
#10-0-f3f47d06d7c51d493d68112b0bd6c1fc 
b/sql/hive/src/test/resources/golden/get_json_object 
#10-0-f3f47d06d7c51d493d68112b0bd6c1fc
new file mode 100644
index 0000000..81c545e
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/get_json_object 
#10-0-f3f47d06d7c51d493d68112b0bd6c1fc 
@@ -0,0 +1 @@
+1234

http://git-wip-us.apache.org/repos/asf/spark/blob/b42e13dc/sql/hive/src/test/resources/golden/get_json_object
 #2-0-e84c2f8136919830fd665a278e4158a
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/get_json_object 
#2-0-e84c2f8136919830fd665a278e4158a 
b/sql/hive/src/test/resources/golden/get_json_object 
#2-0-e84c2f8136919830fd665a278e4158a
new file mode 100644
index 0000000..99127db
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/get_json_object 
#2-0-e84c2f8136919830fd665a278e4158a   
@@ -0,0 +1 @@
+amy    
{"fruit":[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}],"basket":[[1,2,{"b":"y","a":"x"}],[3,4],[5,6]],"book":[{"author":"Nigel
 Rees","title":"Sayings of the 
Century","category":"reference","price":8.95},{"author":"Herman 
Melville","title":"Moby 
Dick","category":"fiction","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. 
R. R. Tolkien","title":"The Lord of the 
Rings","category":"fiction","reader":[{"age":25,"name":"bob"},{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}],"bicycle":{"price":19.95,"color":"red"}}

http://git-wip-us.apache.org/repos/asf/spark/blob/b42e13dc/sql/hive/src/test/resources/golden/get_json_object
 #3-0-bf140c65c31f8d892ec23e41e16e58bb
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/get_json_object 
#3-0-bf140c65c31f8d892ec23e41e16e58bb 
b/sql/hive/src/test/resources/golden/get_json_object 
#3-0-bf140c65c31f8d892ec23e41e16e58bb
new file mode 100644
index 0000000..0bc0399
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/get_json_object 
#3-0-bf140c65c31f8d892ec23e41e16e58bb  
@@ -0,0 +1 @@
+{"price":19.95,"color":"red"}  [{"author":"Nigel Rees","title":"Sayings of the 
Century","category":"reference","price":8.95},{"author":"Herman 
Melville","title":"Moby 
Dick","category":"fiction","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. 
R. R. Tolkien","title":"The Lord of the 
Rings","category":"fiction","reader":[{"age":25,"name":"bob"},{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}]

http://git-wip-us.apache.org/repos/asf/spark/blob/b42e13dc/sql/hive/src/test/resources/golden/get_json_object
 #4-0-f0bd902edc1990c9a6c65a6bb672c4d5
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/get_json_object 
#4-0-f0bd902edc1990c9a6c65a6bb672c4d5 
b/sql/hive/src/test/resources/golden/get_json_object 
#4-0-f0bd902edc1990c9a6c65a6bb672c4d5
new file mode 100644
index 0000000..4f7e09b
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/get_json_object 
#4-0-f0bd902edc1990c9a6c65a6bb672c4d5  
@@ -0,0 +1 @@
+{"author":"Nigel Rees","title":"Sayings of the 
Century","category":"reference","price":8.95}   [{"author":"Nigel 
Rees","title":"Sayings of the 
Century","category":"reference","price":8.95},{"author":"Herman 
Melville","title":"Moby 
Dick","category":"fiction","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. 
R. R. Tolkien","title":"The Lord of the 
Rings","category":"fiction","reader":[{"age":25,"name":"bob"},{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}]

http://git-wip-us.apache.org/repos/asf/spark/blob/b42e13dc/sql/hive/src/test/resources/golden/get_json_object
 #5-0-3c09f4316a1533049aee8af749cdcab
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/get_json_object 
#5-0-3c09f4316a1533049aee8af749cdcab 
b/sql/hive/src/test/resources/golden/get_json_object 
#5-0-3c09f4316a1533049aee8af749cdcab
new file mode 100644
index 0000000..b2d212a
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/get_json_object 
#5-0-3c09f4316a1533049aee8af749cdcab   
@@ -0,0 +1 @@
+reference      ["reference","fiction","fiction"]       
["0-553-21311-3","0-395-19395-8"]       
[{"age":25,"name":"bob"},{"age":26,"name":"jack"}]

http://git-wip-us.apache.org/repos/asf/spark/blob/b42e13dc/sql/hive/src/test/resources/golden/get_json_object
 #6-0-8334d1ddbe0f41fc7b80d4e6b45409da
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/get_json_object 
#6-0-8334d1ddbe0f41fc7b80d4e6b45409da 
b/sql/hive/src/test/resources/golden/get_json_object 
#6-0-8334d1ddbe0f41fc7b80d4e6b45409da
new file mode 100644
index 0000000..21d8862
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/get_json_object 
#6-0-8334d1ddbe0f41fc7b80d4e6b45409da  
@@ -0,0 +1 @@
+25     [25,26]

http://git-wip-us.apache.org/repos/asf/spark/blob/b42e13dc/sql/hive/src/test/resources/golden/get_json_object
 #7-0-40d7dff94b26a2e3f4ab71baee3d3ce0
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/get_json_object 
#7-0-40d7dff94b26a2e3f4ab71baee3d3ce0 
b/sql/hive/src/test/resources/golden/get_json_object 
#7-0-40d7dff94b26a2e3f4ab71baee3d3ce0
new file mode 100644
index 0000000..e60721e
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/get_json_object 
#7-0-40d7dff94b26a2e3f4ab71baee3d3ce0  
@@ -0,0 +1 @@
+2      [[1,2,{"b":"y","a":"x"}],[3,4],[5,6]]   1       [1,2,{"b":"y","a":"x"}] 
[1,2,{"b":"y","a":"x"},3,4,5,6] y       ["y"]

http://git-wip-us.apache.org/repos/asf/spark/blob/b42e13dc/sql/hive/src/test/resources/golden/get_json_object
 #8-0-180b4b6fdb26011fec05a7ca99fd9844
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/get_json_object 
#8-0-180b4b6fdb26011fec05a7ca99fd9844 
b/sql/hive/src/test/resources/golden/get_json_object 
#8-0-180b4b6fdb26011fec05a7ca99fd9844
new file mode 100644
index 0000000..356fcdf
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/get_json_object 
#8-0-180b4b6fdb26011fec05a7ca99fd9844  
@@ -0,0 +1 @@
+NULL   NULL    NULL    NULL    NULL    NULL

http://git-wip-us.apache.org/repos/asf/spark/blob/b42e13dc/sql/hive/src/test/resources/golden/get_json_object
 #9-0-47c451a969d856f008f4d6b3d378d94b
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/get_json_object 
#9-0-47c451a969d856f008f4d6b3d378d94b 
b/sql/hive/src/test/resources/golden/get_json_object 
#9-0-47c451a969d856f008f4d6b3d378d94b
new file mode 100644
index 0000000..ef4a396
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/get_json_object 
#9-0-47c451a969d856f008f4d6b3d378d94b  
@@ -0,0 +1 @@
+94025

http://git-wip-us.apache.org/repos/asf/spark/blob/b42e13dc/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index a7cfac5..edb2755 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -587,6 +587,57 @@ class HiveQuerySuite extends HiveComparisonTest with 
BeforeAndAfter {
       |select * where key = 4
     """.stripMargin)
 
+  // test get_json_object again Hive, because the HiveCompatabilitySuite 
cannot handle result
+  // with newline in it.
+  createQueryTest("get_json_object #1",
+    "SELECT get_json_object(src_json.json, '$') FROM src_json")
+
+  createQueryTest("get_json_object #2",
+    "SELECT get_json_object(src_json.json, '$.owner'), 
get_json_object(src_json.json, '$.store')" +
+      " FROM src_json")
+
+  createQueryTest("get_json_object #3",
+    "SELECT get_json_object(src_json.json, '$.store.bicycle'), " +
+      "get_json_object(src_json.json, '$.store.book') FROM src_json")
+
+  createQueryTest("get_json_object #4",
+    "SELECT get_json_object(src_json.json, '$.store.book[0]'), " +
+      "get_json_object(src_json.json, '$.store.book[*]') FROM src_json")
+
+  createQueryTest("get_json_object #5",
+    "SELECT get_json_object(src_json.json, '$.store.book[0].category'), " +
+      "get_json_object(src_json.json, '$.store.book[*].category'), " +
+      "get_json_object(src_json.json, '$.store.book[*].isbn'), " +
+      "get_json_object(src_json.json, '$.store.book[*].reader') FROM src_json")
+
+  createQueryTest("get_json_object #6",
+    "SELECT get_json_object(src_json.json, '$.store.book[*].reader[0].age'), " 
+
+      "get_json_object(src_json.json, '$.store.book[*].reader[*].age') FROM 
src_json")
+
+  createQueryTest("get_json_object #7",
+    "SELECT get_json_object(src_json.json, '$.store.basket[0][1]'), " +
+      "get_json_object(src_json.json, '$.store.basket[*]'), " +
+      // Hive returns wrong result with [*][0], so this expression is change 
to make test pass
+      "get_json_object(src_json.json, '$.store.basket[0][0]'), " +
+      "get_json_object(src_json.json, '$.store.basket[0][*]'), " +
+      "get_json_object(src_json.json, '$.store.basket[*][*]'), " +
+      "get_json_object(src_json.json, '$.store.basket[0][2].b'), " +
+      "get_json_object(src_json.json, '$.store.basket[0][*].b') FROM src_json")
+
+  createQueryTest("get_json_object #8",
+    "SELECT get_json_object(src_json.json, '$.non_exist_key'), " +
+      "get_json_object(src_json.json, '$..no_recursive'), " +
+      "get_json_object(src_json.json, '$.store.book[10]'), " +
+      "get_json_object(src_json.json, '$.store.book[0].non_exist_key'), " +
+      "get_json_object(src_json.json, '$.store.basket[*].non_exist_key'), " +
+      "get_json_object(src_json.json, '$.store.basket[0][*].non_exist_key') 
FROM src_json")
+
+  createQueryTest("get_json_object #9",
+    "SELECT get_json_object(src_json.json, '$.zip code') FROM src_json")
+
+  createQueryTest("get_json_object #10",
+    "SELECT get_json_object(src_json.json, '$.fb:testid') FROM src_json")
+
   test("predicates contains an empty AttributeSet() references") {
     sql(
       """


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to