[SPARK-7588] Document all SQL/DataFrame public methods with @since tag

This pull request adds a @since tag to all public methods/classes in SQL/DataFrame to indicate the version in which each method/class was first added.
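For context, the change is purely a documentation convention: every public class and method in the SQL/DataFrame API gains a Scaladoc/Javadoc @since tag recording the release in which it first appeared, 1.3.0 (or 1.3.1) for existing APIs and 1.4.0 for APIs new in this release such as Column.between and DataFrame.drop. The sketch below is illustrative only; it uses a simplified stand-in class rather than the real Catalyst-backed org.apache.spark.sql.Column, just to show where the tags sit in the Scaladoc.

    // Simplified stand-in for org.apache.spark.sql.Column, for illustration only:
    // the real class wraps a Catalyst Expression. The point is the placement of
    // the @group and @since tags in the Scaladoc.
    class Column(val value: String) {

      /**
       * True if the current expression is null.
       *
       * @group expr_ops
       * @since 1.3.0
       */
      def isNull: Boolean = value == null

      /**
       * True if the current column is between the lower bound and upper bound, inclusive.
       * APIs introduced in this release are tagged 1.4.0 rather than 1.3.0.
       *
       * @group java_expr_ops
       * @since 1.4.0
       */
      def between(lowerBound: String, upperBound: String): Boolean =
        value >= lowerBound && value <= upperBound
    }

Scaladoc and Javadoc both render @since in the generated API documentation, so users can see at a glance which Spark release introduced a given API.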
Author: Reynold Xin <r...@databricks.com>

Closes #6101 from rxin/tbc and squashes the following commits:

ed55e11 [Reynold Xin] Add since version to all DataFrame methods.

(cherry picked from commit 8fd55358b7fc1c7545d823bef7b39769f731c1ee)
Signed-off-by: Reynold Xin <r...@databricks.com>

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bdd5db9f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bdd5db9f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bdd5db9f

Branch: refs/heads/branch-1.4
Commit: bdd5db9f16bc5fc18357b504781cf76774f9acd6
Parents: 2cc3301
Author: Reynold Xin <r...@databricks.com>
Authored: Tue May 12 18:37:02 2015 -0700
Committer: Reynold Xin <r...@databricks.com>
Committed: Tue May 12 18:37:10 2015 -0700

----------------------------------------------------------------------
 .../java/org/apache/spark/sql/SaveMode.java     |  10 ++
 .../scala/org/apache/spark/sql/Column.scala     | 145 +++++++++++++++++--
 .../scala/org/apache/spark/sql/DataFrame.scala  | 103 +++++++++++++
 .../org/apache/spark/sql/DataFrameHolder.scala  |   2 +
 .../apache/spark/sql/DataFrameNaFunctions.scala |  44 ++++++
 .../spark/sql/DataFrameStatFunctions.scala      |  18 +++
 .../apache/spark/sql/ExperimentalMethods.scala  |   4 +
 .../org/apache/spark/sql/GroupedData.scala      |  22 +++
 .../apache/spark/sql/JavaTypeInference.scala    |   3 +-
 .../scala/org/apache/spark/sql/SQLContext.scala |  94 +++++++++++-
 .../org/apache/spark/sql/SparkSQLParser.scala   |   1 -
 .../org/apache/spark/sql/UDFRegistration.scala  |  49 +++++++
 .../apache/spark/sql/UserDefinedFunction.scala  |   4 +
 .../scala/org/apache/spark/sql/functions.scala  | 140 +++++++++++++++++-
 .../org/apache/spark/sql/sources/filters.scala  |  30 ++++
 .../apache/spark/sql/sources/interfaces.scala   |  57 ++++++++
 .../org/apache/spark/sql/hive/HiveContext.scala |   6 +
 17 files changed, 706 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/bdd5db9f/sql/core/src/main/java/org/apache/spark/sql/SaveMode.java
----------------------------------------------------------------------
diff --git a/sql/core/src/main/java/org/apache/spark/sql/SaveMode.java b/sql/core/src/main/java/org/apache/spark/sql/SaveMode.java
index a40be52..9665c3c 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/SaveMode.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/SaveMode.java
@@ -18,28 +18,38 @@ package org.apache.spark.sql;
 
 /**
  * SaveMode is used to specify the expected behavior of saving a DataFrame to a data source.
+ *
+ * @since 1.3.0
  */
 public enum SaveMode {
   /**
    * Append mode means that when saving a DataFrame to a data source, if data/table already exists,
    * contents of the DataFrame are expected to be appended to existing data.
+   *
+   * @since 1.3.0
    */
   Append,
   /**
    * Overwrite mode means that when saving a DataFrame to a data source,
    * if data/table already exists, existing data is expected to be overwritten by the contents of
    * the DataFrame.
+   *
+   * @since 1.3.0
    */
   Overwrite,
   /**
    * ErrorIfExists mode means that when saving a DataFrame to a data source, if data already exists,
    * an exception is expected to be thrown.
+   *
+   * @since 1.3.0
   */
   ErrorIfExists,
   /**
    * Ignore mode means that when saving a DataFrame to a data source, if data already exists,
    * the save operation is expected to not save the contents of the DataFrame and to not
    * change the existing data.
+ * + * @since 1.3.0 */ Ignore } http://git-wip-us.apache.org/repos/asf/spark/blob/bdd5db9f/sql/core/src/main/scala/org/apache/spark/sql/Column.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 4d50821..4773ded 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -45,6 +45,8 @@ private[sql] object Column { * @groupname expr_ops Expression operators. * @groupname df_ops DataFrame functions. * @groupname Ungrouped Support functions for DataFrames. + * + * @since 1.3.0 */ @Experimental class Column(protected[sql] val expr: Expression) extends Logging { @@ -77,6 +79,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * of every struct in that array, and return an Array of fields * * @group expr_ops + * @since 1.4.0 */ def apply(extraction: Any): Column = UnresolvedExtractValue(expr, lit(extraction).expr) @@ -92,6 +95,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def unary_- : Column = UnaryMinus(expr) @@ -107,6 +111,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def unary_! : Column = Not(expr) @@ -122,6 +127,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def === (other: Any): Column = { val right = lit(other).expr @@ -145,6 +151,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def equalTo(other: Any): Column = this === other @@ -161,6 +168,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def !== (other: Any): Column = Not(EqualTo(expr, lit(other).expr)) @@ -177,6 +185,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def notEqual(other: Any): Column = Not(EqualTo(expr, lit(other).expr)) @@ -192,6 +201,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def > (other: Any): Column = GreaterThan(expr, lit(other).expr) @@ -207,6 +217,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def gt(other: Any): Column = this > other @@ -221,6 +232,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def < (other: Any): Column = LessThan(expr, lit(other).expr) @@ -235,6 +247,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def lt(other: Any): Column = this < other @@ -249,6 +262,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def <= (other: Any): Column = LessThanOrEqual(expr, lit(other).expr) @@ -263,6 +277,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def leq(other: Any): Column = this <= other @@ -277,6 +292,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def >= (other: Any): Column = GreaterThanOrEqual(expr, 
lit(other).expr) @@ -291,6 +307,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def geq(other: Any): Column = this >= other @@ -298,6 +315,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * Equality test that is safe for null values. * * @group expr_ops + * @since 1.3.0 */ def <=> (other: Any): Column = EqualNullSafe(expr, lit(other).expr) @@ -305,6 +323,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * Equality test that is safe for null values. * * @group java_expr_ops + * @since 1.3.0 */ def eqNullSafe(other: Any): Column = this <=> other @@ -312,6 +331,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * True if the current column is between the lower bound and upper bound, inclusive. * * @group java_expr_ops + * @since 1.4.0 */ def between(lowerBound: Any, upperBound: Any): Column = { (this >= lowerBound) && (this <= upperBound) @@ -321,6 +341,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * True if the current expression is null. * * @group expr_ops + * @since 1.3.0 */ def isNull: Column = IsNull(expr) @@ -328,6 +349,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * True if the current expression is NOT null. * * @group expr_ops + * @since 1.3.0 */ def isNotNull: Column = IsNotNull(expr) @@ -342,6 +364,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def || (other: Any): Column = Or(expr, lit(other).expr) @@ -356,6 +379,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def or(other: Column): Column = this || other @@ -370,6 +394,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def && (other: Any): Column = And(expr, lit(other).expr) @@ -384,6 +409,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def and(other: Column): Column = this && other @@ -398,6 +424,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def + (other: Any): Column = Add(expr, lit(other).expr) @@ -412,6 +439,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def plus(other: Any): Column = this + other @@ -426,6 +454,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def - (other: Any): Column = Subtract(expr, lit(other).expr) @@ -440,6 +469,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def minus(other: Any): Column = this - other @@ -454,6 +484,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def * (other: Any): Column = Multiply(expr, lit(other).expr) @@ -468,6 +499,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def multiply(other: Any): Column = this * other @@ -482,6 +514,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def / (other: Any): Column = Divide(expr, lit(other).expr) @@ -496,6 +529,7 @@ class Column(protected[sql] val expr: 
Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def divide(other: Any): Column = this / other @@ -503,6 +537,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * Modulo (a.k.a. remainder) expression. * * @group expr_ops + * @since 1.3.0 */ def % (other: Any): Column = Remainder(expr, lit(other).expr) @@ -510,6 +545,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * Modulo (a.k.a. remainder) expression. * * @group java_expr_ops + * @since 1.3.0 */ def mod(other: Any): Column = this % other @@ -518,6 +554,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * by the evaluated values of the arguments. * * @group expr_ops + * @since 1.3.0 */ @scala.annotation.varargs def in(list: Column*): Column = In(expr, list.map(_.expr)) @@ -526,6 +563,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * SQL like expression. * * @group expr_ops + * @since 1.3.0 */ def like(literal: String): Column = Like(expr, lit(literal).expr) @@ -533,6 +571,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * SQL RLIKE expression (LIKE with Regex). * * @group expr_ops + * @since 1.3.0 */ def rlike(literal: String): Column = RLike(expr, lit(literal).expr) @@ -541,6 +580,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * or gets a value by key `key` in a [[MapType]]. * * @group expr_ops + * @since 1.3.0 */ def getItem(key: Any): Column = UnresolvedExtractValue(expr, Literal(key)) @@ -548,6 +588,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * An expression that gets a field by name in a [[StructType]]. * * @group expr_ops + * @since 1.3.0 */ def getField(fieldName: String): Column = UnresolvedExtractValue(expr, Literal(fieldName)) @@ -557,6 +598,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * @param len expression for the length of the substring. * * @group expr_ops + * @since 1.3.0 */ def substr(startPos: Column, len: Column): Column = Substring(expr, startPos.expr, len.expr) @@ -566,6 +608,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * @param len length of the substring. * * @group expr_ops + * @since 1.3.0 */ def substr(startPos: Int, len: Int): Column = Substring(expr, lit(startPos).expr, lit(len).expr) @@ -573,6 +616,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * Contains the other element. * * @group expr_ops + * @since 1.3.0 */ def contains(other: Any): Column = Contains(expr, lit(other).expr) @@ -580,6 +624,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * String starts with. * * @group expr_ops + * @since 1.3.0 */ def startsWith(other: Column): Column = StartsWith(expr, lit(other).expr) @@ -587,6 +632,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * String starts with another string literal. * * @group expr_ops + * @since 1.3.0 */ def startsWith(literal: String): Column = this.startsWith(lit(literal)) @@ -594,6 +640,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * String ends with. * * @group expr_ops + * @since 1.3.0 */ def endsWith(other: Column): Column = EndsWith(expr, lit(other).expr) @@ -601,6 +648,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * String ends with another string literal. 
* * @group expr_ops + * @since 1.3.0 */ def endsWith(literal: String): Column = this.endsWith(lit(literal)) @@ -612,6 +660,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def as(alias: String): Column = Alias(expr, alias)() @@ -623,6 +672,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def as(alias: Symbol): Column = Alias(expr, alias.name)() @@ -634,6 +684,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def as(alias: String, metadata: Metadata): Column = { Alias(expr, alias)(explicitMetadata = Some(metadata)) @@ -651,6 +702,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def cast(to: DataType): Column = expr match { // Lift alias out of cast so we can support col.as("name").cast(IntegerType) @@ -668,6 +720,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def cast(to: String): Column = cast(DataTypeParser.parse(to)) @@ -682,6 +735,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def desc: Column = SortOrder(expr, Descending) @@ -696,6 +750,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def asc: Column = SortOrder(expr, Ascending) @@ -703,6 +758,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * Prints the expression to the console for debugging purpose. * * @group df_ops + * @since 1.3.0 */ def explain(extended: Boolean): Unit = { if (extended) { @@ -719,6 +775,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.4.0 */ def bitwiseOR(other: Any): Column = BitwiseOr(expr, lit(other).expr) @@ -729,6 +786,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.4.0 */ def bitwiseAND(other: Any): Column = BitwiseAnd(expr, lit(other).expr) @@ -739,6 +797,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.4.0 */ def bitwiseXOR(other: Any): Column = BitwiseXor(expr, lit(other).expr) @@ -748,61 +807,115 @@ class Column(protected[sql] val expr: Expression) extends Logging { /** * :: Experimental :: * A convenient class used for constructing schema. + * + * @since 1.3.0 */ @Experimental class ColumnName(name: String) extends Column(name) { - /** Creates a new AttributeReference of type boolean */ + /** + * Creates a new [[StructField]] of type boolean. + * @since 1.3.0 + */ def boolean: StructField = StructField(name, BooleanType) - /** Creates a new AttributeReference of type byte */ + /** + * Creates a new [[StructField]] of type byte. + * @since 1.3.0 + */ def byte: StructField = StructField(name, ByteType) - /** Creates a new AttributeReference of type short */ + /** + * Creates a new [[StructField]] of type short. + * @since 1.3.0 + */ def short: StructField = StructField(name, ShortType) - /** Creates a new AttributeReference of type int */ + /** + * Creates a new [[StructField]] of type int. + * @since 1.3.0 + */ def int: StructField = StructField(name, IntegerType) - /** Creates a new AttributeReference of type long */ + /** + * Creates a new [[StructField]] of type long. 
+ * @since 1.3.0 + */ def long: StructField = StructField(name, LongType) - /** Creates a new AttributeReference of type float */ + /** + * Creates a new [[StructField]] of type float. + * @since 1.3.0 + */ def float: StructField = StructField(name, FloatType) - /** Creates a new AttributeReference of type double */ + /** + * Creates a new [[StructField]] of type double. + * @since 1.3.0 + */ def double: StructField = StructField(name, DoubleType) - /** Creates a new AttributeReference of type string */ + /** + * Creates a new [[StructField]] of type string. + * @since 1.3.0 + */ def string: StructField = StructField(name, StringType) - /** Creates a new AttributeReference of type date */ + /** + * Creates a new [[StructField]] of type date. + * @since 1.3.0 + */ def date: StructField = StructField(name, DateType) - /** Creates a new AttributeReference of type decimal */ + /** + * Creates a new [[StructField]] of type decimal. + * @since 1.3.0 + */ def decimal: StructField = StructField(name, DecimalType.Unlimited) - /** Creates a new AttributeReference of type decimal */ + /** + * Creates a new [[StructField]] of type decimal. + * @since 1.3.0 + */ def decimal(precision: Int, scale: Int): StructField = StructField(name, DecimalType(precision, scale)) - /** Creates a new AttributeReference of type timestamp */ + /** + * Creates a new [[StructField]] of type timestamp. + * @since 1.3.0 + */ def timestamp: StructField = StructField(name, TimestampType) - /** Creates a new AttributeReference of type binary */ + /** + * Creates a new [[StructField]] of type binary. + * @since 1.3.0 + */ def binary: StructField = StructField(name, BinaryType) - /** Creates a new AttributeReference of type array */ + /** + * Creates a new [[StructField]] of type array. + * @since 1.3.0 + */ def array(dataType: DataType): StructField = StructField(name, ArrayType(dataType)) - /** Creates a new AttributeReference of type map */ + /** + * Creates a new [[StructField]] of type map. + * @since 1.3.0 + */ def map(keyType: DataType, valueType: DataType): StructField = map(MapType(keyType, valueType)) def map(mapType: MapType): StructField = StructField(name, mapType) - /** Creates a new AttributeReference of type struct */ + /** + * Creates a new [[StructField]] of type struct. + * @since 1.3.0 + */ def struct(fields: StructField*): StructField = struct(StructType(fields)) + /** + * Creates a new [[StructField]] of type struct. + * @since 1.3.0 + */ def struct(structType: StructType): StructField = StructField(name, structType) } http://git-wip-us.apache.org/repos/asf/spark/blob/bdd5db9f/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 01fd432..c820a67 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -114,6 +114,7 @@ private[sql] object DataFrame { * @groupname rdd RDD Operations * @groupname output Output Operations * @groupname action Actions + * @since 1.3.0 */ // TODO: Improve documentation. @Experimental @@ -233,6 +234,7 @@ class DataFrame private[sql]( /** * Returns the object itself. * @group basic + * @since 1.3.0 */ // This is declared with parentheses to prevent the Scala compiler from treating // `rdd.toDF("1")` as invoking this toDF and then apply on the returned DataFrame. 
@@ -247,6 +249,7 @@ class DataFrame private[sql]( * rdd.toDF("id", "name") // this creates a DataFrame with column name "id" and "name" * }}} * @group basic + * @since 1.3.0 */ @scala.annotation.varargs def toDF(colNames: String*): DataFrame = { @@ -264,12 +267,14 @@ class DataFrame private[sql]( /** * Returns the schema of this [[DataFrame]]. * @group basic + * @since 1.3.0 */ def schema: StructType = queryExecution.analyzed.schema /** * Returns all column names and their data types as an array. * @group basic + * @since 1.3.0 */ def dtypes: Array[(String, String)] = schema.fields.map { field => (field.name, field.dataType.toString) @@ -278,18 +283,21 @@ class DataFrame private[sql]( /** * Returns all column names as an array. * @group basic + * @since 1.3.0 */ def columns: Array[String] = schema.fields.map(_.name) /** * Prints the schema to the console in a nice tree format. * @group basic + * @since 1.3.0 */ def printSchema(): Unit = println(schema.treeString) /** * Prints the plans (logical and physical) to the console for debugging purposes. * @group basic + * @since 1.3.0 */ def explain(extended: Boolean): Unit = { ExplainCommand( @@ -302,6 +310,7 @@ class DataFrame private[sql]( /** * Only prints the physical plan to the console for debugging purposes. * @group basic + * @since 1.3.0 */ def explain(): Unit = explain(extended = false) @@ -309,6 +318,7 @@ class DataFrame private[sql]( * Returns true if the `collect` and `take` methods can be run locally * (without any Spark executors). * @group basic + * @since 1.3.0 */ def isLocal: Boolean = logicalPlan.isInstanceOf[LocalRelation] @@ -325,12 +335,14 @@ class DataFrame private[sql]( * @param numRows Number of rows to show * * @group action + * @since 1.3.0 */ def show(numRows: Int): Unit = println(showString(numRows)) /** * Displays the top 20 rows of [[DataFrame]] in a tabular form. * @group action + * @since 1.3.0 */ def show(): Unit = show(20) @@ -342,6 +354,7 @@ class DataFrame private[sql]( * }}} * * @group dfops + * @since 1.3.1 */ def na: DataFrameNaFunctions = new DataFrameNaFunctions(this) @@ -353,6 +366,7 @@ class DataFrame private[sql]( * }}} * * @group dfops + * @since 1.4.0 */ def stat: DataFrameStatFunctions = new DataFrameStatFunctions(this) @@ -363,6 +377,7 @@ class DataFrame private[sql]( * * @param right Right side of the join operation. * @group dfops + * @since 1.3.0 */ def join(right: DataFrame): DataFrame = { Join(logicalPlan, right.logicalPlan, joinType = Inner, None) @@ -386,6 +401,7 @@ class DataFrame private[sql]( * @param right Right side of the join operation. * @param usingColumn Name of the column to join on. This column must exist on both sides. * @group dfops + * @since 1.4.0 */ def join(right: DataFrame, usingColumn: String): DataFrame = { // Analyze the self join. The assumption is that the analyzer will disambiguate left vs right @@ -416,6 +432,7 @@ class DataFrame private[sql]( * df1.join(df2).where($"df1Key" === $"df2Key") * }}} * @group dfops + * @since 1.3.0 */ def join(right: DataFrame, joinExprs: Column): DataFrame = join(right, joinExprs, "inner") @@ -437,6 +454,7 @@ class DataFrame private[sql]( * @param joinExprs Join expression. * @param joinType One of: `inner`, `outer`, `left_outer`, `right_outer`, `leftsemi`. 
* @group dfops + * @since 1.3.0 */ def join(right: DataFrame, joinExprs: Column, joinType: String): DataFrame = { // Note that in this function, we introduce a hack in the case of self-join to automatically @@ -483,6 +501,7 @@ class DataFrame private[sql]( * df.sort($"sortcol".asc) * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def sort(sortCol: String, sortCols: String*): DataFrame = { @@ -495,6 +514,7 @@ class DataFrame private[sql]( * df.sort($"col1", $"col2".desc) * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def sort(sortExprs: Column*): DataFrame = { @@ -513,6 +533,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] sorted by the given expressions. * This is an alias of the `sort` function. * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def orderBy(sortCol: String, sortCols: String*): DataFrame = sort(sortCol, sortCols :_*) @@ -521,6 +542,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] sorted by the given expressions. * This is an alias of the `sort` function. * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def orderBy(sortExprs: Column*): DataFrame = sort(sortExprs :_*) @@ -528,12 +550,14 @@ class DataFrame private[sql]( /** * Selects column based on the column name and return it as a [[Column]]. * @group dfops + * @since 1.3.0 */ def apply(colName: String): Column = col(colName) /** * Selects column based on the column name and return it as a [[Column]]. * @group dfops + * @since 1.3.0 */ def col(colName: String): Column = colName match { case "*" => @@ -546,12 +570,14 @@ class DataFrame private[sql]( /** * Returns a new [[DataFrame]] with an alias set. * @group dfops + * @since 1.3.0 */ def as(alias: String): DataFrame = Subquery(alias, logicalPlan) /** * (Scala-specific) Returns a new [[DataFrame]] with an alias set. 
* @group dfops + * @since 1.3.0 */ def as(alias: Symbol): DataFrame = as(alias.name) @@ -561,6 +587,7 @@ class DataFrame private[sql]( * df.select($"colA", $"colB" + 1) * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def select(cols: Column*): DataFrame = { @@ -583,6 +610,7 @@ class DataFrame private[sql]( * df.select($"colA", $"colB") * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def select(col: String, cols: String*): DataFrame = select((col +: cols).map(Column(_)) :_*) @@ -595,6 +623,7 @@ class DataFrame private[sql]( * df.selectExpr("colA", "colB as newName", "abs(colC)") * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def selectExpr(exprs: String*): DataFrame = { @@ -612,6 +641,7 @@ class DataFrame private[sql]( * peopleDf($"age" > 15) * }}} * @group dfops + * @since 1.3.0 */ def filter(condition: Column): DataFrame = Filter(condition.expr, logicalPlan) @@ -621,6 +651,7 @@ class DataFrame private[sql]( * peopleDf.filter("age > 15") * }}} * @group dfops + * @since 1.3.0 */ def filter(conditionExpr: String): DataFrame = { filter(Column(new SqlParser().parseExpression(conditionExpr))) @@ -635,6 +666,7 @@ class DataFrame private[sql]( * peopleDf($"age" > 15) * }}} * @group dfops + * @since 1.3.0 */ def where(condition: Column): DataFrame = filter(condition) @@ -653,6 +685,7 @@ class DataFrame private[sql]( * )) * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def groupBy(cols: Column*): GroupedData = new GroupedData(this, cols.map(_.expr)) @@ -675,6 +708,7 @@ class DataFrame private[sql]( * )) * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def groupBy(col1: String, cols: String*): GroupedData = { @@ -690,6 +724,7 @@ class DataFrame private[sql]( * df.groupBy().agg("age" -> "max", "salary" -> "avg") * }}} * @group dfops + * @since 1.3.0 */ def agg(aggExpr: (String, String), aggExprs: (String, String)*): DataFrame = { groupBy().agg(aggExpr, aggExprs :_*) @@ -703,6 +738,7 @@ class DataFrame private[sql]( * df.groupBy().agg(Map("age" -> "max", "salary" -> "avg")) * }}} * @group dfops + * @since 1.3.0 */ def agg(exprs: Map[String, String]): DataFrame = groupBy().agg(exprs) @@ -714,6 +750,7 @@ class DataFrame private[sql]( * df.groupBy().agg(Map("age" -> "max", "salary" -> "avg")) * }}} * @group dfops + * @since 1.3.0 */ def agg(exprs: java.util.Map[String, String]): DataFrame = groupBy().agg(exprs) @@ -725,6 +762,7 @@ class DataFrame private[sql]( * df.groupBy().agg(max($"age"), avg($"salary")) * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def agg(expr: Column, exprs: Column*): DataFrame = groupBy().agg(expr, exprs :_*) @@ -733,6 +771,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] by taking the first `n` rows. The difference between this function * and `head` is that `head` returns an array while `limit` returns a new [[DataFrame]]. * @group dfops + * @since 1.3.0 */ def limit(n: Int): DataFrame = Limit(Literal(n), logicalPlan) @@ -740,6 +779,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] containing union of rows in this frame and another frame. * This is equivalent to `UNION ALL` in SQL. * @group dfops + * @since 1.3.0 */ def unionAll(other: DataFrame): DataFrame = Union(logicalPlan, other.logicalPlan) @@ -747,6 +787,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] containing rows only in both this frame and another frame. * This is equivalent to `INTERSECT` in SQL. 
* @group dfops + * @since 1.3.0 */ def intersect(other: DataFrame): DataFrame = Intersect(logicalPlan, other.logicalPlan) @@ -754,6 +795,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] containing rows in this frame but not in another frame. * This is equivalent to `EXCEPT` in SQL. * @group dfops + * @since 1.3.0 */ def except(other: DataFrame): DataFrame = Except(logicalPlan, other.logicalPlan) @@ -764,6 +806,7 @@ class DataFrame private[sql]( * @param fraction Fraction of rows to generate. * @param seed Seed for sampling. * @group dfops + * @since 1.3.0 */ def sample(withReplacement: Boolean, fraction: Double, seed: Long): DataFrame = { Sample(0.0, fraction, withReplacement, seed, logicalPlan) @@ -775,6 +818,7 @@ class DataFrame private[sql]( * @param withReplacement Sample with replacement or not. * @param fraction Fraction of rows to generate. * @group dfops + * @since 1.3.0 */ def sample(withReplacement: Boolean, fraction: Double): DataFrame = { sample(withReplacement, fraction, Utils.random.nextLong) @@ -786,6 +830,7 @@ class DataFrame private[sql]( * @param weights weights for splits, will be normalized if they don't sum to 1. * @param seed Seed for sampling. * @group dfops + * @since 1.4.0 */ def randomSplit(weights: Array[Double], seed: Long): Array[DataFrame] = { val sum = weights.sum @@ -800,6 +845,7 @@ class DataFrame private[sql]( * * @param weights weights for splits, will be normalized if they don't sum to 1. * @group dfops + * @since 1.4.0 */ def randomSplit(weights: Array[Double]): Array[DataFrame] = { randomSplit(weights, Utils.random.nextLong) @@ -836,6 +882,7 @@ class DataFrame private[sql]( * val bookCountPerWord = allWords.groupBy("word").agg(countDistinct("title")) * }}} * @group dfops + * @since 1.3.0 */ def explode[A <: Product : TypeTag](input: Column*)(f: Row => TraversableOnce[A]): DataFrame = { val schema = ScalaReflection.schemaFor[A].dataType.asInstanceOf[StructType] @@ -860,6 +907,7 @@ class DataFrame private[sql]( * df.explode("words", "word")(words: String => words.split(" ")) * }}} * @group dfops + * @since 1.3.0 */ def explode[A, B : TypeTag](inputColumn: String, outputColumn: String)(f: A => TraversableOnce[B]) : DataFrame = { @@ -883,6 +931,7 @@ class DataFrame private[sql]( /** * Returns a new [[DataFrame]] by adding a column. * @group dfops + * @since 1.3.0 */ def withColumn(colName: String, col: Column): DataFrame = { val resolver = sqlContext.analyzer.resolver @@ -902,6 +951,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] with a column renamed. * This is a no-op if schema doesn't contain existingName. * @group dfops + * @since 1.3.0 */ def withColumnRenamed(existingName: String, newName: String): DataFrame = { val resolver = sqlContext.analyzer.resolver @@ -921,6 +971,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] with a column dropped. * This is a no-op if schema doesn't contain column name. * @group dfops + * @since 1.4.0 */ def drop(colName: String): DataFrame = { val resolver = sqlContext.analyzer.resolver @@ -940,6 +991,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] that contains only the unique rows from this [[DataFrame]]. * This is an alias for `distinct`. * @group dfops + * @since 1.4.0 */ def dropDuplicates(): DataFrame = dropDuplicates(this.columns) @@ -948,6 +1000,7 @@ class DataFrame private[sql]( * the subset of columns. 
* * @group dfops + * @since 1.4.0 */ def dropDuplicates(colNames: Seq[String]): DataFrame = { val groupCols = colNames.map(resolve) @@ -967,6 +1020,7 @@ class DataFrame private[sql]( * the subset of columns. * * @group dfops + * @since 1.4.0 */ def dropDuplicates(colNames: Array[String]): DataFrame = dropDuplicates(colNames.toSeq) @@ -991,6 +1045,7 @@ class DataFrame private[sql]( * }}} * * @group action + * @since 1.3.1 */ @scala.annotation.varargs def describe(cols: String*): DataFrame = { @@ -1034,24 +1089,28 @@ class DataFrame private[sql]( /** * Returns the first `n` rows. * @group action + * @since 1.3.0 */ def head(n: Int): Array[Row] = limit(n).collect() /** * Returns the first row. * @group action + * @since 1.3.0 */ def head(): Row = head(1).head /** * Returns the first row. Alias for head(). * @group action + * @since 1.3.0 */ override def first(): Row = head() /** * Returns a new RDD by applying a function to all rows of this DataFrame. * @group rdd + * @since 1.3.0 */ override def map[R: ClassTag](f: Row => R): RDD[R] = rdd.map(f) @@ -1059,12 +1118,14 @@ class DataFrame private[sql]( * Returns a new RDD by first applying a function to all rows of this [[DataFrame]], * and then flattening the results. * @group rdd + * @since 1.3.0 */ override def flatMap[R: ClassTag](f: Row => TraversableOnce[R]): RDD[R] = rdd.flatMap(f) /** * Returns a new RDD by applying a function to each partition of this DataFrame. * @group rdd + * @since 1.3.0 */ override def mapPartitions[R: ClassTag](f: Iterator[Row] => Iterator[R]): RDD[R] = { rdd.mapPartitions(f) @@ -1073,42 +1134,49 @@ class DataFrame private[sql]( /** * Applies a function `f` to all rows. * @group rdd + * @since 1.3.0 */ override def foreach(f: Row => Unit): Unit = rdd.foreach(f) /** * Applies a function f to each partition of this [[DataFrame]]. * @group rdd + * @since 1.3.0 */ override def foreachPartition(f: Iterator[Row] => Unit): Unit = rdd.foreachPartition(f) /** * Returns the first `n` rows in the [[DataFrame]]. * @group action + * @since 1.3.0 */ override def take(n: Int): Array[Row] = head(n) /** * Returns an array that contains all of [[Row]]s in this [[DataFrame]]. * @group action + * @since 1.3.0 */ override def collect(): Array[Row] = queryExecution.executedPlan.executeCollect() /** * Returns a Java list that contains all of [[Row]]s in this [[DataFrame]]. * @group action + * @since 1.3.0 */ override def collectAsList(): java.util.List[Row] = java.util.Arrays.asList(rdd.collect() :_*) /** * Returns the number of rows in the [[DataFrame]]. * @group action + * @since 1.3.0 */ override def count(): Long = groupBy().count().collect().head.getLong(0) /** * Returns a new [[DataFrame]] that has exactly `numPartitions` partitions. * @group rdd + * @since 1.3.0 */ override def repartition(numPartitions: Int): DataFrame = { Repartition(numPartitions, shuffle = true, logicalPlan) @@ -1120,6 +1188,7 @@ class DataFrame private[sql]( * if you go from 1000 partitions to 100 partitions, there will not be a shuffle, instead each of * the 100 new partitions will claim 10 of the current partitions. * @group rdd + * @since 1.4.0 */ override def coalesce(numPartitions: Int): DataFrame = { Repartition(numPartitions, shuffle = false, logicalPlan) @@ -1129,11 +1198,13 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] that contains only the unique rows from this [[DataFrame]]. * This is an alias for `dropDuplicates`. 
* @group dfops + * @since 1.3.0 */ override def distinct: DataFrame = Distinct(logicalPlan) /** * @group basic + * @since 1.3.0 */ override def persist(): this.type = { sqlContext.cacheManager.cacheQuery(this) @@ -1142,11 +1213,13 @@ class DataFrame private[sql]( /** * @group basic + * @since 1.3.0 */ override def cache(): this.type = persist() /** * @group basic + * @since 1.3.0 */ override def persist(newLevel: StorageLevel): this.type = { sqlContext.cacheManager.cacheQuery(this, None, newLevel) @@ -1155,6 +1228,7 @@ class DataFrame private[sql]( /** * @group basic + * @since 1.3.0 */ override def unpersist(blocking: Boolean): this.type = { sqlContext.cacheManager.tryUncacheQuery(this, blocking) @@ -1163,6 +1237,7 @@ class DataFrame private[sql]( /** * @group basic + * @since 1.3.0 */ override def unpersist(): this.type = unpersist(blocking = false) @@ -1175,6 +1250,7 @@ class DataFrame private[sql]( * memoized. Once called, it won't change even if you change any query planning related Spark SQL * configurations (e.g. `spark.sql.shuffle.partitions`). * @group rdd + * @since 1.3.0 */ lazy val rdd: RDD[Row] = { // use a local variable to make sure the map closure doesn't capture the whole DataFrame @@ -1188,12 +1264,14 @@ class DataFrame private[sql]( /** * Returns the content of the [[DataFrame]] as a [[JavaRDD]] of [[Row]]s. * @group rdd + * @since 1.3.0 */ def toJavaRDD: JavaRDD[Row] = rdd.toJavaRDD() /** * Returns the content of the [[DataFrame]] as a [[JavaRDD]] of [[Row]]s. * @group rdd + * @since 1.3.0 */ def javaRDD: JavaRDD[Row] = toJavaRDD @@ -1202,6 +1280,7 @@ class DataFrame private[sql]( * temporary table is tied to the [[SQLContext]] that was used to create this DataFrame. * * @group basic + * @since 1.3.0 */ def registerTempTable(tableName: String): Unit = { sqlContext.registerDataFrameAsTable(this, tableName) @@ -1212,6 +1291,7 @@ class DataFrame private[sql]( * Files that are written out using this method can be read back in as a [[DataFrame]] * using the `parquetFile` function in [[SQLContext]]. * @group output + * @since 1.3.0 */ def saveAsParquetFile(path: String): Unit = { if (sqlContext.conf.parquetUseDataSourceApi) { @@ -1235,6 +1315,7 @@ class DataFrame private[sql]( * Also note that while this function can persist the table metadata into Hive's metastore, * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output + * @since 1.3.0 */ @Experimental def saveAsTable(tableName: String): Unit = { @@ -1254,6 +1335,7 @@ class DataFrame private[sql]( * Also note that while this function can persist the table metadata into Hive's metastore, * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output + * @since 1.3.0 */ @Experimental def saveAsTable(tableName: String, mode: SaveMode): Unit = { @@ -1281,6 +1363,7 @@ class DataFrame private[sql]( * Also note that while this function can persist the table metadata into Hive's metastore, * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output + * @since 1.3.0 */ @Experimental def saveAsTable(tableName: String, source: String): Unit = { @@ -1300,6 +1383,7 @@ class DataFrame private[sql]( * Also note that while this function can persist the table metadata into Hive's metastore, * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. 
* @group output + * @since 1.3.0 */ @Experimental def saveAsTable(tableName: String, source: String, mode: SaveMode): Unit = { @@ -1319,6 +1403,7 @@ class DataFrame private[sql]( * Also note that while this function can persist the table metadata into Hive's metastore, * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output + * @since 1.3.0 */ @Experimental def saveAsTable( @@ -1340,6 +1425,7 @@ class DataFrame private[sql]( * an RDD out to a parquet file, and then register that file as a table. This "table" can then * be the target of an `insertInto`. * @group output + * @since 1.4.0 */ @Experimental def saveAsTable( @@ -1365,6 +1451,7 @@ class DataFrame private[sql]( * Also note that while this function can persist the table metadata into Hive's metastore, * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output + * @since 1.3.0 */ @Experimental def saveAsTable( @@ -1396,6 +1483,7 @@ class DataFrame private[sql]( * an RDD out to a parquet file, and then register that file as a table. This "table" can then * be the target of an `insertInto`. * @group output + * @since 1.4.0 */ @Experimental def saveAsTable( @@ -1421,6 +1509,7 @@ class DataFrame private[sql]( * using the default data source configured by spark.sql.sources.default and * [[SaveMode.ErrorIfExists]] as the save mode. * @group output + * @since 1.3.0 */ @Experimental def save(path: String): Unit = { @@ -1432,6 +1521,7 @@ class DataFrame private[sql]( * Saves the contents of this DataFrame to the given path and [[SaveMode]] specified by mode, * using the default data source configured by spark.sql.sources.default. * @group output + * @since 1.3.0 */ @Experimental def save(path: String, mode: SaveMode): Unit = { @@ -1444,6 +1534,7 @@ class DataFrame private[sql]( * Saves the contents of this DataFrame to the given path based on the given data source, * using [[SaveMode.ErrorIfExists]] as the save mode. * @group output + * @since 1.3.0 */ @Experimental def save(path: String, source: String): Unit = { @@ -1455,6 +1546,7 @@ class DataFrame private[sql]( * Saves the contents of this DataFrame to the given path based on the given data source and * [[SaveMode]] specified by mode. * @group output + * @since 1.3.0 */ @Experimental def save(path: String, source: String, mode: SaveMode): Unit = { @@ -1466,6 +1558,7 @@ class DataFrame private[sql]( * Saves the contents of this DataFrame based on the given data source, * [[SaveMode]] specified by mode, and a set of options. * @group output + * @since 1.3.0 */ @Experimental def save( @@ -1480,6 +1573,7 @@ class DataFrame private[sql]( * Saves the contents of this DataFrame to the given path based on the given data source, * [[SaveMode]] specified by mode, and partition columns specified by `partitionColumns`. * @group output + * @since 1.4.0 */ @Experimental def save( @@ -1496,6 +1590,7 @@ class DataFrame private[sql]( * Saves the contents of this DataFrame based on the given data source, * [[SaveMode]] specified by mode, and a set of options * @group output + * @since 1.3.0 */ @Experimental def save( @@ -1510,6 +1605,7 @@ class DataFrame private[sql]( * Saves the contents of this DataFrame to the given path based on the given data source, * [[SaveMode]] specified by mode, and partition columns specified by `partitionColumns`. 
* @group output + * @since 1.4.0 */ @Experimental def save( @@ -1524,6 +1620,7 @@ class DataFrame private[sql]( * :: Experimental :: * Adds the rows from this RDD to the specified table, optionally overwriting the existing data. * @group output + * @since 1.3.0 */ @Experimental def insertInto(tableName: String, overwrite: Boolean): Unit = { @@ -1536,6 +1633,7 @@ class DataFrame private[sql]( * Adds the rows from this RDD to the specified table. * Throws an exception if the table already exists. * @group output + * @since 1.3.0 */ @Experimental def insertInto(tableName: String): Unit = insertInto(tableName, overwrite = false) @@ -1543,6 +1641,7 @@ class DataFrame private[sql]( /** * Returns the content of the [[DataFrame]] as a RDD of JSON strings. * @group rdd + * @since 1.3.0 */ def toJSON: RDD[String] = { val rowSchema = this.schema @@ -1581,6 +1680,7 @@ class DataFrame private[sql]( * given name; if you pass `false`, it will throw if the table already * exists. * @group output + * @since 1.3.0 */ def createJDBCTable(url: String, table: String, allowExisting: Boolean): Unit = { createJDBCTable(url, table, allowExisting, new Properties()) @@ -1594,6 +1694,7 @@ class DataFrame private[sql]( * given name; if you pass `false`, it will throw if the table already * exists. * @group output + * @since 1.4.0 */ def createJDBCTable( url: String, @@ -1626,6 +1727,7 @@ class DataFrame private[sql]( * the RDD in order via the simple statement * `INSERT INTO table VALUES (?, ?, ..., ?)` should not fail. * @group output + * @since 1.3.0 */ def insertIntoJDBC(url: String, table: String, overwrite: Boolean): Unit = { insertIntoJDBC(url, table, overwrite, new Properties()) @@ -1643,6 +1745,7 @@ class DataFrame private[sql]( * the RDD in order via the simple statement * `INSERT INTO table VALUES (?, ?, ..., ?)` should not fail. * @group output + * @since 1.4.0 */ def insertIntoJDBC( url: String, http://git-wip-us.apache.org/repos/asf/spark/blob/bdd5db9f/sql/core/src/main/scala/org/apache/spark/sql/DataFrameHolder.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameHolder.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameHolder.scala index a3187fe..b87efb5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameHolder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameHolder.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql /** * A container for a [[DataFrame]], used for implicit conversions. + * + * @since 1.3.0 */ private[sql] case class DataFrameHolder(df: DataFrame) { http://git-wip-us.apache.org/repos/asf/spark/blob/bdd5db9f/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala index 4a54120..b4c2daa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala @@ -30,12 +30,16 @@ import org.apache.spark.sql.types._ /** * :: Experimental :: * Functionality for working with missing data in [[DataFrame]]s. + * + * @since 1.3.1 */ @Experimental final class DataFrameNaFunctions private[sql](df: DataFrame) { /** * Returns a new [[DataFrame]] that drops rows containing any null values. 
+ * + * @since 1.3.1 */ def drop(): DataFrame = drop("any", df.columns) @@ -44,18 +48,24 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * If `how` is "any", then drop rows containing any null values. * If `how` is "all", then drop rows only if every column is null for that row. + * + * @since 1.3.1 */ def drop(how: String): DataFrame = drop(how, df.columns) /** * Returns a new [[DataFrame]] that drops rows containing any null values * in the specified columns. + * + * @since 1.3.1 */ def drop(cols: Array[String]): DataFrame = drop(cols.toSeq) /** * (Scala-specific) Returns a new [[DataFrame ]] that drops rows containing any null values * in the specified columns. + * + * @since 1.3.1 */ def drop(cols: Seq[String]): DataFrame = drop(cols.size, cols) @@ -65,6 +75,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * If `how` is "any", then drop rows containing any null values in the specified columns. * If `how` is "all", then drop rows only if every specified column is null for that row. + * + * @since 1.3.1 */ def drop(how: String, cols: Array[String]): DataFrame = drop(how, cols.toSeq) @@ -74,6 +86,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * If `how` is "any", then drop rows containing any null values in the specified columns. * If `how` is "all", then drop rows only if every specified column is null for that row. + * + * @since 1.3.1 */ def drop(how: String, cols: Seq[String]): DataFrame = { how.toLowerCase match { @@ -85,18 +99,24 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { /** * Returns a new [[DataFrame]] that drops rows containing less than `minNonNulls` non-null values. + * + * @since 1.3.1 */ def drop(minNonNulls: Int): DataFrame = drop(minNonNulls, df.columns) /** * Returns a new [[DataFrame]] that drops rows containing less than `minNonNulls` non-null * values in the specified columns. + * + * @since 1.3.1 */ def drop(minNonNulls: Int, cols: Array[String]): DataFrame = drop(minNonNulls, cols.toSeq) /** * (Scala-specific) Returns a new [[DataFrame]] that drops rows containing less than * `minNonNulls` non-null values in the specified columns. + * + * @since 1.3.1 */ def drop(minNonNulls: Int, cols: Seq[String]): DataFrame = { // Filtering condition -- only keep the row if it has at least `minNonNulls` non-null values. @@ -106,23 +126,31 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { /** * Returns a new [[DataFrame]] that replaces null values in numeric columns with `value`. + * + * @since 1.3.1 */ def fill(value: Double): DataFrame = fill(value, df.columns) /** * Returns a new [[DataFrame ]] that replaces null values in string columns with `value`. + * + * @since 1.3.1 */ def fill(value: String): DataFrame = fill(value, df.columns) /** * Returns a new [[DataFrame]] that replaces null values in specified numeric columns. * If a specified column is not a numeric column, it is ignored. + * + * @since 1.3.1 */ def fill(value: Double, cols: Array[String]): DataFrame = fill(value, cols.toSeq) /** * (Scala-specific) Returns a new [[DataFrame]] that replaces null values in specified * numeric columns. If a specified column is not a numeric column, it is ignored. + * + * @since 1.3.1 */ def fill(value: Double, cols: Seq[String]): DataFrame = { val columnEquals = df.sqlContext.analyzer.resolver @@ -140,12 +168,16 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { /** * Returns a new [[DataFrame]] that replaces null values in specified string columns. 
* If a specified column is not a string column, it is ignored. + * + * @since 1.3.1 */ def fill(value: String, cols: Array[String]): DataFrame = fill(value, cols.toSeq) /** * (Scala-specific) Returns a new [[DataFrame]] that replaces null values in * specified string columns. If a specified column is not a string column, it is ignored. + * + * @since 1.3.1 */ def fill(value: String, cols: Seq[String]): DataFrame = { val columnEquals = df.sqlContext.analyzer.resolver @@ -172,6 +204,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * import com.google.common.collect.ImmutableMap; * df.na.fill(ImmutableMap.of("A", "unknown", "B", 1.0)); * }}} + * + * @since 1.3.1 */ def fill(valueMap: java.util.Map[String, Any]): DataFrame = fill0(valueMap.toSeq) @@ -189,6 +223,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * "B" -> 1.0 * )) * }}} + * + * @since 1.3.1 */ def fill(valueMap: Map[String, Any]): DataFrame = fill0(valueMap.toSeq) @@ -212,6 +248,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * @param col name of the column to apply the value replacement * @param replacement value replacement map, as explained above + * + * @since 1.3.1 */ def replace[T](col: String, replacement: java.util.Map[T, T]): DataFrame = { replace[T](col, replacement.toMap : Map[T, T]) @@ -233,6 +271,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * @param cols list of columns to apply the value replacement * @param replacement value replacement map, as explained above + * + * @since 1.3.1 */ def replace[T](cols: Array[String], replacement: java.util.Map[T, T]): DataFrame = { replace(cols.toSeq, replacement.toMap) @@ -256,6 +296,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * @param col name of the column to apply the value replacement * @param replacement value replacement map, as explained above + * + * @since 1.3.1 */ def replace[T](col: String, replacement: Map[T, T]): DataFrame = { if (col == "*") { @@ -279,6 +321,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * @param cols list of columns to apply the value replacement * @param replacement value replacement map, as explained above + * + * @since 1.3.1 */ def replace[T](cols: Seq[String], replacement: Map[T, T]): DataFrame = replace0(cols, replacement) http://git-wip-us.apache.org/repos/asf/spark/blob/bdd5db9f/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala index a1e7447..5d106c1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala @@ -23,6 +23,8 @@ import org.apache.spark.sql.execution.stat._ /** * :: Experimental :: * Statistic functions for [[DataFrame]]s. + * + * @since 1.4.0 */ @Experimental final class DataFrameStatFunctions private[sql](df: DataFrame) { @@ -32,6 +34,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * @param col1 the name of the first column * @param col2 the name of the second column * @return the covariance of the two columns. 
+ * + * @since 1.4.0 */ def cov(col1: String, col2: String): Double = { StatFunctions.calculateCov(df, Seq(col1, col2)) @@ -45,6 +49,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * @param col1 the name of the column * @param col2 the name of the column to calculate the correlation against * @return The Pearson Correlation Coefficient as a Double. + * + * @since 1.4.0 */ def corr(col1: String, col2: String, method: String): Double = { require(method == "pearson", "Currently only the calculation of the Pearson Correlation " + @@ -58,6 +64,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * @param col1 the name of the column * @param col2 the name of the column to calculate the correlation against * @return The Pearson Correlation Coefficient as a Double. + * + * @since 1.4.0 */ def corr(col1: String, col2: String): Double = { corr(col1, col2, "pearson") @@ -76,6 +84,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * @param col2 The name of the second column. Distinct items will make the column names * of the DataFrame. * @return A DataFrame containing for the contingency table. + * + * @since 1.4.0 */ def crosstab(col1: String, col2: String): DataFrame = { StatFunctions.crossTabulate(df, col1, col2) @@ -91,6 +101,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * @param support The minimum frequency for an item to be considered `frequent`. Should be greater * than 1e-4. * @return A Local DataFrame with the Array of frequent items for each column. + * + * @since 1.4.0 */ def freqItems(cols: Array[String], support: Double): DataFrame = { FrequentItems.singlePassFreqItems(df, cols, support) @@ -104,6 +116,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * * @param cols the names of the columns to search frequent items in. * @return A Local DataFrame with the Array of frequent items for each column. + * + * @since 1.4.0 */ def freqItems(cols: Array[String]): DataFrame = { FrequentItems.singlePassFreqItems(df, cols, 0.01) @@ -116,6 +130,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * * @param cols the names of the columns to search frequent items in. * @return A Local DataFrame with the Array of frequent items for each column. + * + * @since 1.4.0 */ def freqItems(cols: Seq[String], support: Double): DataFrame = { FrequentItems.singlePassFreqItems(df, cols, support) @@ -129,6 +145,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * * @param cols the names of the columns to search frequent items in. * @return A Local DataFrame with the Array of frequent items for each column. + * + * @since 1.4.0 */ def freqItems(cols: Seq[String]): DataFrame = { FrequentItems.singlePassFreqItems(df, cols, 0.01) http://git-wip-us.apache.org/repos/asf/spark/blob/bdd5db9f/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala index d5d7e35..717709e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala @@ -27,6 +27,8 @@ import org.apache.spark.annotation.Experimental * {{{ * sqlContext.experimental.extraStrategies += ... 
* }}} + * + * @since 1.3.0 */ @Experimental class ExperimentalMethods protected[sql](sqlContext: SQLContext) { @@ -34,6 +36,8 @@ class ExperimentalMethods protected[sql](sqlContext: SQLContext) { /** * Allows extra strategies to be injected into the query planner at runtime. Note this API * should be consider experimental and is not intended to be stable across releases. + * + * @since 1.3.0 */ @Experimental var extraStrategies: Seq[Strategy] = Nil http://git-wip-us.apache.org/repos/asf/spark/blob/bdd5db9f/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala index 543320e..1381b9f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala @@ -30,6 +30,8 @@ import org.apache.spark.sql.types.NumericType /** * :: Experimental :: * A set of methods for aggregations on a [[DataFrame]], created by [[DataFrame.groupBy]]. + * + * @since 1.3.0 */ @Experimental class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) { @@ -94,6 +96,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * "expense" -> "sum" * ) * }}} + * + * @since 1.3.0 */ def agg(aggExpr: (String, String), aggExprs: (String, String)*): DataFrame = { agg((aggExpr +: aggExprs).toMap) @@ -111,6 +115,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * "expense" -> "sum" * )) * }}} + * + * @since 1.3.0 */ def agg(exprs: Map[String, String]): DataFrame = { exprs.map { case (colName, expr) => @@ -129,6 +135,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * import com.google.common.collect.ImmutableMap; * df.groupBy("department").agg(ImmutableMap.of("age", "max", "expense", "sum")); * }}} + * + * @since 1.3.0 */ def agg(exprs: java.util.Map[String, String]): DataFrame = { agg(exprs.toMap) @@ -162,6 +170,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * // Java, 1.3.x: * df.groupBy("department").agg(col("department"), max("age"), sum("expense")); * }}} + * + * @since 1.3.0 */ @scala.annotation.varargs def agg(expr: Column, exprs: Column*): DataFrame = { @@ -183,6 +193,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) /** * Count the number of rows for each group. * The resulting [[DataFrame]] will also contain the grouping columns. + * + * @since 1.3.0 */ def count(): DataFrame = Seq(Alias(Count(Literal(1)), "count")()) @@ -190,6 +202,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * Compute the average value for each numeric columns for each group. This is an alias for `avg`. * The resulting [[DataFrame]] will also contain the grouping columns. * When specified columns are given, only compute the average values for them. + * + * @since 1.3.0 */ @scala.annotation.varargs def mean(colNames: String*): DataFrame = { @@ -200,6 +214,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * Compute the max value for each numeric columns for each group. * The resulting [[DataFrame]] will also contain the grouping columns. * When specified columns are given, only compute the max values for them. 
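As a quick, non-authoritative illustration of the DataFrameStatFunctions methods annotated above (cov, corr, crosstab, freqItems, all new in 1.4.0), the sketch below reuses the `sqlContext` and implicits import from the earlier fill/replace example; the `sales` data and its columns are made up.

{{{
// Assumes: val sqlContext = new SQLContext(sc); import sqlContext.implicits._
val sales = Seq((1, 10.0, "US"), (2, 20.0, "EU"), (3, 35.0, "US"))
  .toDF("id", "amount", "region")

val covariance  = sales.stat.cov("id", "amount")             // sample covariance
val correlation = sales.stat.corr("id", "amount")            // Pearson by default
val pearson     = sales.stat.corr("id", "amount", "pearson") // only "pearson" is supported

// Contingency table of two columns.
val contingency = sales.stat.crosstab("region", "id")

// Frequent items: default 1% support, or an explicit support level (> 1e-4).
val freqDefault = sales.stat.freqItems(Seq("region"))
val freqCustom  = sales.stat.freqItems(Seq("region", "id"), 0.4)
}}}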
+ * + * @since 1.3.0 */ @scala.annotation.varargs def max(colNames: String*): DataFrame = { @@ -210,6 +226,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * Compute the mean value for each numeric columns for each group. * The resulting [[DataFrame]] will also contain the grouping columns. * When specified columns are given, only compute the mean values for them. + * + * @since 1.3.0 */ @scala.annotation.varargs def avg(colNames: String*): DataFrame = { @@ -220,6 +238,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * Compute the min value for each numeric column for each group. * The resulting [[DataFrame]] will also contain the grouping columns. * When specified columns are given, only compute the min values for them. + * + * @since 1.3.0 */ @scala.annotation.varargs def min(colNames: String*): DataFrame = { @@ -230,6 +250,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * Compute the sum for each numeric columns for each group. * The resulting [[DataFrame]] will also contain the grouping columns. * When specified columns are given, only compute the sum for them. + * + * @since 1.3.0 */ @scala.annotation.varargs def sum(colNames: String*): DataFrame = { http://git-wip-us.apache.org/repos/asf/spark/blob/bdd5db9f/sql/core/src/main/scala/org/apache/spark/sql/JavaTypeInference.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/JavaTypeInference.scala b/sql/core/src/main/scala/org/apache/spark/sql/JavaTypeInference.scala index db484c5..1ec874f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/JavaTypeInference.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/JavaTypeInference.scala @@ -21,11 +21,12 @@ import java.beans.Introspector import java.lang.{Iterable => JIterable} import java.util.{Iterator => JIterator, Map => JMap} +import scala.language.existentials + import com.google.common.reflect.TypeToken import org.apache.spark.sql.types._ -import scala.language.existentials /** * Type-inference utilities for POJOs and Java collections. http://git-wip-us.apache.org/repos/asf/spark/blob/bdd5db9f/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 70ba898..975498c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -95,6 +95,8 @@ private[spark] class DefaultParserDialect extends ParserDialect { * @groupname config Configuration * @groupname dataframes Custom DataFrame Creation * @groupname Ungrouped Support functions for language integrated queries. + * + * @since 1.0.0 */ class SQLContext(@transient val sparkContext: SparkContext) extends org.apache.spark.Logging @@ -113,6 +115,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Set Spark SQL configuration properties. * * @group config + * @since 1.0.0 */ def setConf(props: Properties): Unit = conf.setConf(props) @@ -120,6 +123,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Set the given Spark SQL configuration property. 
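The GroupedData aggregation variants annotated above can be combined freely. The following sketch assumes the same `sqlContext` as before; the `employees` DataFrame is invented for illustration.

{{{
import org.apache.spark.sql.functions.{max, sum}

// Assumes: import sqlContext.implicits._
val employees = Seq(
  ("Sales", "Alice", 30, 100.0),
  ("Sales", "Bob",   40, 250.0),
  ("IT",    "Carol", 35,  80.0)
).toDF("department", "name", "age", "expense")

// Map-style aggregation: column name -> aggregate function name.
val perDept  = employees.groupBy("department").agg("age" -> "max", "expense" -> "sum")

// Column-based aggregation using org.apache.spark.sql.functions.
val perDept2 = employees.groupBy("department").agg(max("age"), sum("expense"))

// Convenience aggregates; with no arguments they cover all numeric columns.
val counts   = employees.groupBy("department").count()
val averages = employees.groupBy("department").avg("age", "expense")
}}}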
* * @group config + * @since 1.0.0 */ def setConf(key: String, value: String): Unit = conf.setConf(key, value) @@ -127,6 +131,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Return the value of Spark SQL configuration property for the given key. * * @group config + * @since 1.0.0 */ def getConf(key: String): String = conf.getConf(key) @@ -135,6 +140,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * yet, return `defaultValue`. * * @group config + * @since 1.0.0 */ def getConf(key: String, defaultValue: String): String = conf.getConf(key, defaultValue) @@ -143,6 +149,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * This creates a new copy of the config properties in the form of a Map. * * @group config + * @since 1.0.0 */ def getAllConfs: immutable.Map[String, String] = conf.getAllConfs @@ -228,6 +235,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * the query planner for advanced functionality. * * @group basic + * @since 1.3.0 */ @Experimental @transient @@ -238,6 +246,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Returns a [[DataFrame]] with no rows or columns. * * @group basic + * @since 1.3.0 */ @Experimental @transient @@ -270,6 +279,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * }}} * * @group basic + * @since 1.3.0 * TODO move to SQLSession? */ @transient @@ -278,23 +288,27 @@ class SQLContext(@transient val sparkContext: SparkContext) /** * Returns true if the table is currently cached in-memory. * @group cachemgmt + * @since 1.3.0 */ def isCached(tableName: String): Boolean = cacheManager.isCached(tableName) /** * Caches the specified table in-memory. * @group cachemgmt + * @since 1.3.0 */ def cacheTable(tableName: String): Unit = cacheManager.cacheTable(tableName) /** * Removes the specified table from the in-memory cache. * @group cachemgmt + * @since 1.3.0 */ def uncacheTable(tableName: String): Unit = cacheManager.uncacheTable(tableName) /** * Removes all cached tables from the in-memory cache. + * @since 1.3.0 */ def clearCache(): Unit = cacheManager.clearCache() @@ -311,27 +325,40 @@ class SQLContext(@transient val sparkContext: SparkContext) * }}} * * @group basic + * @since 1.3.0 */ @Experimental object implicits extends Serializable { // scalastyle:on - /** Converts $"col name" into an [[Column]]. */ + /** + * Converts $"col name" into an [[Column]]. + * @since 1.3.0 + */ implicit class StringToColumn(val sc: StringContext) { def $(args: Any*): ColumnName = { new ColumnName(sc.s(args :_*)) } } - /** An implicit conversion that turns a Scala `Symbol` into a [[Column]]. */ + /** + * An implicit conversion that turns a Scala `Symbol` into a [[Column]]. + * @since 1.3.0 + */ implicit def symbolToColumn(s: Symbol): ColumnName = new ColumnName(s.name) - /** Creates a DataFrame from an RDD of case classes or tuples. */ + /** + * Creates a DataFrame from an RDD of case classes or tuples. + * @since 1.3.0 + */ implicit def rddToDataFrameHolder[A <: Product : TypeTag](rdd: RDD[A]): DataFrameHolder = { DataFrameHolder(self.createDataFrame(rdd)) } - /** Creates a DataFrame from a local Seq of Product. */ + /** + * Creates a DataFrame from a local Seq of Product. + * @since 1.3.0 + */ implicit def localSeqToDataFrameHolder[A <: Product : TypeTag](data: Seq[A]): DataFrameHolder = { DataFrameHolder(self.createDataFrame(data)) @@ -341,7 +368,10 @@ class SQLContext(@transient val sparkContext: SparkContext) // making existing implicit conversions ambiguous. 
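To ground the configuration, caching and implicits members annotated above, a brief sketch follows; the configuration value, table name and sample data are arbitrary, and `sqlContext` is the one created in the first sketch.

{{{
// Assumes the sqlContext (and import sqlContext.implicits._) from the first sketch.
sqlContext.setConf("spark.sql.shuffle.partitions", "8")
val partitions = sqlContext.getConf("spark.sql.shuffle.partitions", "200")
val allConfs   = sqlContext.getAllConfs

// Register a temporary table and manage its in-memory cache.
val logs = Seq((1, "INFO"), (2, "WARN")).toDF("id", "level")
logs.registerTempTable("logs")
sqlContext.cacheTable("logs")
assert(sqlContext.isCached("logs"))
sqlContext.uncacheTable("logs")

// The implicits enable the $"colName" and Scala Symbol column syntax.
val warnings = logs.filter($"level" === "WARN").select('id)
}}}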
In particular, RDD[Double] is dangerous // because of [[DoubleRDDFunctions]]. - /** Creates a single column DataFrame from an RDD[Int]. */ + /** + * Creates a single column DataFrame from an RDD[Int]. + * @since 1.3.0 + */ implicit def intRddToDataFrameHolder(data: RDD[Int]): DataFrameHolder = { val dataType = IntegerType val rows = data.mapPartitions { iter => @@ -354,7 +384,10 @@ class SQLContext(@transient val sparkContext: SparkContext) DataFrameHolder(self.createDataFrame(rows, StructType(StructField("_1", dataType) :: Nil))) } - /** Creates a single column DataFrame from an RDD[Long]. */ + /** + * Creates a single column DataFrame from an RDD[Long]. + * @since 1.3.0 + */ implicit def longRddToDataFrameHolder(data: RDD[Long]): DataFrameHolder = { val dataType = LongType val rows = data.mapPartitions { iter => @@ -367,7 +400,10 @@ class SQLContext(@transient val sparkContext: SparkContext) DataFrameHolder(self.createDataFrame(rows, StructType(StructField("_1", dataType) :: Nil))) } - /** Creates a single column DataFrame from an RDD[String]. */ + /** + * Creates a single column DataFrame from an RDD[String]. + * @since 1.3.0 + */ implicit def stringRddToDataFrameHolder(data: RDD[String]): DataFrameHolder = { val dataType = StringType val rows = data.mapPartitions { iter => @@ -386,6 +422,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Creates a DataFrame from an RDD of case classes. * * @group dataframes + * @since 1.3.0 */ @Experimental def createDataFrame[A <: Product : TypeTag](rdd: RDD[A]): DataFrame = { @@ -401,6 +438,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Creates a DataFrame from a local Seq of Product. * * @group dataframes + * @since 1.3.0 */ @Experimental def createDataFrame[A <: Product : TypeTag](data: Seq[A]): DataFrame = { @@ -414,6 +452,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Convert a [[BaseRelation]] created for external data sources into a [[DataFrame]]. * * @group dataframes + * @since 1.3.0 */ def baseRelationToDataFrame(baseRelation: BaseRelation): DataFrame = { DataFrame(this, LogicalRelation(baseRelation)) @@ -449,6 +488,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * }}} * * @group dataframes + * @since 1.3.0 */ @DeveloperApi def createDataFrame(rowRDD: RDD[Row], schema: StructType): DataFrame = { @@ -480,6 +520,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * the provided schema. Otherwise, there will be runtime exception. * * @group dataframes + * @since 1.3.0 */ @DeveloperApi def createDataFrame(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = { @@ -492,6 +533,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * WARNING: Since there is no guaranteed ordering for fields in a Java Bean, * SELECT * queries will return the columns in an undefined order. * @group dataframes + * @since 1.3.0 */ def createDataFrame(rdd: RDD[_], beanClass: Class[_]): DataFrame = { val attributeSeq = getSchema(beanClass) @@ -520,6 +562,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * WARNING: Since there is no guaranteed ordering for fields in a Java Bean, * SELECT * queries will return the columns in an undefined order. * @group dataframes + * @since 1.3.0 */ def createDataFrame(rdd: JavaRDD[_], beanClass: Class[_]): DataFrame = { createDataFrame(rdd.rdd, beanClass) @@ -591,6 +634,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * [[DataFrame]] if no paths are passed in. 
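The createDataFrame overloads and the single-column holders annotated above are sketched below under the same assumed `sqlContext` and `sc`; the `Record` case class is hypothetical.

{{{
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

// Assumes the sqlContext and sc from the first sketch, plus import sqlContext.implicits._

// From an RDD of case classes, or a local Seq of Products.
case class Record(key: Int, value: String)
val fromRdd = sqlContext.createDataFrame(sc.parallelize(Seq(Record(1, "a"), Record(2, "b"))))
val fromSeq = sqlContext.createDataFrame(Seq(Record(3, "c")))

// From an RDD[Row] plus an explicit schema.
val schema = StructType(Seq(
  StructField("key", IntegerType, nullable = false),
  StructField("value", StringType, nullable = true)))
val fromRows = sqlContext.createDataFrame(sc.parallelize(Seq(Row(4, "d"), Row(5, "e"))), schema)

// Single-column DataFrame via the implicit RDD[Int] holder.
val ints = sc.parallelize(1 to 5).toDF("n")
}}}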
* * @group specificdata + * @since 1.3.0 */ @scala.annotation.varargs def parquetFile(paths: String*): DataFrame = { @@ -609,6 +653,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * It goes through the entire dataset once to determine the schema. * * @group specificdata + * @since 1.3.0 */ def jsonFile(path: String): DataFrame = jsonFile(path, 1.0) @@ -618,6 +663,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * returning the result as a [[DataFrame]]. * * @group specificdata + * @since 1.3.0 */ @Experimental def jsonFile(path: String, schema: StructType): DataFrame = @@ -626,6 +672,7 @@ class SQLContext(@transient val sparkContext: SparkContext) /** * :: Experimental :: * @group specificdata + * @since 1.3.0 */ @Experimental def jsonFile(path: String, samplingRatio: Double): DataFrame = @@ -637,6 +684,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * It goes through the entire dataset once to determine the schema. * * @group specificdata + * @since 1.3.0 */ def jsonRDD(json: RDD[String]): DataFrame = jsonRDD(json, 1.0) @@ -647,6 +695,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * It goes through the entire dataset once to determine the schema. * * @group specificdata + * @since 1.3.0 */ def jsonRDD(json: JavaRDD[String]): DataFrame = jsonRDD(json.rdd, 1.0) @@ -656,6 +705,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * returning the result as a [[DataFrame]]. * * @group specificdata + * @since 1.3.0 */ @Experimental def jsonRDD(json: RDD[String], schema: StructType): DataFrame = { @@ -678,6 +728,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * schema, returning the result as a [[DataFrame]]. * * @group specificdata + * @since 1.3.0 */ @Experimental def jsonRDD(json: JavaRDD[String], schema: StructType): DataFrame = { @@ -690,6 +741,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * schema, returning the result as a [[DataFrame]]. * * @group specificdata + * @since 1.3.0 */ @Experimental def jsonRDD(json: RDD[String], samplingRatio: Double): DataFrame = { @@ -711,6 +763,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * schema, returning the result as a [[DataFrame]]. * * @group specificdata + * @since 1.3.0 */ @Experimental def jsonRDD(json: JavaRDD[String], samplingRatio: Double): DataFrame = { @@ -723,6 +776,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * using the default data source configured by spark.sql.sources.default. * * @group genericdata + * @since 1.3.0 */ @Experimental def load(path: String): DataFrame = { @@ -735,6 +789,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Returns the dataset stored at path as a DataFrame, using the given data source. * * @group genericdata + * @since 1.3.0 */ @Experimental def load(path: String, source: String): DataFrame = { @@ -747,6 +802,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * a set of options as a DataFrame. * * @group genericdata + * @since 1.3.0 */ @Experimental def load(source: String, options: java.util.Map[String, String]): DataFrame = { @@ -759,6 +815,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * a set of options as a DataFrame. 
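A sketch of the JSON, Parquet and generic load readers annotated above; the paths are placeholders, and the calls assume the same `sqlContext` and `sc` as before.

{{{
// JSON: one JSON object per line; the schema is inferred by a pass over the data.
val events      = sqlContext.jsonFile("/tmp/events.json")
val fromStrings = sqlContext.jsonRDD(sc.parallelize(Seq("""{"id":1,"level":"WARN"}""")))

// Parquet; multiple paths may be passed.
val parquet = sqlContext.parquetFile("/tmp/events.parquet")

// Generic load: the default source (spark.sql.sources.default, parquet unless overridden),
// an explicit source name, or a source plus options.
val viaDefault = sqlContext.load("/tmp/events.parquet")
val viaJson    = sqlContext.load("/tmp/events.json", "json")
val viaOptions = sqlContext.load("json", Map("path" -> "/tmp/events.json"))
}}}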
* * @group genericdata + * @since 1.3.0 */ @Experimental def load(source: String, options: Map[String, String]): DataFrame = { @@ -772,6 +829,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. * * @group genericdata + * @since 1.3.0 */ @Experimental def load( @@ -787,6 +845,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. * * @group genericdata + * @since 1.3.0 */ @Experimental def load( @@ -802,6 +861,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * (Scala-specific) Returns the dataset specified by the given data source and * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. * @group genericdata + * @since 1.3.0 */ @Experimental def load( @@ -817,6 +877,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * (Scala-specific) Returns the dataset specified by the given data source and * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. * @group genericdata + * @since 1.3.0 */ @Experimental def load( @@ -834,6 +895,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * It will use the default data source configured by spark.sql.sources.default. * * @group ddl_ops + * @since 1.3.0 */ @Experimental def createExternalTable(tableName: String, path: String): DataFrame = { @@ -847,6 +909,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * and returns the corresponding DataFrame. * * @group ddl_ops + * @since 1.3.0 */ @Experimental def createExternalTable( @@ -862,6 +925,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Then, returns the corresponding DataFrame. * * @group ddl_ops + * @since 1.3.0 */ @Experimental def createExternalTable( @@ -878,6 +942,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Then, returns the corresponding DataFrame. * * @group ddl_ops + * @since 1.3.0 */ @Experimental def createExternalTable( @@ -903,6 +968,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * a set of options. Then, returns the corresponding DataFrame. * * @group ddl_ops + * @since 1.3.0 */ @Experimental def createExternalTable( @@ -920,6 +986,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * a set of options. Then, returns the corresponding DataFrame. * * @group ddl_ops + * @since 1.3.0 */ @Experimental def createExternalTable( @@ -946,6 +1013,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * url named table. * * @group specificdata + * @since 1.3.0 */ @Experimental def jdbc(url: String, table: String): DataFrame = { @@ -958,6 +1026,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * url named table and connection properties. * * @group specificdata + * @since 1.4.0 */ @Experimental def jdbc(url: String, table: String, properties: Properties): DataFrame = { @@ -976,6 +1045,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * @param numPartitions the number of partitions. 
the range `minValue`-`maxValue` will be split * evenly into this many partitions * @group specificdata + * @since 1.3.0 */ @Experimental def jdbc( @@ -1001,6 +1071,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * evenly into this many partitions * @param properties connection properties * @group specificdata + * @since 1.4.0 */ @Experimental def jdbc( @@ -1024,6 +1095,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * of the [[DataFrame]]. * * @group specificdata + * @since 1.3.0 */ @Experimental def jdbc(url: String, table: String, theParts: Array[String]): DataFrame = { @@ -1038,6 +1110,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * of the [[DataFrame]]. * * @group specificdata + * @since 1.4.0 */ @Experimental def jdbc( @@ -1075,6 +1148,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * @param tableName the name of the table to be unregistered. * * @group basic + * @since 1.3.0 */ def dropTempTable(tableName: String): Unit = { cacheManager.tryUncacheQuery(table(tableName)) @@ -1086,6 +1160,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * used for SQL parsing can be configured with 'spark.sql.dialect'. * * @group basic + * @since 1.3.0 */ def sql(sqlText: String): DataFrame = { DataFrame(this, parseSql(sqlText)) @@ -1095,6 +1170,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Returns the specified table as a [[DataFrame]]. * * @group ddl_ops + * @since 1.3.0 */ def table(tableName: String): DataFrame = DataFrame(this, catalog.lookupRelation(Seq(tableName))) @@ -1105,6 +1181,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * indicating if a table is a temporary one or not). * * @group ddl_ops + * @since 1.3.0 */ def tables(): DataFrame = { DataFrame(this, ShowTablesCommand(None)) @@ -1116,6 +1193,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * indicating if a table is a temporary one or not). * * @group ddl_ops + * @since 1.3.0 */ def tables(databaseName: String): DataFrame = { DataFrame(this, ShowTablesCommand(Some(databaseName))) @@ -1125,6 +1203,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Returns the names of tables in the current database as an array. * * @group ddl_ops + * @since 1.3.0 */ def tableNames(): Array[String] = { catalog.getTables(None).map { @@ -1136,6 +1215,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Returns the names of tables in the given database as an array. * * @group ddl_ops + * @since 1.3.0 */ def tableNames(databaseName: String): Array[String] = { catalog.getTables(Some(databaseName)).map { http://git-wip-us.apache.org/repos/asf/spark/blob/bdd5db9f/sql/core/src/main/scala/org/apache/spark/sql/SparkSQLParser.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSQLParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSQLParser.scala index 5921eaf..6b1ae81 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSQLParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSQLParser.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql - import scala.util.parsing.combinator.RegexParsers import org.apache.spark.sql.catalyst.AbstractSparkSQLParser
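Finally, a sketch of the JDBC readers and the SQL/catalog helpers annotated in the last hunks above. The JDBC URL, table, column names and bounds are placeholders (a suitable JDBC driver must be on the classpath), and `sqlContext` is assumed as in the earlier sketches.

{{{
import java.util.Properties

// Whole-table read from a JDBC source.
val accounts = sqlContext.jdbc("jdbc:postgresql://db.example.com/bank", "accounts")

// Partitioned read: the "id" column is split into 4 ranges between 0 and 100000.
val partitioned = sqlContext.jdbc(
  "jdbc:postgresql://db.example.com/bank", "accounts", "id", 0L, 100000L, 4)

// 1.4.0+ overload taking explicit connection properties.
val props = new Properties()
props.setProperty("user", "reader")
val withProps = sqlContext.jdbc("jdbc:postgresql://db.example.com/bank", "accounts", props)

// SQL over registered tables, plus catalog helpers.
accounts.registerTempTable("accounts")
val large     = sqlContext.sql("SELECT * FROM accounts WHERE balance > 1000")
val sameTable = sqlContext.table("accounts")
val catalog   = sqlContext.tables()       // DataFrame of (tableName, isTemporary)
val names     = sqlContext.tableNames()   // Array[String]
sqlContext.dropTempTable("accounts")
}}}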