Java API documentation

srowen Sat, 19 Nov 2016 03:25:15 -0800

Repository: spark
Updated Branches:
  refs/heads/master db9fb9baa -> d5b1d5fc8



http://git-wip-us.apache.org/repos/asf/spark/blob/d5b1d5fc/python/pyspark/rdd.py
----------------------------------------------------------------------
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index a163cea..641787e 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -1218,7 +1218,7 @@ class RDD(object):
 
     def top(self, num, key=None):
         """
-        Get the top N elements from a RDD.
+        Get the top N elements from an RDD.
 
         Note that this method should only be used if the resulting array is 
expected
         to be small, as all the data is loaded into the driver's memory.
@@ -1242,7 +1242,7 @@ class RDD(object):
 
     def takeOrdered(self, num, key=None):
         """
-        Get the N elements from a RDD ordered in ascending order or as
+        Get the N elements from an RDD ordered in ascending order or as
         specified by the optional key function.
 
         Note that this method should only be used if the resulting array is 
expected

http://git-wip-us.apache.org/repos/asf/spark/blob/d5b1d5fc/python/pyspark/streaming/kafka.py
----------------------------------------------------------------------
diff --git a/python/pyspark/streaming/kafka.py 
b/python/pyspark/streaming/kafka.py
index bf27d80..134424a 100644
--- a/python/pyspark/streaming/kafka.py
+++ b/python/pyspark/streaming/kafka.py
@@ -144,7 +144,7 @@ class KafkaUtils(object):
         """
         .. note:: Experimental
 
-        Create a RDD from Kafka using offset ranges for each topic and 
partition.
+        Create an RDD from Kafka using offset ranges for each topic and 
partition.
 
         :param sc:  SparkContext object
         :param kafkaParams: Additional params for Kafka
@@ -155,7 +155,7 @@ class KafkaUtils(object):
         :param valueDecoder:  A function used to decode value (default is 
utf8_decoder)
         :param messageHandler: A function used to convert 
KafkaMessageAndMetadata. You can assess
                                meta using messageHandler (default is None).
-        :return: A RDD object
+        :return: An RDD object
         """
         if leaders is None:
             leaders = dict()

http://git-wip-us.apache.org/repos/asf/spark/blob/d5b1d5fc/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala
index dc90659..0b95a88 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala
@@ -165,10 +165,10 @@ object Encoders {
    * (Scala-specific) Creates an encoder that serializes objects of type T 
using generic Java
    * serialization. This encoder maps T into a single byte array (binary) 
field.
    *
-   * Note that this is extremely inefficient and should only be used as the 
last resort.
-   *
    * T must be publicly accessible.
    *
+   * @note This is extremely inefficient and should only be used as the last 
resort.
+   *
    * @since 1.6.0
    */
   def javaSerialization[T: ClassTag]: Encoder[T] = genericSerializer(useKryo = 
false)
@@ -177,10 +177,10 @@ object Encoders {
    * Creates an encoder that serializes objects of type T using generic Java 
serialization.
    * This encoder maps T into a single byte array (binary) field.
    *
-   * Note that this is extremely inefficient and should only be used as the 
last resort.
-   *
    * T must be publicly accessible.
    *
+   * @note This is extremely inefficient and should only be used as the last 
resort.
+   *
    * @since 1.6.0
    */
   def javaSerialization[T](clazz: Class[T]): Encoder[T] = 
javaSerialization(ClassTag[T](clazz))

http://git-wip-us.apache.org/repos/asf/spark/blob/d5b1d5fc/sql/catalyst/src/main/scala/org/apache/spark/sql/types/CalendarIntervalType.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/CalendarIntervalType.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/CalendarIntervalType.scala
index e121044..21f3497 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/CalendarIntervalType.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/CalendarIntervalType.scala
@@ -23,10 +23,10 @@ import org.apache.spark.annotation.InterfaceStability
  * The data type representing calendar time intervals. The calendar time 
interval is stored
  * internally in two components: number of months the number of microseconds.
  *
- * Note that calendar intervals are not comparable.
- *
  * Please use the singleton [[DataTypes.CalendarIntervalType]].
  *
+ * @note Calendar intervals are not comparable.
+ *
  * @since 1.5.0
  */
 @InterfaceStability.Stable

http://git-wip-us.apache.org/repos/asf/spark/blob/d5b1d5fc/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index 7a131b3..fa3b2b9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -118,7 +118,7 @@ class TypedColumn[-T, U](
  *   $"a" === $"b"
  * }}}
  *
- * Note that the internal Catalyst expression can be accessed via "expr", but 
this method is for
+ * @note The internal Catalyst expression can be accessed via "expr", but this 
method is for
  * debugging purposes only and can change in any future Spark releases.
  *
  * @groupname java_expr_ops Java-specific expression operators

http://git-wip-us.apache.org/repos/asf/spark/blob/d5b1d5fc/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
index b5bbcee..6335fc4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
@@ -51,7 +51,6 @@ final class DataFrameStatFunctions private[sql](df: 
DataFrame) {
    * The algorithm was first present in 
[[http://dx.doi.org/10.1145/375663.375670 Space-efficient
    * Online Computation of Quantile Summaries]] by Greenwald and Khanna.
    *
-   * Note that NaN values will be removed from the numerical column before 
calculation
    * @param col the name of the numerical column
    * @param probabilities a list of quantile probabilities
    *   Each number must belong to [0, 1].
@@ -61,6 +60,8 @@ final class DataFrameStatFunctions private[sql](df: 
DataFrame) {
    *   Note that values greater than 1 are accepted but give the same result 
as 1.
    * @return the approximate quantiles at the given probabilities
    *
+   * @note NaN values will be removed from the numerical column before 
calculation
+   *
    * @since 2.0.0
    */
   def approxQuantile(

http://git-wip-us.apache.org/repos/asf/spark/blob/d5b1d5fc/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
index e0c8981..15281f2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
@@ -218,7 +218,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) 
{
    * Inserts the content of the [[DataFrame]] to the specified table. It 
requires that
    * the schema of the [[DataFrame]] is the same as the schema of the table.
    *
-   * Note: Unlike `saveAsTable`, `insertInto` ignores the column names and 
just uses position-based
+   * @note Unlike `saveAsTable`, `insertInto` ignores the column names and 
just uses position-based
    * resolution. For example:
    *
    * {{{

http://git-wip-us.apache.org/repos/asf/spark/blob/d5b1d5fc/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 3761773..3c75a6a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -377,7 +377,7 @@ class Dataset[T] private[sql](
 
   /**
    * Converts this strongly typed collection of data to generic `DataFrame` 
with columns renamed.
-   * This can be quite convenient in conversion from a RDD of tuples into a 
[[DataFrame]] with
+   * This can be quite convenient in conversion from an RDD of tuples into a 
[[DataFrame]] with
    * meaningful names. For example:
    * {{{
    *   val rdd: RDD[(Int, String)] = ...
@@ -703,13 +703,13 @@ class Dataset[T] private[sql](
    *   df1.join(df2, "user_id")
    * }}}
    *
-   * Note that if you perform a self-join using this function without aliasing 
the input
-   * [[DataFrame]]s, you will NOT be able to reference any columns after the 
join, since
-   * there is no way to disambiguate which side of the join you would like to 
reference.
-   *
    * @param right Right side of the join operation.
    * @param usingColumn Name of the column to join on. This column must exist 
on both sides.
    *
+   * @note If you perform a self-join using this function without aliasing the 
input
+   * [[DataFrame]]s, you will NOT be able to reference any columns after the 
join, since
+   * there is no way to disambiguate which side of the join you would like to 
reference.
+   *
    * @group untypedrel
    * @since 2.0.0
    */
@@ -728,13 +728,13 @@ class Dataset[T] private[sql](
    *   df1.join(df2, Seq("user_id", "user_name"))
    * }}}
    *
-   * Note that if you perform a self-join using this function without aliasing 
the input
-   * [[DataFrame]]s, you will NOT be able to reference any columns after the 
join, since
-   * there is no way to disambiguate which side of the join you would like to 
reference.
-   *
    * @param right Right side of the join operation.
    * @param usingColumns Names of the columns to join on. This columns must 
exist on both sides.
    *
+   * @note If you perform a self-join using this function without aliasing the 
input
+   * [[DataFrame]]s, you will NOT be able to reference any columns after the 
join, since
+   * there is no way to disambiguate which side of the join you would like to 
reference.
+   *
    * @group untypedrel
    * @since 2.0.0
    */
@@ -748,14 +748,14 @@ class Dataset[T] private[sql](
    * Different from other join functions, the join columns will only appear 
once in the output,
    * i.e. similar to SQL's `JOIN USING` syntax.
    *
-   * Note that if you perform a self-join using this function without aliasing 
the input
-   * [[DataFrame]]s, you will NOT be able to reference any columns after the 
join, since
-   * there is no way to disambiguate which side of the join you would like to 
reference.
-   *
    * @param right Right side of the join operation.
    * @param usingColumns Names of the columns to join on. This columns must 
exist on both sides.
    * @param joinType One of: `inner`, `outer`, `left_outer`, `right_outer`, 
`leftsemi`.
    *
+   * @note If you perform a self-join using this function without aliasing the 
input
+   * [[DataFrame]]s, you will NOT be able to reference any columns after the 
join, since
+   * there is no way to disambiguate which side of the join you would like to 
reference.
+   *
    * @group untypedrel
    * @since 2.0.0
    */
@@ -856,10 +856,10 @@ class Dataset[T] private[sql](
   /**
    * Explicit cartesian join with another [[DataFrame]].
    *
-   * Note that cartesian joins are very expensive without an extra filter that 
can be pushed down.
-   *
    * @param right Right side of the join operation.
    *
+   * @note Cartesian joins are very expensive without an extra filter that can 
be pushed down.
+   *
    * @group untypedrel
    * @since 2.1.0
    */
@@ -1044,7 +1044,8 @@ class Dataset[T] private[sql](
 
   /**
    * Selects column based on the column name and return it as a [[Column]].
-   * Note that the column name can also reference to a nested column like 
`a.b`.
+   *
+   * @note The column name can also reference to a nested column like `a.b`.
    *
    * @group untypedrel
    * @since 2.0.0
@@ -1053,7 +1054,8 @@ class Dataset[T] private[sql](
 
   /**
    * Selects column based on the column name and return it as a [[Column]].
-   * Note that the column name can also reference to a nested column like 
`a.b`.
+   *
+   * @note The column name can also reference to a nested column like `a.b`.
    *
    * @group untypedrel
    * @since 2.0.0
@@ -1621,7 +1623,7 @@ class Dataset[T] private[sql](
    * Returns a new Dataset containing rows only in both this Dataset and 
another Dataset.
    * This is equivalent to `INTERSECT` in SQL.
    *
-   * Note that, equality checking is performed directly on the encoded 
representation of the data
+   * @note Equality checking is performed directly on the encoded 
representation of the data
    * and thus is not affected by a custom `equals` function defined on `T`.
    *
    * @group typedrel
@@ -1635,7 +1637,7 @@ class Dataset[T] private[sql](
    * Returns a new Dataset containing rows in this Dataset but not in another 
Dataset.
    * This is equivalent to `EXCEPT` in SQL.
    *
-   * Note that, equality checking is performed directly on the encoded 
representation of the data
+   * @note Equality checking is performed directly on the encoded 
representation of the data
    * and thus is not affected by a custom `equals` function defined on `T`.
    *
    * @group typedrel
@@ -1648,13 +1650,13 @@ class Dataset[T] private[sql](
   /**
    * Returns a new [[Dataset]] by sampling a fraction of rows, using a 
user-supplied seed.
    *
-   * Note: this is NOT guaranteed to provide exactly the fraction of the count
-   * of the given [[Dataset]].
-   *
    * @param withReplacement Sample with replacement or not.
    * @param fraction Fraction of rows to generate.
    * @param seed Seed for sampling.
    *
+   * @note This is NOT guaranteed to provide exactly the fraction of the count
+   * of the given [[Dataset]].
+   *
    * @group typedrel
    * @since 1.6.0
    */
@@ -1670,12 +1672,12 @@ class Dataset[T] private[sql](
   /**
    * Returns a new [[Dataset]] by sampling a fraction of rows, using a random 
seed.
    *
-   * Note: this is NOT guaranteed to provide exactly the fraction of the total 
count
-   * of the given [[Dataset]].
-   *
    * @param withReplacement Sample with replacement or not.
    * @param fraction Fraction of rows to generate.
    *
+   * @note This is NOT guaranteed to provide exactly the fraction of the total 
count
+   * of the given [[Dataset]].
+   *
    * @group typedrel
    * @since 1.6.0
    */
@@ -2375,7 +2377,7 @@ class Dataset[T] private[sql](
    *
    * The iterator will consume as much memory as the largest partition in this 
Dataset.
    *
-   * Note: this results in multiple Spark jobs, and if the input Dataset is 
the result
+   * @note this results in multiple Spark jobs, and if the input Dataset is 
the result
    * of a wide transformation (e.g. join with different partitioners), to avoid
    * recomputing the input Dataset should be cached first.
    *
@@ -2453,7 +2455,7 @@ class Dataset[T] private[sql](
    * Returns a new Dataset that contains only the unique rows from this 
Dataset.
    * This is an alias for `dropDuplicates`.
    *
-   * Note that, equality checking is performed directly on the encoded 
representation of the data
+   * @note Equality checking is performed directly on the encoded 
representation of the data
    * and thus is not affected by a custom `equals` function defined on `T`.
    *
    * @group typedrel

http://git-wip-us.apache.org/repos/asf/spark/blob/d5b1d5fc/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 3c5cf03..2fae936 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -181,9 +181,6 @@ class SQLContext private[sql](val sparkSession: 
SparkSession)
 
   /**
    * A collection of methods for registering user-defined functions (UDF).
-   * Note that the user-defined functions must be deterministic. Due to 
optimization,
-   * duplicate invocations may be eliminated or the function may even be 
invoked more times than
-   * it is present in the query.
    *
    * The following example registers a Scala closure as UDF:
    * {{{
@@ -208,6 +205,10 @@ class SQLContext private[sql](val sparkSession: 
SparkSession)
    *       DataTypes.StringType);
    * }}}
    *
+   * @note The user-defined functions must be deterministic. Due to 
optimization,
+   * duplicate invocations may be eliminated or the function may even be 
invoked more times than
+   * it is present in the query.
+   *
    * @group basic
    * @since 1.3.0
    */

http://git-wip-us.apache.org/repos/asf/spark/blob/d5b1d5fc/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 58b2ab3..e09e3ca 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -155,9 +155,6 @@ class SparkSession private(
 
   /**
    * A collection of methods for registering user-defined functions (UDF).
-   * Note that the user-defined functions must be deterministic. Due to 
optimization,
-   * duplicate invocations may be eliminated or the function may even be 
invoked more times than
-   * it is present in the query.
    *
    * The following example registers a Scala closure as UDF:
    * {{{
@@ -182,6 +179,10 @@ class SparkSession private(
    *       DataTypes.StringType);
    * }}}
    *
+   * @note The user-defined functions must be deterministic. Due to 
optimization,
+   * duplicate invocations may be eliminated or the function may even be 
invoked more times than
+   * it is present in the query.
+   *
    * @since 2.0.0
    */
   def udf: UDFRegistration = sessionState.udf
@@ -201,7 +202,7 @@ class SparkSession private(
    * Start a new session with isolated SQL configurations, temporary tables, 
registered
    * functions are isolated, but sharing the underlying [[SparkContext]] and 
cached data.
    *
-   * Note: Other than the [[SparkContext]], all shared state is initialized 
lazily.
+   * @note Other than the [[SparkContext]], all shared state is initialized 
lazily.
    * This method will force the initialization of the shared state to ensure 
that parent
    * and child sessions are set up with the same shared state. If the 
underlying catalog
    * implementation is Hive, this will initialize the metastore, which may 
take some time.

http://git-wip-us.apache.org/repos/asf/spark/blob/d5b1d5fc/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala
index 0444ad1..6043c5e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala
@@ -39,7 +39,8 @@ import org.apache.spark.util.Utils
 
 /**
  * Functions for registering user-defined functions. Use [[SQLContext.udf]] to 
access this.
- * Note that the user-defined functions must be deterministic.
+ *
+ * @note The user-defined functions must be deterministic.
  *
  * @since 1.3.0
  */

http://git-wip-us.apache.org/repos/asf/spark/blob/d5b1d5fc/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/package.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/package.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/package.scala
index 4914a9d..1b56c08 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/package.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/package.scala
@@ -28,7 +28,7 @@ package object state {
 
   implicit class StateStoreOps[T: ClassTag](dataRDD: RDD[T]) {
 
-    /** Map each partition of a RDD along with data in a [[StateStore]]. */
+    /** Map each partition of an RDD along with data in a [[StateStore]]. */
     def mapPartitionsWithStateStore[U: ClassTag](
         sqlContext: SQLContext,
         checkpointLocation: String,
@@ -49,7 +49,7 @@ package object state {
         storeUpdateFunction)
     }
 
-    /** Map each partition of a RDD along with data in a [[StateStore]]. */
+    /** Map each partition of an RDD along with data in a [[StateStore]]. */
     private[streaming] def mapPartitionsWithStateStore[U: ClassTag](
         checkpointLocation: String,
         operatorId: Long,

http://git-wip-us.apache.org/repos/asf/spark/blob/d5b1d5fc/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala
index 28598af..36dd5f7 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala
@@ -25,9 +25,7 @@ import org.apache.spark.sql.types.DataType
 
 /**
  * A user-defined function. To create one, use the `udf` functions in 
[[functions]].
- * Note that the user-defined functions must be deterministic. Due to 
optimization,
- * duplicate invocations may be eliminated or the function may even be invoked 
more times than
- * it is present in the query.
+ *
  * As an example:
  * {{{
  *   // Defined a UDF that returns true or false based on some numeric score.
@@ -37,6 +35,10 @@ import org.apache.spark.sql.types.DataType
  *   df.select( predict(df("score")) )
  * }}}
  *
+ * @note The user-defined functions must be deterministic. Due to optimization,
+ * duplicate invocations may be eliminated or the function may even be invoked 
more times than
+ * it is present in the query.
+ *
  * @since 1.3.0
  */
 @InterfaceStability.Stable

http://git-wip-us.apache.org/repos/asf/spark/blob/d5b1d5fc/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index e221c03..d5940c6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -476,7 +476,7 @@ object functions {
    *
    *   (grouping(c1) << (n-1)) + (grouping(c2) << (n-2)) + ... + grouping(cn)
    *
-   * Note: the list of columns should match with grouping columns exactly, or 
empty (means all the
+   * @note The list of columns should match with grouping columns exactly, or 
empty (means all the
    * grouping columns).
    *
    * @group agg_funcs
@@ -489,7 +489,7 @@ object functions {
    *
    *   (grouping(c1) << (n-1)) + (grouping(c2) << (n-2)) + ... + grouping(cn)
    *
-   * Note: the list of columns should match with grouping columns exactly.
+   * @note The list of columns should match with grouping columns exactly.
    *
    * @group agg_funcs
    * @since 2.0.0
@@ -1120,7 +1120,7 @@ object functions {
    * Generate a random column with independent and identically distributed 
(i.i.d.) samples
    * from U[0.0, 1.0].
    *
-   * Note that this is indeterministic when data partitions are not fixed.
+   * @note This is indeterministic when data partitions are not fixed.
    *
    * @group normal_funcs
    * @since 1.4.0
@@ -1140,7 +1140,7 @@ object functions {
    * Generate a column with independent and identically distributed (i.i.d.) 
samples from
    * the standard normal distribution.
    *
-   * Note that this is indeterministic when data partitions are not fixed.
+   * @note This is indeterministic when data partitions are not fixed.
    *
    * @group normal_funcs
    * @since 1.4.0
@@ -1159,7 +1159,7 @@ object functions {
   /**
    * Partition ID.
    *
-   * Note that this is indeterministic because it depends on data partitioning 
and task scheduling.
+   * @note This is indeterministic because it depends on data partitioning and 
task scheduling.
    *
    * @group normal_funcs
    * @since 1.6.0
@@ -2207,7 +2207,7 @@ object functions {
    * Locate the position of the first occurrence of substr column in the given 
string.
    * Returns null if either of the arguments are null.
    *
-   * NOTE: The position is not zero based, but 1 based index. Returns 0 if 
substr
+   * @note The position is not zero based, but 1 based index. Returns 0 if 
substr
    * could not be found in str.
    *
    * @group string_funcs
@@ -2242,7 +2242,8 @@ object functions {
 
   /**
    * Locate the position of the first occurrence of substr.
-   * NOTE: The position is not zero based, but 1 based index. Returns 0 if 
substr
+   *
+   * @note The position is not zero based, but 1 based index. Returns 0 if 
substr
    * could not be found in str.
    *
    * @group string_funcs
@@ -2255,7 +2256,7 @@ object functions {
   /**
    * Locate the position of the first occurrence of substr in a string column, 
after position pos.
    *
-   * NOTE: The position is not zero based, but 1 based index. returns 0 if 
substr
+   * @note The position is not zero based, but 1 based index. returns 0 if 
substr
    * could not be found in str.
    *
    * @group string_funcs
@@ -2369,7 +2370,8 @@ object functions {
 
   /**
    * Splits str around pattern (pattern is a regular expression).
-   * NOTE: pattern is a string representation of the regular expression.
+   *
+   * @note Pattern is a string representation of the regular expression.
    *
    * @group string_funcs
    * @since 1.5.0
@@ -2468,7 +2470,7 @@ object functions {
    * A pattern could be for instance `dd.MM.yyyy` and could return a string 
like '18.03.1993'. All
    * pattern letters of [[java.text.SimpleDateFormat]] can be used.
    *
-   * NOTE: Use when ever possible specialized functions like [[year]]. These 
benefit from a
+   * @note Use when ever possible specialized functions like [[year]]. These 
benefit from a
    * specialized implementation.
    *
    * @group datetime_funcs

http://git-wip-us.apache.org/repos/asf/spark/blob/d5b1d5fc/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
index dec316b..7c64e28 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
@@ -140,7 +140,7 @@ abstract class JdbcDialect extends Serializable {
  * tried in reverse order. A user-added dialect will thus be applied first,
  * overwriting the defaults.
  *
- * Note that all new dialects are applied to new jdbc DataFrames only. Make
+ * @note All new dialects are applied to new jdbc DataFrames only. Make
  * sure to register your dialects first.
  */
 @DeveloperApi

http://git-wip-us.apache.org/repos/asf/spark/blob/d5b1d5fc/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
index 15a4807..ff6dd8c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
@@ -69,7 +69,8 @@ trait DataSourceRegister {
 trait RelationProvider {
   /**
    * Returns a new base relation with the given parameters.
-   * Note: the parameters' keywords are case insensitive and this 
insensitivity is enforced
+   *
+   * @note The parameters' keywords are case insensitive and this 
insensitivity is enforced
    * by the Map that is passed to the function.
    */
   def createRelation(sqlContext: SQLContext, parameters: Map[String, String]): 
BaseRelation
@@ -99,7 +100,8 @@ trait RelationProvider {
 trait SchemaRelationProvider {
   /**
    * Returns a new base relation with the given parameters and user defined 
schema.
-   * Note: the parameters' keywords are case insensitive and this 
insensitivity is enforced
+   *
+   * @note The parameters' keywords are case insensitive and this 
insensitivity is enforced
    * by the Map that is passed to the function.
    */
   def createRelation(
@@ -205,7 +207,7 @@ abstract class BaseRelation {
    * large to broadcast. This method will be called multiple times during 
query planning
    * and thus should not perform expensive operations for each invocation.
    *
-   * Note that it is always better to overestimate size than underestimate, 
because underestimation
+   * @note It is always better to overestimate size than underestimate, 
because underestimation
    * could lead to execution plans that are suboptimal (i.e. broadcasting a 
very large table).
    *
    * @since 1.3.0
@@ -219,7 +221,7 @@ abstract class BaseRelation {
    *
    * If `needConversion` is `false`, buildScan() should return an [[RDD]] of 
[[InternalRow]]
    *
-   * Note: The internal representation is not stable across releases and thus 
data sources outside
+   * @note The internal representation is not stable across releases and thus 
data sources outside
    * of Spark SQL should leave this as true.
    *
    * @since 1.4.0

http://git-wip-us.apache.org/repos/asf/spark/blob/d5b1d5fc/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala
index 5e93fc4..4504582 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala
@@ -30,7 +30,7 @@ import org.apache.spark.sql.execution.QueryExecution
  * :: Experimental ::
  * The interface of query execution listener that can be used to analyze 
execution metrics.
  *
- * Note that implementations should guarantee thread-safety as they can be 
invoked by
+ * @note Implementations should guarantee thread-safety as they can be invoked 
by
  * multiple different threads.
  */
 @Experimental
@@ -39,24 +39,26 @@ trait QueryExecutionListener {
 
   /**
    * A callback function that will be called when a query executed 
successfully.
-   * Note that this can be invoked by multiple different threads.
    *
    * @param funcName name of the action that triggered this query.
    * @param qe the QueryExecution object that carries detail information like 
logical plan,
    *           physical plan, etc.
    * @param durationNs the execution time for this query in nanoseconds.
+   *
+   * @note This can be invoked by multiple different threads.
    */
   @DeveloperApi
   def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit
 
   /**
    * A callback function that will be called when a query execution failed.
-   * Note that this can be invoked by multiple different threads.
    *
    * @param funcName the name of the action that triggered this query.
    * @param qe the QueryExecution object that carries detail information like 
logical plan,
    *           physical plan, etc.
    * @param exception the exception that failed this query.
+   *
+   * @note This can be invoked by multiple different threads.
    */
   @DeveloperApi
   def onFailure(funcName: String, qe: QueryExecution, exception: Exception): 
Unit

http://git-wip-us.apache.org/repos/asf/spark/blob/d5b1d5fc/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala
index 0daa29b..b272c8e 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala
@@ -157,7 +157,7 @@ class InMemoryColumnarQuerySuite extends QueryTest with 
SharedSQLContext {
     val allColumns = fields.map(_.name).mkString(",")
     val schema = StructType(fields)
 
-    // Create a RDD for the schema
+    // Create an RDD for the schema
     val rdd =
       sparkContext.parallelize((1 to 10000), 10).map { i =>
         Row(

http://git-wip-us.apache.org/repos/asf/spark/blob/d5b1d5fc/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
----------------------------------------------------------------------
diff --git 
a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala 
b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
index 4808d0f..444261d 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
@@ -421,11 +421,11 @@ class StreamingContext private[streaming] (
    * by "moving" them from another location within the same file system. File 
names
    * starting with . are ignored.
    *
-   * '''Note:''' We ensure that the byte array for each record in the
-   * resulting RDDs of the DStream has the provided record length.
-   *
    * @param directory HDFS directory to monitor for new file
    * @param recordLength length of each record in bytes
+   *
+   * @note We ensure that the byte array for each record in the
+   * resulting RDDs of the DStream has the provided record length.
    */
   def binaryRecordsStream(
       directory: String,
@@ -447,12 +447,12 @@ class StreamingContext private[streaming] (
    * Create an input stream from a queue of RDDs. In each batch,
    * it will process either one or all of the RDDs returned by the queue.
    *
-   * NOTE: Arbitrary RDDs can be added to `queueStream`, there is no way to 
recover data of
-   * those RDDs, so `queueStream` doesn't support checkpointing.
-   *
    * @param queue      Queue of RDDs. Modifications to this data structure 
must be synchronized.
    * @param oneAtATime Whether only one RDD should be consumed from the queue 
in every interval
    * @tparam T         Type of objects in the RDD
+   *
+   * @note Arbitrary RDDs can be added to `queueStream`, there is no way to 
recover data of
+   * those RDDs, so `queueStream` doesn't support checkpointing.
    */
   def queueStream[T: ClassTag](
       queue: Queue[RDD[T]],
@@ -465,14 +465,14 @@ class StreamingContext private[streaming] (
    * Create an input stream from a queue of RDDs. In each batch,
    * it will process either one or all of the RDDs returned by the queue.
    *
-   * NOTE: Arbitrary RDDs can be added to `queueStream`, there is no way to 
recover data of
-   * those RDDs, so `queueStream` doesn't support checkpointing.
-   *
    * @param queue      Queue of RDDs. Modifications to this data structure 
must be synchronized.
    * @param oneAtATime Whether only one RDD should be consumed from the queue 
in every interval
    * @param defaultRDD Default RDD is returned by the DStream when the queue 
is empty.
    *                   Set as null if no RDD should be returned when empty
    * @tparam T         Type of objects in the RDD
+   *
+   * @note Arbitrary RDDs can be added to `queueStream`, there is no way to 
recover data of
+   * those RDDs, so `queueStream` doesn't support checkpointing.
    */
   def queueStream[T: ClassTag](
       queue: Queue[RDD[T]],

http://git-wip-us.apache.org/repos/asf/spark/blob/d5b1d5fc/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala
----------------------------------------------------------------------
diff --git 
a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala
 
b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala
index da9ff85..aa4003c 100644
--- 
a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala
+++ 
b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala
@@ -74,7 +74,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])(
    */
   def repartition(numPartitions: Int): JavaPairDStream[K, V] = 
dstream.repartition(numPartitions)
 
-  /** Method that generates a RDD for the given Duration */
+  /** Method that generates an RDD for the given Duration */
   def compute(validTime: Time): JavaPairRDD[K, V] = {
     dstream.compute(validTime) match {
       case Some(rdd) => new JavaPairRDD(rdd)

http://git-wip-us.apache.org/repos/asf/spark/blob/d5b1d5fc/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
----------------------------------------------------------------------
diff --git 
a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
 
b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
index 4c4376a..b43b940 100644
--- 
a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
+++ 
b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
@@ -218,11 +218,11 @@ class JavaStreamingContext(val ssc: StreamingContext) 
extends Closeable {
    * for new files and reads them as flat binary files with fixed record 
lengths,
    * yielding byte arrays
    *
-   * '''Note:''' We ensure that the byte array for each record in the
-   * resulting RDDs of the DStream has the provided record length.
-   *
    * @param directory HDFS directory to monitor for new files
    * @param recordLength The length at which to split the records
+   *
+   * @note We ensure that the byte array for each record in the
+   * resulting RDDs of the DStream has the provided record length.
    */
   def binaryRecordsStream(directory: String, recordLength: Int): 
JavaDStream[Array[Byte]] = {
     ssc.binaryRecordsStream(directory, recordLength)
@@ -352,13 +352,13 @@ class JavaStreamingContext(val ssc: StreamingContext) 
extends Closeable {
    * Create an input stream from a queue of RDDs. In each batch,
    * it will process either one or all of the RDDs returned by the queue.
    *
-   * NOTE:
+   * @param queue      Queue of RDDs
+   * @tparam T         Type of objects in the RDD
+   *
+   * @note
    * 1. Changes to the queue after the stream is created will not be 
recognized.
    * 2. Arbitrary RDDs can be added to `queueStream`, there is no way to 
recover data of
    * those RDDs, so `queueStream` doesn't support checkpointing.
-   *
-   * @param queue      Queue of RDDs
-   * @tparam T         Type of objects in the RDD
    */
   def queueStream[T](queue: java.util.Queue[JavaRDD[T]]): JavaDStream[T] = {
     implicit val cm: ClassTag[T] =
@@ -372,14 +372,14 @@ class JavaStreamingContext(val ssc: StreamingContext) 
extends Closeable {
    * Create an input stream from a queue of RDDs. In each batch,
    * it will process either one or all of the RDDs returned by the queue.
    *
-   * NOTE:
-   * 1. Changes to the queue after the stream is created will not be 
recognized.
-   * 2. Arbitrary RDDs can be added to `queueStream`, there is no way to 
recover data of
-   * those RDDs, so `queueStream` doesn't support checkpointing.
-   *
    * @param queue      Queue of RDDs
    * @param oneAtATime Whether only one RDD should be consumed from the queue 
in every interval
    * @tparam T         Type of objects in the RDD
+   *
+   * @note
+   * 1. Changes to the queue after the stream is created will not be 
recognized.
+   * 2. Arbitrary RDDs can be added to `queueStream`, there is no way to 
recover data of
+   * those RDDs, so `queueStream` doesn't support checkpointing.
    */
   def queueStream[T](
       queue: java.util.Queue[JavaRDD[T]],
@@ -396,7 +396,7 @@ class JavaStreamingContext(val ssc: StreamingContext) 
extends Closeable {
    * Create an input stream from a queue of RDDs. In each batch,
    * it will process either one or all of the RDDs returned by the queue.
    *
-   * NOTE:
+   * @note
    * 1. Changes to the queue after the stream is created will not be 
recognized.
    * 2. Arbitrary RDDs can be added to `queueStream`, there is no way to 
recover data of
    * those RDDs, so `queueStream` doesn't support checkpointing.
@@ -454,9 +454,10 @@ class JavaStreamingContext(val ssc: StreamingContext) 
extends Closeable {
   /**
    * Create a new DStream in which each RDD is generated by applying a 
function on RDDs of
    * the DStreams. The order of the JavaRDDs in the transform function 
parameter will be the
-   * same as the order of corresponding DStreams in the list. Note that for 
adding a
-   * JavaPairDStream in the list of JavaDStreams, convert it to a JavaDStream 
using
-   * [[org.apache.spark.streaming.api.java.JavaPairDStream]].toJavaDStream().
+   * same as the order of corresponding DStreams in the list.
+   *
+   * @note For adding a JavaPairDStream in the list of JavaDStreams, convert 
it to a
+   * JavaDStream using 
[[org.apache.spark.streaming.api.java.JavaPairDStream]].toJavaDStream().
    * In the transform function, convert the JavaRDD corresponding to that 
JavaDStream to
    * a JavaPairRDD using org.apache.spark.api.java.JavaPairRDD.fromJavaRDD().
    */
@@ -476,9 +477,10 @@ class JavaStreamingContext(val ssc: StreamingContext) 
extends Closeable {
   /**
    * Create a new DStream in which each RDD is generated by applying a 
function on RDDs of
    * the DStreams. The order of the JavaRDDs in the transform function 
parameter will be the
-   * same as the order of corresponding DStreams in the list. Note that for 
adding a
-   * JavaPairDStream in the list of JavaDStreams, convert it to a JavaDStream 
using
-   * [[org.apache.spark.streaming.api.java.JavaPairDStream]].toJavaDStream().
+   * same as the order of corresponding DStreams in the list.
+   *
+   * @note For adding a JavaPairDStream in the list of JavaDStreams, convert 
it to
+   * a JavaDStream using 
[[org.apache.spark.streaming.api.java.JavaPairDStream]].toJavaDStream().
    * In the transform function, convert the JavaRDD corresponding to that 
JavaDStream to
    * a JavaPairRDD using org.apache.spark.api.java.JavaPairRDD.fromJavaRDD().
    */

http://git-wip-us.apache.org/repos/asf/spark/blob/d5b1d5fc/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala
----------------------------------------------------------------------
diff --git 
a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala 
b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala
index 7e0a2ca..e23edfa 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala
@@ -69,13 +69,13 @@ abstract class DStream[T: ClassTag] (
   // Methods that should be implemented by subclasses of DStream
   // =======================================================================
 
-  /** Time interval after which the DStream generates a RDD */
+  /** Time interval after which the DStream generates an RDD */
   def slideDuration: Duration
 
   /** List of parent DStreams on which this DStream depends on */
   def dependencies: List[DStream[_]]
 
-  /** Method that generates a RDD for the given time */
+  /** Method that generates an RDD for the given time */
   def compute(validTime: Time): Option[RDD[T]]
 
   // =======================================================================

http://git-wip-us.apache.org/repos/asf/spark/blob/d5b1d5fc/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapWithStateDStream.scala
----------------------------------------------------------------------
diff --git 
a/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapWithStateDStream.scala
 
b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapWithStateDStream.scala
index ed08191..9512db7 100644
--- 
a/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapWithStateDStream.scala
+++ 
b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapWithStateDStream.scala
@@ -128,7 +128,7 @@ class InternalMapWithStateDStream[K: ClassTag, V: ClassTag, 
S: ClassTag, E: Clas
     super.initialize(time)
   }
 
-  /** Method that generates a RDD for the given time */
+  /** Method that generates an RDD for the given time */
   override def compute(validTime: Time): Option[RDD[MapWithStateRDDRecord[K, 
S, E]]] = {
     // Get the previous state or create a new empty state RDD
     val prevStateRDD = getOrCompute(validTime - slideDuration) match {

http://git-wip-us.apache.org/repos/asf/spark/blob/d5b1d5fc/streaming/src/test/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDDSuite.scala
----------------------------------------------------------------------
diff --git 
a/streaming/src/test/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDDSuite.scala
 
b/streaming/src/test/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDDSuite.scala
index ce5a6e0..a37fac8 100644
--- 
a/streaming/src/test/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDDSuite.scala
+++ 
b/streaming/src/test/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDDSuite.scala
@@ -186,7 +186,7 @@ class WriteAheadLogBackedBlockRDDSuite
     assert(rdd.collect() === data.flatten)
 
     // Verify that the block fetching is skipped when isBlockValid is set to 
false.
-    // This is done by using a RDD whose data is only in memory but is set to 
skip block fetching
+    // This is done by using an RDD whose data is only in memory but is set to 
skip block fetching
     // Using that RDD will throw exception, as it skips block fetching even if 
the blocks are in
     // in BlockManager.
     if (testIsBlockValid) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[1/3] spark git commit: [SPARK-18445][BUILD][DOCS] Fix the markdown for `Note:`/`NOTE:`/`Note that`/`'''Note:'''` across Scala/Java API documentation

Reply via email to