Repository: spark Updated Branches: refs/heads/master b6a873d6d -> 7eb83fefd
[SPARK-13137][SQL] NullPointerException in schema inference for CSV when the first line is empty https://issues.apache.org/jira/browse/SPARK-13137 This PR adds a filter in schema inference so that it does not throw a NullPointerException. Also, I removed `MAX_COMMENT_LINES_IN_HEADER` and instead used a chain of `filter()` and `first()` calls. Lastly, I simply added an empty first line to the existing test file (`cars.csv`) rather than adding a new file, so that this case is covered by the original tests. Author: hyukjinkwon <[email protected]> Closes #11023 from HyukjinKwon/SPARK-13137. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7eb83fef Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7eb83fef Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7eb83fef Branch: refs/heads/master Commit: 7eb83fefd19e137d80a23b5174b66b14831c291a Parents: b6a873d Author: hyukjinkwon <[email protected]> Authored: Sun Feb 21 13:21:59 2016 -0800 Committer: Reynold Xin <[email protected]> Committed: Sun Feb 21 13:21:59 2016 -0800 ---------------------------------------------------------------------- .../sql/execution/datasources/csv/CSVOptions.scala | 3 --- .../sql/execution/datasources/csv/CSVRelation.scala | 12 +++++++----- sql/core/src/test/resources/cars.csv | 1 + 3 files changed, 8 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/7eb83fef/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala index bea8e97..38aa2dd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala @@ -75,9 +75,6 @@ private[sql] class CSVOptions( val ignoreLeadingWhiteSpaceFlag = getBool("ignoreLeadingWhiteSpace") val ignoreTrailingWhiteSpaceFlag = getBool("ignoreTrailingWhiteSpace") - // Limit the number of lines we'll search for a header row that isn't comment-prefixed - val MAX_COMMENT_LINES_IN_HEADER = 10 - // Parse mode flags if (!ParseModes.isValidMode(parseMode)) { logWarning(s"$parseMode is not a valid parse mode. Using ${ParseModes.DEFAULT}.") http://git-wip-us.apache.org/repos/asf/spark/blob/7eb83fef/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala index f8e3a1b..471ed0d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala @@ -154,12 +154,14 @@ private[csv] class CSVRelation( */ private def findFirstLine(rdd: RDD[String]): String = { if (params.isCommentSet) { - rdd.take(params.MAX_COMMENT_LINES_IN_HEADER) - .find(!_.startsWith(params.comment.toString)) - .getOrElse(sys.error(s"No uncommented header line in " + - s"first ${params.MAX_COMMENT_LINES_IN_HEADER} lines")) + val comment = params.comment.toString + rdd.filter { line => + line.trim.nonEmpty && !line.startsWith(comment) + }.first() } else { - rdd.first() + rdd.filter { line => + line.trim.nonEmpty + }.first() } } } http://git-wip-us.apache.org/repos/asf/spark/blob/7eb83fef/sql/core/src/test/resources/cars.csv ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/cars.csv 
b/sql/core/src/test/resources/cars.csv index 2b9d74c..40ded57 100644 --- a/sql/core/src/test/resources/cars.csv +++ b/sql/core/src/test/resources/cars.csv @@ -1,3 +1,4 @@ + year,make,model,comment,blank "2012","Tesla","S","No comment", --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
