Repository: spark Updated Branches: refs/heads/master 29cea8f33 -> a76846cfb
[SPARK-18126][SPARK-CORE] getIteratorZipWithIndex accepts negative value as index ## What changes were proposed in this pull request? `Utils.getIteratorZipWithIndex` was added to handle partitions that contain more than 2147483647 records. However, the method also accepts `startIndex` < 0, which produces negative indices. This PR adds a defensive check on `startIndex` to make sure it is >= 0. ## How was this patch tested? Added a new unit test. Author: Miao Wang <[email protected]> Closes #15639 from wangmiao1981/zip. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a76846cf Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a76846cf Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a76846cf Branch: refs/heads/master Commit: a76846cfb1c2d6c8f4d647426030b59de20d9433 Parents: 29cea8f Author: Miao Wang <[email protected]> Authored: Thu Oct 27 01:17:32 2016 +0200 Committer: Reynold Xin <[email protected]> Committed: Thu Oct 27 01:17:32 2016 +0200 ---------------------------------------------------------------------- core/src/main/scala/org/apache/spark/util/Utils.scala | 1 + core/src/test/scala/org/apache/spark/util/UtilsSuite.scala | 3 +++ 2 files changed, 4 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/a76846cf/core/src/main/scala/org/apache/spark/util/Utils.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index e57eb0d..6027b07 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -1765,6 +1765,7 @@ private[spark] object Utils extends Logging { */ def getIteratorZipWithIndex[T](iterator: Iterator[T], startIndex: Long): 
Iterator[(T, Long)] = { new Iterator[(T, Long)] { + require(startIndex >= 0, "startIndex should be >= 0.") var index: Long = startIndex - 1L def hasNext: Boolean = iterator.hasNext def next(): (T, Long) = { http://git-wip-us.apache.org/repos/asf/spark/blob/a76846cf/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala ---------------------------------------------------------------------- diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index aeb2969..15ef32f 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -401,6 +401,9 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { assert(iterator.toArray === Array( (0, -1L + Int.MaxValue), (1, 0L + Int.MaxValue), (2, 1L + Int.MaxValue) )) + intercept[IllegalArgumentException] { + Utils.getIteratorZipWithIndex(Iterator(0, 1, 2), -1L) + } } test("doesDirectoryContainFilesNewerThan") { --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
