This is an automated email from the ASF dual-hosted git repository. cutlerb pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 32515d2  [SPARK-26349][PYSPARK] Forbid insecure py4j gateways
32515d2 is described below

commit 32515d205a4de4d8838226fa5e5c4e4f66935193
Author: Imran Rashid <iras...@cloudera.com>
AuthorDate: Tue Jan 8 11:26:36 2019 -0800

    [SPARK-26349][PYSPARK] Forbid insecure py4j gateways

    Spark always creates secure py4j connections between java and python,
    but it also allows users to pass in their own connection. This ensures
    that even passed in connections are secure.

    Added test cases verifying the failure with a (mocked) insecure gateway.

    This is closely related to SPARK-26019, but this entirely forbids the
    insecure connection, rather than creating the "escape-hatch".

    Closes #23441 from squito/SPARK-26349.

    Authored-by: Imran Rashid <iras...@cloudera.com>
    Signed-off-by: Bryan Cutler <cutl...@gmail.com>
---
 python/pyspark/context.py            |  5 +++++
 python/pyspark/tests/test_context.py | 10 ++++++++++
 2 files changed, 15 insertions(+)

diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 6137ed2..64178eb 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -115,6 +115,11 @@ class SparkContext(object):
         ValueError:...
         """
         self._callsite = first_spark_call() or CallSite(None, None, None)
+        if gateway is not None and gateway.gateway_parameters.auth_token is None:
+            raise ValueError(
+                "You are trying to pass an insecure Py4j gateway to Spark. This"
+                " is not allowed as it is a security risk.")
+
         SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
         try:
             self._do_init(master, appName, sparkHome, pyFiles, environment, batchSize, serializer,
diff --git a/python/pyspark/tests/test_context.py b/python/pyspark/tests/test_context.py
index 201baf4..18d9cd4 100644
--- a/python/pyspark/tests/test_context.py
+++ b/python/pyspark/tests/test_context.py
@@ -20,6 +20,7 @@ import tempfile
 import threading
 import time
 import unittest
+from collections import namedtuple
 
 from pyspark import SparkFiles, SparkContext
 from pyspark.testing.utils import ReusedPySparkTestCase, PySparkTestCase, QuietTest, SPARK_HOME
@@ -246,6 +247,15 @@ class ContextTests(unittest.TestCase):
         with SparkContext() as sc:
             self.assertGreater(sc.startTime, 0)
 
+    def test_forbid_insecure_gateway(self):
+        # Fail immediately if you try to create a SparkContext
+        # with an insecure gateway
+        parameters = namedtuple('MockGatewayParameters', 'auth_token')(None)
+        mock_insecure_gateway = namedtuple('MockJavaGateway', 'gateway_parameters')(parameters)
+        with self.assertRaises(ValueError) as context:
+            SparkContext(gateway=mock_insecure_gateway)
+        self.assertIn("insecure Py4j gateway", str(context.exception))
+
 
 if __name__ == "__main__":
     from pyspark.tests.test_context import *

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org