Repository: spark
Updated Branches:
  refs/heads/master 968ad9721 -> 77176619a


[SPARK-6661] Python type errors should print type, not object

Author: Elisey Zanko <[email protected]>

Closes #5361 from 31z4/spark-6661 and squashes the following commits:

73c5d79 [Elisey Zanko] Python type errors should print type, not object


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/77176619
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/77176619
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/77176619

Branch: refs/heads/master
Commit: 77176619a97d07811ab20e1dde4677359d85eb33
Parents: 968ad97
Author: Elisey Zanko <[email protected]>
Authored: Mon Apr 20 10:44:09 2015 -0700
Committer: Josh Rosen <[email protected]>
Committed: Mon Apr 20 10:44:09 2015 -0700

----------------------------------------------------------------------
 python/pyspark/accumulators.py      |  2 +-
 python/pyspark/context.py           |  2 +-
 python/pyspark/ml/param/__init__.py |  2 +-
 python/pyspark/ml/pipeline.py       |  4 ++--
 python/pyspark/mllib/linalg.py      |  4 ++--
 python/pyspark/mllib/regression.py  |  2 +-
 python/pyspark/mllib/tests.py       |  6 ++++--
 python/pyspark/sql/_types.py        | 12 ++++++------
 python/pyspark/sql/context.py       |  8 ++++----
 python/pyspark/sql/dataframe.py     |  2 +-
 10 files changed, 23 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/77176619/python/pyspark/accumulators.py
----------------------------------------------------------------------
diff --git a/python/pyspark/accumulators.py b/python/pyspark/accumulators.py
index 7271809..0d21a13 100644
--- a/python/pyspark/accumulators.py
+++ b/python/pyspark/accumulators.py
@@ -83,7 +83,7 @@ Py4JJavaError:...
 >>> sc.accumulator([1.0, 2.0, 3.0]) # doctest: +IGNORE_EXCEPTION_DETAIL
 Traceback (most recent call last):
     ...
-Exception:...
+TypeError:...
 """
 
 import sys

http://git-wip-us.apache.org/repos/asf/spark/blob/77176619/python/pyspark/context.py
----------------------------------------------------------------------
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 1dc2fec..6a743ac 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -671,7 +671,7 @@ class SparkContext(object):
             elif isinstance(value, complex):
                 accum_param = accumulators.COMPLEX_ACCUMULATOR_PARAM
             else:
-                raise Exception("No default accumulator param for type %s" % type(value))
+                raise TypeError("No default accumulator param for type %s" % type(value))
         SparkContext._next_accum_id += 1
         return Accumulator(SparkContext._next_accum_id - 1, value, accum_param)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/77176619/python/pyspark/ml/param/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py
index 9fccb65..49c20b4 100644
--- a/python/pyspark/ml/param/__init__.py
+++ b/python/pyspark/ml/param/__init__.py
@@ -30,7 +30,7 @@ class Param(object):
 
     def __init__(self, parent, name, doc):
         if not isinstance(parent, Params):
-            raise ValueError("Parent must be a Params but got type %s." % type(parent).__name__)
+            raise TypeError("Parent must be a Params but got type %s." % type(parent))
         self.parent = parent
         self.name = str(name)
         self.doc = str(doc)

http://git-wip-us.apache.org/repos/asf/spark/blob/77176619/python/pyspark/ml/pipeline.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/pipeline.py b/python/pyspark/ml/pipeline.py
index d94ecff..7c1ec30 100644
--- a/python/pyspark/ml/pipeline.py
+++ b/python/pyspark/ml/pipeline.py
@@ -131,8 +131,8 @@ class Pipeline(Estimator):
         stages = paramMap[self.stages]
         for stage in stages:
             if not (isinstance(stage, Estimator) or isinstance(stage, Transformer)):
-                raise ValueError(
-                    "Cannot recognize a pipeline stage of type %s." % type(stage).__name__)
+                raise TypeError(
+                    "Cannot recognize a pipeline stage of type %s." % type(stage))
         indexOfLastEstimator = -1
         for i, stage in enumerate(stages):
             if isinstance(stage, Estimator):

http://git-wip-us.apache.org/repos/asf/spark/blob/77176619/python/pyspark/mllib/linalg.py
----------------------------------------------------------------------
diff --git a/python/pyspark/mllib/linalg.py b/python/pyspark/mllib/linalg.py
index 38b3aa3..ec8c879 100644
--- a/python/pyspark/mllib/linalg.py
+++ b/python/pyspark/mllib/linalg.py
@@ -145,7 +145,7 @@ class VectorUDT(UserDefinedType):
             values = [float(v) for v in obj]
             return (1, None, None, values)
         else:
-            raise ValueError("cannot serialize %r of type %r" % (obj, type(obj)))
+            raise TypeError("cannot serialize %r of type %r" % (obj, type(obj)))
 
     def deserialize(self, datum):
         assert len(datum) == 4, \
@@ -561,7 +561,7 @@ class SparseVector(Vector):
         inds = self.indices
         vals = self.values
         if not isinstance(index, int):
-            raise ValueError(
+            raise TypeError(
                 "Indices must be of type integer, got type %s" % type(index))
         if index < 0:
             index += self.size

http://git-wip-us.apache.org/repos/asf/spark/blob/77176619/python/pyspark/mllib/regression.py
----------------------------------------------------------------------
diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index cd7310a..a0117c5 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -170,7 +170,7 @@ def _regression_train_wrapper(train_func, modelClass, data, initial_weights):
     from pyspark.mllib.classification import LogisticRegressionModel
     first = data.first()
     if not isinstance(first, LabeledPoint):
-        raise ValueError("data should be an RDD of LabeledPoint, but got %s" % first)
+        raise TypeError("data should be an RDD of LabeledPoint, but got %s" % type(first))
     if initial_weights is None:
         initial_weights = [0.0] * len(data.first().features)
     if (modelClass == LogisticRegressionModel):

http://git-wip-us.apache.org/repos/asf/spark/blob/77176619/python/pyspark/mllib/tests.py
----------------------------------------------------------------------
diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py
index c6ed5ac..849c883 100644
--- a/python/pyspark/mllib/tests.py
+++ b/python/pyspark/mllib/tests.py
@@ -135,8 +135,10 @@ class VectorTests(PySparkTestCase):
         self.assertEquals(sv[-1], 2)
         self.assertEquals(sv[-2], 0)
         self.assertEquals(sv[-4], 0)
-        for ind in [4, -5, 7.8]:
+        for ind in [4, -5]:
             self.assertRaises(ValueError, sv.__getitem__, ind)
+        for ind in [7.8, '1']:
+            self.assertRaises(TypeError, sv.__getitem__, ind)
 
     def test_matrix_indexing(self):
         mat = DenseMatrix(3, 2, [0, 1, 4, 6, 8, 10])
@@ -450,7 +452,7 @@ class VectorUDTTests(PySparkTestCase):
             elif isinstance(v, DenseVector):
                 self.assertEqual(v, self.dv1)
             else:
-                raise ValueError("expecting a vector but got %r of type %r" % (v, type(v)))
+                raise TypeError("expecting a vector but got %r of type %r" % (v, type(v)))
 
 
 @unittest.skipIf(not _have_scipy, "SciPy not installed")

http://git-wip-us.apache.org/repos/asf/spark/blob/77176619/python/pyspark/sql/_types.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/_types.py b/python/pyspark/sql/_types.py
index 492c0cb..110d115 100644
--- a/python/pyspark/sql/_types.py
+++ b/python/pyspark/sql/_types.py
@@ -562,8 +562,8 @@ def _infer_type(obj):
     else:
         try:
             return _infer_schema(obj)
-        except ValueError:
-            raise ValueError("not supported type: %s" % type(obj))
+        except TypeError:
+            raise TypeError("not supported type: %s" % type(obj))
 
 
 def _infer_schema(row):
@@ -584,7 +584,7 @@ def _infer_schema(row):
         items = sorted(row.__dict__.items())
 
     else:
-        raise ValueError("Can not infer schema for type: %s" % type(row))
+        raise TypeError("Can not infer schema for type: %s" % type(row))
 
     fields = [StructField(k, _infer_type(v), True) for k, v in items]
     return StructType(fields)
@@ -696,7 +696,7 @@ def _merge_type(a, b):
         return a
     elif type(a) is not type(b):
         # TODO: type cast (such as int -> long)
-        raise TypeError("Can not merge type %s and %s" % (a, b))
+        raise TypeError("Can not merge type %s and %s" % (type(a), type(b)))
 
     # same type
     if isinstance(a, StructType):
@@ -773,7 +773,7 @@ def _create_converter(dataType):
         elif hasattr(obj, "__dict__"):  # object
             d = obj.__dict__
         else:
-            raise ValueError("Unexpected obj: %s" % obj)
+            raise TypeError("Unexpected obj type: %s" % type(obj))
 
         if convert_fields:
             return tuple([conv(d.get(name)) for name, conv in zip(names, converters)])
@@ -912,7 +912,7 @@ def _infer_schema_type(obj, dataType):
         return StructType(fields)
 
     else:
-        raise ValueError("Unexpected dataType: %s" % dataType)
+        raise TypeError("Unexpected dataType: %s" % type(dataType))
 
 
 _acceptable_types = {

http://git-wip-us.apache.org/repos/asf/spark/blob/77176619/python/pyspark/sql/context.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index c90afc3..acf3c11 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -208,7 +208,7 @@ class SQLContext(object):
             raise TypeError("Cannot apply schema to DataFrame")
 
         if not isinstance(schema, StructType):
-            raise TypeError("schema should be StructType, but got %s" % schema)
+            raise TypeError("schema should be StructType, but got %s" % type(schema))
 
         return self.createDataFrame(rdd, schema)
 
@@ -281,7 +281,7 @@ class SQLContext(object):
                 # data could be list, tuple, generator ...
                 rdd = self._sc.parallelize(data)
             except Exception:
-                raise ValueError("cannot create an RDD from type: %s" % type(data))
+                raise TypeError("cannot create an RDD from type: %s" % type(data))
         else:
             rdd = data
 
@@ -293,8 +293,8 @@ class SQLContext(object):
         if isinstance(schema, (list, tuple)):
             first = rdd.first()
             if not isinstance(first, (list, tuple)):
-                raise ValueError("each row in `rdd` should be list or tuple, "
-                                 "but got %r" % type(first))
+                raise TypeError("each row in `rdd` should be list or tuple, "
+                                "but got %r" % type(first))
             row_cls = Row(*schema)
             schema = self._inferSchema(rdd.map(lambda r: row_cls(*r)), samplingRatio)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/77176619/python/pyspark/sql/dataframe.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index d70c5b0..75c181c 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -608,7 +608,7 @@ class DataFrame(object):
             jc = self._jdf.apply(self.columns[item])
             return Column(jc)
         else:
-            raise TypeError("unexpected type: %s" % type(item))
+            raise TypeError("unexpected item type: %s" % type(item))
 
     def __getattr__(self, name):
         """Returns the :class:`Column` denoted by ``name``.


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to