nuno-faria commented on code in PR #1461:
URL:
https://github.com/apache/datafusion-python/pull/1461#discussion_r3035597667
##########
python/tests/test_functions.py:
##########
@@ -668,6 +668,108 @@ def test_array_function_obj_tests(stmt, py_expr):
assert a == b
+@pytest.mark.parametrize(
+ ("args", "expected"),
+ [
+ pytest.param(
+ ({"x": 1, "y": 2},),
+ [("x", 1), ("y", 2)],
+ id="dict",
+ ),
+ pytest.param(
+ ({"x": literal(1), "y": literal(2)},),
+ [("x", 1), ("y", 2)],
+ id="dict_with_exprs",
+ ),
+ pytest.param(
+ ("x", 1, "y", 2),
+ [("x", 1), ("y", 2)],
+ id="variadic_pairs",
+ ),
+ pytest.param(
+ (literal("x"), literal(1), literal("y"), literal(2)),
+ [("x", 1), ("y", 2)],
+ id="variadic_with_exprs",
+ ),
+ ],
+)
+def test_make_map(args, expected):
+ ctx = SessionContext()
+ batch = pa.RecordBatch.from_arrays([pa.array([1])], names=["a"])
+ df = ctx.create_dataframe([[batch]])
+
+ result = df.select(f.make_map(*args).alias("m")).collect()[0].column(0)
+ assert result[0].as_py() == expected
+
+
+def test_make_map_from_two_lists():
+ ctx = SessionContext()
+ batch = pa.RecordBatch.from_arrays(
+ [
+ pa.array(["k1", "k2", "k3"]),
+ pa.array([10, 20, 30]),
+ ],
+ names=["keys", "vals"],
+ )
+ df = ctx.create_dataframe([[batch]])
+
+ m = f.make_map([column("keys")], [column("vals")])
+ result = df.select(f.map_keys(m).alias("k")).collect()[0].column(0)
+ for i, expected in enumerate(["k1", "k2", "k3"]):
+ assert result[i].as_py() == [expected]
Review Comment:
I think this is easier to read:
```suggestion
assert result.to_pylist() == [["k1"], ["k2"], ["k3"]]
```
##########
python/tests/test_functions.py:
##########
@@ -668,6 +668,108 @@ def test_array_function_obj_tests(stmt, py_expr):
assert a == b
+@pytest.mark.parametrize(
+ ("args", "expected"),
+ [
+ pytest.param(
+ ({"x": 1, "y": 2},),
+ [("x", 1), ("y", 2)],
+ id="dict",
+ ),
+ pytest.param(
+ ({"x": literal(1), "y": literal(2)},),
+ [("x", 1), ("y", 2)],
+ id="dict_with_exprs",
+ ),
+ pytest.param(
+ ("x", 1, "y", 2),
+ [("x", 1), ("y", 2)],
+ id="variadic_pairs",
+ ),
+ pytest.param(
+ (literal("x"), literal(1), literal("y"), literal(2)),
+ [("x", 1), ("y", 2)],
+ id="variadic_with_exprs",
+ ),
+ ],
+)
+def test_make_map(args, expected):
+ ctx = SessionContext()
+ batch = pa.RecordBatch.from_arrays([pa.array([1])], names=["a"])
+ df = ctx.create_dataframe([[batch]])
+
+ result = df.select(f.make_map(*args).alias("m")).collect()[0].column(0)
+ assert result[0].as_py() == expected
+
+
+def test_make_map_from_two_lists():
+ ctx = SessionContext()
+ batch = pa.RecordBatch.from_arrays(
+ [
+ pa.array(["k1", "k2", "k3"]),
+ pa.array([10, 20, 30]),
+ ],
+ names=["keys", "vals"],
+ )
+ df = ctx.create_dataframe([[batch]])
+
+ m = f.make_map([column("keys")], [column("vals")])
+ result = df.select(f.map_keys(m).alias("k")).collect()[0].column(0)
+ for i, expected in enumerate(["k1", "k2", "k3"]):
+ assert result[i].as_py() == [expected]
+
+ result = df.select(f.map_values(m).alias("v")).collect()[0].column(0)
+ for i, expected in enumerate([10, 20, 30]):
+ assert result[i].as_py() == [expected]
Review Comment:
```suggestion
assert result.to_pylist() == [[10], [20], [30]]
```
##########
python/datafusion/functions.py:
##########
@@ -3374,6 +3380,158 @@ def empty(array: Expr) -> Expr:
return array_empty(array)
+# map functions
+
+
+def make_map(*args: Any) -> Expr:
+ """Returns a map expression.
+
+ Supports three calling conventions:
+
+ - ``make_map({"a": 1, "b": 2})`` — from a Python dictionary.
+ - ``make_map([keys], [values])`` — from a list of keys and a list of
+ their associated values. Both lists must be the same length.
+ - ``make_map(k1, v1, k2, v2, ...)`` — from alternating keys and their
+ associated values.
+
+ Keys and values that are not already :py:class:`~datafusion.expr.Expr`
+ are automatically converted to literal expressions.
+
+ Examples:
+ From a dictionary:
+
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [1]})
+ >>> result = df.select(
+ ... dfn.functions.make_map({"a": 1, "b": 2}).alias("m"))
+ >>> result.collect_column("m")[0].as_py()
+ [('a', 1), ('b', 2)]
+
+ From two lists:
+
+ >>> df = ctx.from_pydict({"key": ["x", "y"], "val": [10, 20]})
+ >>> df = df.select(
+ ... dfn.functions.make_map(
+ ... [dfn.col("key")], [dfn.col("val")]
+ ... ).alias("m"))
+ >>> df.collect_column("m")[0].as_py()
+ [('x', 10)]
+
+ From alternating keys and values:
+
+ >>> df = ctx.from_pydict({"a": [1]})
+ >>> result = df.select(
+ ... dfn.functions.make_map("x", 1, "y", 2).alias("m"))
+ >>> result.collect_column("m")[0].as_py()
+ [('x', 1), ('y', 2)]
+ """
+ if len(args) == 1 and isinstance(args[0], dict):
+ key_list = list(args[0].keys())
+ value_list = list(args[0].values())
+ elif (
+ len(args) == 2 # noqa: PLR2004
+ and isinstance(args[0], list)
+ and isinstance(args[1], list)
+ ):
+ if len(args[0]) != len(args[1]):
+ msg = "make_map requires key and value lists to be the same length"
+ raise ValueError(msg)
+ key_list = args[0]
+ value_list = args[1]
+ elif len(args) >= 2 and len(args) % 2 == 0: # noqa: PLR2004
+ key_list = list(args[0::2])
+ value_list = list(args[1::2])
+ else:
+ msg = (
+ "make_map expects a dict, two lists, or an even number of "
+ "key-value arguments"
+ )
+ raise ValueError(msg)
+
+ key_exprs = [k if isinstance(k, Expr) else Expr.literal(k) for k in key_list]
+ val_exprs = [v if isinstance(v, Expr) else Expr.literal(v) for v in value_list]
+ return Expr(f.make_map([k.expr for k in key_exprs], [v.expr for v in val_exprs]))
+
+
+def map_keys(map: Expr) -> Expr:
+ """Returns a list of all keys in the map.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [1]})
+ >>> df = df.select(
+ ... dfn.functions.make_map({"x": 1, "y": 2}).alias("m"))
+ >>> result = df.select(
+ ... dfn.functions.map_keys(dfn.col("m")).alias("keys"))
+ >>> result.collect_column("keys")[0].as_py()
+ ['x', 'y']
+ """
+ return Expr(f.map_keys(map.expr))
+
+
+def map_values(map: Expr) -> Expr:
+ """Returns a list of all values in the map.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [1]})
+ >>> df = df.select(
+ ... dfn.functions.make_map({"x": 1, "y": 2}).alias("m"))
+ >>> result = df.select(
+ ... dfn.functions.map_values(dfn.col("m")).alias("vals"))
+ >>> result.collect_column("vals")[0].as_py()
+ [1, 2]
+ """
+ return Expr(f.map_values(map.expr))
+
+
+def map_extract(map: Expr, key: Expr) -> Expr:
+ """Return the value for a given key in the map.
Review Comment:
```suggestion
"""Returns the value for a given key in the map.
```
##########
python/datafusion/functions.py:
##########
@@ -3374,6 +3380,158 @@ def empty(array: Expr) -> Expr:
return array_empty(array)
+# map functions
+
+
+def make_map(*args: Any) -> Expr:
+ """Returns a map expression.
+
+ Supports three calling conventions:
+
+ - ``make_map({"a": 1, "b": 2})`` — from a Python dictionary.
+ - ``make_map([keys], [values])`` — from a list of keys and a list of
+ their associated values. Both lists must be the same length.
+ - ``make_map(k1, v1, k2, v2, ...)`` — from alternating keys and their
+ associated values.
+
+ Keys and values that are not already :py:class:`~datafusion.expr.Expr`
+ are automatically converted to literal expressions.
+
+ Examples:
+ From a dictionary:
+
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [1]})
+ >>> result = df.select(
+ ... dfn.functions.make_map({"a": 1, "b": 2}).alias("m"))
+ >>> result.collect_column("m")[0].as_py()
+ [('a', 1), ('b', 2)]
+
+ From two lists:
+
+ >>> df = ctx.from_pydict({"key": ["x", "y"], "val": [10, 20]})
+ >>> df = df.select(
+ ... dfn.functions.make_map(
+ ... [dfn.col("key")], [dfn.col("val")]
+ ... ).alias("m"))
+ >>> df.collect_column("m")[0].as_py()
+ [('x', 10)]
+
+ From alternating keys and values:
+
+ >>> df = ctx.from_pydict({"a": [1]})
+ >>> result = df.select(
+ ... dfn.functions.make_map("x", 1, "y", 2).alias("m"))
+ >>> result.collect_column("m")[0].as_py()
+ [('x', 1), ('y', 2)]
+ """
+ if len(args) == 1 and isinstance(args[0], dict):
+ key_list = list(args[0].keys())
+ value_list = list(args[0].values())
+ elif (
+ len(args) == 2 # noqa: PLR2004
+ and isinstance(args[0], list)
+ and isinstance(args[1], list)
+ ):
+ if len(args[0]) != len(args[1]):
+ msg = "make_map requires key and value lists to be the same length"
+ raise ValueError(msg)
+ key_list = args[0]
+ value_list = args[1]
+ elif len(args) >= 2 and len(args) % 2 == 0: # noqa: PLR2004
+ key_list = list(args[0::2])
+ value_list = list(args[1::2])
+ else:
+ msg = (
+ "make_map expects a dict, two lists, or an even number of "
+ "key-value arguments"
+ )
+ raise ValueError(msg)
+
+ key_exprs = [k if isinstance(k, Expr) else Expr.literal(k) for k in key_list]
+ val_exprs = [v if isinstance(v, Expr) else Expr.literal(v) for v in value_list]
+ return Expr(f.make_map([k.expr for k in key_exprs], [v.expr for v in val_exprs]))
+
+
+def map_keys(map: Expr) -> Expr:
+ """Returns a list of all keys in the map.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [1]})
+ >>> df = df.select(
+ ... dfn.functions.make_map({"x": 1, "y": 2}).alias("m"))
+ >>> result = df.select(
+ ... dfn.functions.map_keys(dfn.col("m")).alias("keys"))
+ >>> result.collect_column("keys")[0].as_py()
+ ['x', 'y']
+ """
+ return Expr(f.map_keys(map.expr))
+
+
+def map_values(map: Expr) -> Expr:
+ """Returns a list of all values in the map.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [1]})
+ >>> df = df.select(
+ ... dfn.functions.make_map({"x": 1, "y": 2}).alias("m"))
+ >>> result = df.select(
+ ... dfn.functions.map_values(dfn.col("m")).alias("vals"))
+ >>> result.collect_column("vals")[0].as_py()
+ [1, 2]
+ """
+ return Expr(f.map_values(map.expr))
+
+
+def map_extract(map: Expr, key: Expr) -> Expr:
+ """Return the value for a given key in the map.
+
+ Returns ``[None]`` if the key is absent.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [1]})
+ >>> df = df.select(
+ ... dfn.functions.make_map({"x": 1, "y": 2}).alias("m"))
+ >>> result = df.select(
+ ... dfn.functions.map_extract(
+ ... dfn.col("m"), dfn.lit("x")
+ ... ).alias("val"))
+ >>> result.collect_column("val")[0].as_py()
+ [1]
+ """
+ return Expr(f.map_extract(map.expr, key.expr))
+
+
+def map_entries(map: Expr) -> Expr:
+ """Returns a list of all entries (key-value struct pairs) in the map.
+
+ Examples:
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [1]})
+ >>> df = df.select(
+ ... dfn.functions.make_map({"x": 1, "y": 2}).alias("m"))
+ >>> result = df.select(
+ ... dfn.functions.map_entries(dfn.col("m")).alias("entries"))
+ >>> result.collect_column("entries")[0].as_py()
+ [{'key': 'x', 'value': 1}, {'key': 'y', 'value': 2}]
+ """
+ return Expr(f.map_entries(map.expr))
+
+
+def element_at(map: Expr, key: Expr) -> Expr:
+ """Return the value for a given key in the map.
Review Comment:
```suggestion
"""Returns the value for a given key in the map.
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]