nuno-faria commented on code in PR #1475:
URL:
https://github.com/apache/datafusion-python/pull/1475#discussion_r3041662605
##########
python/tests/test_context.py:
##########
@@ -551,6 +551,53 @@ def test_table_not_found(ctx):
ctx.table(f"not-found-{uuid4()}")
+def test_session_start_time(ctx):
+ st = ctx.session_start_time()
+ assert isinstance(st, str)
+ assert "T" in st # RFC 3339 format
Review Comment:
What about this? The conversion should fail if the string is badly formatted.
```suggestion
dt.datetime.fromisoformat(st).isoformat()
```
##########
python/tests/test_context.py:
##########
@@ -551,6 +551,53 @@ def test_table_not_found(ctx):
ctx.table(f"not-found-{uuid4()}")
+def test_session_start_time(ctx):
+ st = ctx.session_start_time()
+ assert isinstance(st, str)
+ assert "T" in st # RFC 3339 format
+
+
+def test_enable_ident_normalization(ctx):
+ result = ctx.enable_ident_normalization()
+ assert isinstance(result, bool)
+
+
+def test_parse_sql_expr(ctx):
+ from datafusion.common import DFSchema
+
+ schema = DFSchema.empty()
+ expr = ctx.parse_sql_expr("1 + 2", schema)
+ assert "Int64(1) + Int64(2)" in str(expr)
Review Comment:
```suggestion
assert str(expr) == "Expr(Int64(1) + Int64(2))"
```
##########
python/datafusion/context.py:
##########
@@ -1141,6 +1142,120 @@ def session_id(self) -> str:
"""Return an id that uniquely identifies this
:py:class:`SessionContext`."""
return self.ctx.session_id()
+ def session_start_time(self) -> str:
+ """Return the session start time as an RFC 3339 formatted string.
+
+ Examples:
+ >>> ctx = SessionContext()
+ >>> start_time = ctx.session_start_time()
+ >>> assert "T" in start_time # RFC 3339 contains a 'T' separator
Review Comment:
This assert feels a little odd; what about showing a result instead?
```python
>>> ctx.session_start_time()
'2026-01-01T12:34:56.123456789+00:00'
```
##########
python/datafusion/context.py:
##########
@@ -1141,6 +1142,120 @@ def session_id(self) -> str:
"""Return an id that uniquely identifies this
:py:class:`SessionContext`."""
return self.ctx.session_id()
+ def session_start_time(self) -> str:
+ """Return the session start time as an RFC 3339 formatted string.
+
+ Examples:
+ >>> ctx = SessionContext()
+ >>> start_time = ctx.session_start_time()
+ >>> assert "T" in start_time # RFC 3339 contains a 'T' separator
+ """
+ return self.ctx.session_start_time()
+
+ def enable_ident_normalization(self) -> bool:
+ """Return whether identifier normalization (lowercasing) is enabled.
+
+ Examples:
+ >>> ctx = SessionContext()
+ >>> assert isinstance(ctx.enable_ident_normalization(), bool)
Review Comment:
Same thing here:
```suggestion
>>> ctx.enable_ident_normalization()
True
```
##########
python/datafusion/context.py:
##########
@@ -1141,6 +1142,120 @@ def session_id(self) -> str:
"""Return an id that uniquely identifies this
:py:class:`SessionContext`."""
return self.ctx.session_id()
+ def session_start_time(self) -> str:
+ """Return the session start time as an RFC 3339 formatted string.
+
+ Examples:
+ >>> ctx = SessionContext()
+ >>> start_time = ctx.session_start_time()
+ >>> assert "T" in start_time # RFC 3339 contains a 'T' separator
+ """
+ return self.ctx.session_start_time()
+
+ def enable_ident_normalization(self) -> bool:
+ """Return whether identifier normalization (lowercasing) is enabled.
+
+ Examples:
+ >>> ctx = SessionContext()
+ >>> assert isinstance(ctx.enable_ident_normalization(), bool)
+ """
+ return self.ctx.enable_ident_normalization()
+
+ def parse_sql_expr(self, sql: str, schema: DFSchema) -> Expr:
+ """Parse a SQL expression string into a logical expression.
+
+ Args:
+ sql: SQL expression string.
+ schema: Schema to use for resolving column references.
+
+ Returns:
+ Parsed expression.
+
+ Examples:
+ >>> from datafusion.common import DFSchema
+ >>> ctx = SessionContext()
+ >>> schema = DFSchema.empty()
+ >>> expr = ctx.parse_sql_expr("1 + 2", schema)
+ >>> assert "Int64(1) + Int64(2)" in str(expr)
+ """
+ from datafusion.expr import Expr # noqa: PLC0415
+
+ return Expr(self.ctx.parse_sql_expr(sql, schema))
+
+ def execute_logical_plan(self, plan: LogicalPlan) -> DataFrame:
+ """Execute a :py:class:`~datafusion.plan.LogicalPlan` and return a
DataFrame.
+
+ Args:
+ plan: Logical plan to execute.
+
+ Returns:
+ DataFrame resulting from the execution.
+
+ Examples:
+ >>> ctx = SessionContext()
+ >>> df = ctx.from_pydict({"a": [1, 2, 3]})
+ >>> plan = df.logical_plan()
+ >>> df2 = ctx.execute_logical_plan(plan)
+ >>> df2.collect()[0].column(0)
+ <pyarrow.lib.Int64Array object at ...>
+ [
+ 1,
+ 2,
+ 3
+ ]
+ """
+ return DataFrame(self.ctx.execute_logical_plan(plan._raw_plan))
+
+ def refresh_catalogs(self) -> None:
+ """Refresh catalog metadata.
+
+ Examples:
+ >>> ctx = SessionContext()
+ >>> ctx.refresh_catalogs()
+ """
+ self.ctx.refresh_catalogs()
+
+ def remove_optimizer_rule(self, name: str) -> bool:
+ """Remove an optimizer rule by name.
+
+ Args:
+ name: Name of the optimizer rule to remove.
+
+ Returns:
+ True if a rule with the given name was found and removed.
+
+ Examples:
+ >>> ctx = SessionContext()
+ >>> ctx.remove_optimizer_rule("nonexistent_rule")
+ False
+ """
+ return self.ctx.remove_optimizer_rule(name)
+
+ def table_provider(self, name: str) -> Table:
+ """Return the :py:class:`~datafusion.catalog.Table` for the given
table name.
+
+ Args:
+ name: Name of the table.
+
+ Returns:
+ The table provider.
+
+ Raises:
+ KeyError: If the table is not found.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> ctx = SessionContext()
+ >>> batch = pa.RecordBatch.from_pydict({"x": [1, 2]})
+ >>> ctx.register_record_batches("my_table", [[batch]])
+ >>> tbl = ctx.table_provider("my_table")
+ >>> tbl.schema
+ x: int64
+ """
+ from datafusion.catalog import Table # noqa: PLC0415
+
+ return Table(self.ctx.table_provider(name))
Review Comment:
I think we can also remove the `Table` wrapping and the import here.
##########
python/datafusion/context.py:
##########
@@ -1141,6 +1142,120 @@ def session_id(self) -> str:
"""Return an id that uniquely identifies this
:py:class:`SessionContext`."""
return self.ctx.session_id()
+ def session_start_time(self) -> str:
+ """Return the session start time as an RFC 3339 formatted string.
+
+ Examples:
+ >>> ctx = SessionContext()
+ >>> start_time = ctx.session_start_time()
+ >>> assert "T" in start_time # RFC 3339 contains a 'T' separator
+ """
+ return self.ctx.session_start_time()
+
+ def enable_ident_normalization(self) -> bool:
+ """Return whether identifier normalization (lowercasing) is enabled.
+
+ Examples:
+ >>> ctx = SessionContext()
+ >>> assert isinstance(ctx.enable_ident_normalization(), bool)
+ """
+ return self.ctx.enable_ident_normalization()
+
+ def parse_sql_expr(self, sql: str, schema: DFSchema) -> Expr:
+ """Parse a SQL expression string into a logical expression.
+
+ Args:
+ sql: SQL expression string.
+ schema: Schema to use for resolving column references.
+
+ Returns:
+ Parsed expression.
+
+ Examples:
+ >>> from datafusion.common import DFSchema
+ >>> ctx = SessionContext()
+ >>> schema = DFSchema.empty()
+ >>> expr = ctx.parse_sql_expr("1 + 2", schema)
+ >>> assert "Int64(1) + Int64(2)" in str(expr)
Review Comment:
```suggestion
>>> ctx.parse_sql_expr("1 + 2", schema)
Expr(Int64(1) + Int64(2))
```
##########
python/tests/test_context.py:
##########
@@ -551,6 +551,53 @@ def test_table_not_found(ctx):
ctx.table(f"not-found-{uuid4()}")
+def test_session_start_time(ctx):
+ st = ctx.session_start_time()
+ assert isinstance(st, str)
+ assert "T" in st # RFC 3339 format
+
+
+def test_enable_ident_normalization(ctx):
+ result = ctx.enable_ident_normalization()
+ assert isinstance(result, bool)
+
+
+def test_parse_sql_expr(ctx):
+ from datafusion.common import DFSchema
+
+ schema = DFSchema.empty()
+ expr = ctx.parse_sql_expr("1 + 2", schema)
+ assert "Int64(1) + Int64(2)" in str(expr)
+
+
+def test_execute_logical_plan(ctx):
+ df = ctx.from_pydict({"a": [1, 2, 3]})
+ plan = df.logical_plan()
+ df2 = ctx.execute_logical_plan(plan)
+ result = df2.collect()
+ assert result[0].column(0) == pa.array([1, 2, 3])
+
+
+def test_refresh_catalogs(ctx):
+ ctx.refresh_catalogs()
+
+
+def test_remove_optimizer_rule(ctx):
+ assert ctx.remove_optimizer_rule("nonexistent_rule") is False
Review Comment:
Testing with a rule that exists as well:
```suggestion
assert ctx.remove_optimizer_rule("push_down_filter")
assert ctx.remove_optimizer_rule("nonexistent_rule") is False
```
##########
python/datafusion/context.py:
##########
@@ -1141,6 +1142,120 @@ def session_id(self) -> str:
"""Return an id that uniquely identifies this
:py:class:`SessionContext`."""
return self.ctx.session_id()
+ def session_start_time(self) -> str:
+ """Return the session start time as an RFC 3339 formatted string.
+
+ Examples:
+ >>> ctx = SessionContext()
+ >>> start_time = ctx.session_start_time()
+ >>> assert "T" in start_time # RFC 3339 contains a 'T' separator
+ """
+ return self.ctx.session_start_time()
+
+ def enable_ident_normalization(self) -> bool:
+ """Return whether identifier normalization (lowercasing) is enabled.
+
+ Examples:
+ >>> ctx = SessionContext()
+ >>> assert isinstance(ctx.enable_ident_normalization(), bool)
+ """
+ return self.ctx.enable_ident_normalization()
+
+ def parse_sql_expr(self, sql: str, schema: DFSchema) -> Expr:
+ """Parse a SQL expression string into a logical expression.
+
+ Args:
+ sql: SQL expression string.
+ schema: Schema to use for resolving column references.
+
+ Returns:
+ Parsed expression.
+
+ Examples:
+ >>> from datafusion.common import DFSchema
+ >>> ctx = SessionContext()
+ >>> schema = DFSchema.empty()
+ >>> expr = ctx.parse_sql_expr("1 + 2", schema)
+ >>> assert "Int64(1) + Int64(2)" in str(expr)
+ """
+ from datafusion.expr import Expr # noqa: PLC0415
+
+ return Expr(self.ctx.parse_sql_expr(sql, schema))
Review Comment:
I think we could remove the import and the wrapping with `Expr`.
##########
python/tests/test_context.py:
##########
@@ -551,6 +551,53 @@ def test_table_not_found(ctx):
ctx.table(f"not-found-{uuid4()}")
+def test_session_start_time(ctx):
+ st = ctx.session_start_time()
+ assert isinstance(st, str)
+ assert "T" in st # RFC 3339 format
+
+
+def test_enable_ident_normalization(ctx):
+ result = ctx.enable_ident_normalization()
+ assert isinstance(result, bool)
Review Comment:
I think it's better to change the value and check it.
```suggestion
assert ctx.enable_ident_normalization()
ctx.sql("set datafusion.sql_parser.enable_ident_normalization = false")
assert ctx.enable_ident_normalization() is False
```
Unrelated, but the original method name is a bit misleading, since it does not
enable the flag; it only returns the current value.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]