This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 90c6c2b36743 [SPARK-45222][PYTHON][DOCS] Refine docstring of `DataFrameReader.json`
90c6c2b36743 is described below
commit 90c6c2b36743e64ecdeaebb34fe37aa348701370
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Tue Nov 7 09:58:50 2023 -0800
[SPARK-45222][PYTHON][DOCS] Refine docstring of `DataFrameReader.json`
### What changes were proposed in this pull request?
This PR proposes to improve the docstring of `DataFrameReader.json`.
### Why are the changes needed?
To make the documentation clearer for end users and to improve the overall usability of PySpark.
### Does this PR introduce _any_ user-facing change?
Yes, it fixes the user-facing documentation.
### How was this patch tested?
Manually tested.
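
For reference, the refreshed doctests can also be exercised locally with Python's doctest module. The snippet below is a rough sketch rather than the project's test harness, and it assumes a local SparkSession bound to the name `spark`, as the docstring examples expect:

    import doctest
    from pyspark.sql import SparkSession
    import pyspark.sql.readwriter as readwriter

    # Spin up a small local session so the `spark` name used in the
    # docstring examples resolves.
    spark = SparkSession.builder.master("local[1]").appName("doctests").getOrCreate()
    globs = readwriter.__dict__.copy()
    globs["spark"] = spark

    # Run every doctest in pyspark.sql.readwriter, including DataFrameReader.json.
    doctest.testmod(readwriter, globs=globs, optionflags=doctest.ELLIPSIS)
    spark.stop()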
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #43687 from HyukjinKwon/SPARK-45222.
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/sql/readwriter.py | 51 ++++++++++++++++++++++++++++++++++------
1 file changed, 44 insertions(+), 7 deletions(-)
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 75faa13f02b3..b7e2c145f443 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -380,22 +380,59 @@ class DataFrameReader(OptionUtils):
Examples
--------
- Write a DataFrame into a JSON file and read it back.
+ Example 1: Write a DataFrame into a JSON file and read it back.
>>> import tempfile
>>> with tempfile.TemporaryDirectory() as d:
... # Write a DataFrame into a JSON file
... spark.createDataFrame(
- ... [{"age": 100, "name": "Hyukjin Kwon"}]
+ ... [{"age": 100, "name": "Hyukjin"}]
... ).write.mode("overwrite").format("json").save(d)
...
... # Read the JSON file as a DataFrame.
... spark.read.json(d).show()
- +---+------------+
- |age|        name|
- +---+------------+
- |100|Hyukjin Kwon|
- +---+------------+
+ +---+-------+
+ |age|   name|
+ +---+-------+
+ |100|Hyukjin|
+ +---+-------+
+
+ Example 2: Read JSON from multiple files in a directory
+
+ >>> import tempfile
+ >>> with tempfile.TemporaryDirectory() as d1, tempfile.TemporaryDirectory() as d2:
+ ... # Write a DataFrame into a JSON file
+ ... spark.createDataFrame(
+ ... [{"age": 30, "name": "Bob"}]
+ ... ).write.mode("overwrite").format("json").save(d1)
+ ...
+ ... # Write a second DataFrame into another JSON file, then read both files.
+ ... spark.createDataFrame(
+ ... [{"age": 25, "name": "Alice"}]
+ ... ).write.mode("overwrite").format("json").save(d2)
+ ... spark.read.json([d1, d2]).show()
+ +---+-----+
+ |age| name|
+ +---+-----+
+ | 25|Alice|
+ | 30| Bob|
+ +---+-----+
+
+ Example 3: Read JSON with a custom schema
+
+ >>> import tempfile
+ >>> with tempfile.TemporaryDirectory() as d:
+ ... # Write a DataFrame into a JSON file
+ ... spark.createDataFrame(
+ ... [{"age": 30, "name": "Bob"}]
+ ... ).write.mode("overwrite").format("json").save(d)
+ ... custom_schema = "name STRING, age INT"
+ ... spark.read.json(d, schema=custom_schema).show()
+ +----+---+
+ |name|age|
+ +----+---+
+ | Bob| 30|
+ +----+---+
"""
self._set_opts(
schema=schema,
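
As a side note, Example 3 passes the schema as a DDL-formatted string; `DataFrameReader.json` also accepts a `StructType`. The following is a minimal sketch, not part of this patch, assuming a running SparkSession named `spark`:

    import tempfile
    from pyspark.sql.types import IntegerType, StringType, StructField, StructType

    # Equivalent of the DDL string "name STRING, age INT" as a StructType.
    custom_schema = StructType([
        StructField("name", StringType(), True),
        StructField("age", IntegerType(), True),
    ])

    with tempfile.TemporaryDirectory() as d:
        # Write a single-row DataFrame into a JSON file.
        spark.createDataFrame(
            [{"age": 30, "name": "Bob"}]
        ).write.mode("overwrite").format("json").save(d)

        # Read it back with the explicit StructType schema.
        spark.read.json(d, schema=custom_schema).show()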
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]