This is an automated email from the ASF dual-hosted git repository.

raulcd pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 7c18001f0d GH-45848: [C++][Python][R] Remove deprecated PARQUET_2_0 (#45849)
7c18001f0d is described below

commit 7c18001f0d7bd97471237719702c33165858bba7
Author: Alenka Frim <[email protected]>
AuthorDate: Thu Mar 27 12:19:08 2025 +0100

    GH-45848: [C++][Python][R] Remove deprecated PARQUET_2_0 (#45849)
    
    ### Rationale for this change
    `PARQUET_2_0` has been deprecated for a while now and can be removed from the codebase.
    
    ### What changes are included in this PR?
    Removal of deprecated enum value `PARQUET_2_0`, and the corresponding Python and R definitions.
    
    ### Are these changes tested?
    By existing unit tests, some of which have been updated.
    
    ### Are there any user-facing changes?
    * `PARQUET_2_0` is removed in Parquet C++
    * `"2.0"` is not allowed anymore as Parquet version in the PyArrow and R Parquet APIs
    
    * GitHub Issue: #45848
    
    Lead-authored-by: AlenkaF <[email protected]>
    Co-authored-by: Bryce Mecum <[email protected]>
    Signed-off-by: Raúl Cumplido <[email protected]>
---
 cpp/src/parquet/arrow/arrow_reader_writer_test.cc | 19 ++++++-------------
 cpp/src/parquet/metadata.cc                       |  3 ---
 cpp/src/parquet/type_fwd.h                        |  6 ------
 python/pyarrow/_parquet.pxd                       |  1 -
 python/pyarrow/_parquet.pyx                       |  8 --------
 python/pyarrow/tests/parquet/test_data_types.py   |  2 +-
 python/pyarrow/tests/parquet/test_metadata.py     |  2 +-
 python/pyarrow/tests/parquet/test_pandas.py       |  6 ++----
 python/pyarrow/tests/test_pandas.py               |  3 +--
 r/R/enums.R                                       |  2 +-
 r/R/parquet.R                                     | 15 +++------------
 r/man/enums.Rd                                    |  2 +-
 r/man/write_parquet.Rd                            |  4 ++--
 r/tests/testthat/test-parquet.R                   |  9 ---------
 14 files changed, 18 insertions(+), 64 deletions(-)

diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc 
b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
index f80ab83c86..0cc5f948c7 100644
--- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
+++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
@@ -1264,7 +1264,7 @@ TEST_F(TestInt96ParquetIO, ReadIntoTimestamp) {
 
 using TestUInt32ParquetIO = TestParquetIO<::arrow::UInt32Type>;
 
-TEST_F(TestUInt32ParquetIO, Parquet_2_0_Compatibility) {
+TEST_F(TestUInt32ParquetIO, Parquet_2_6_Compatibility) {
   // This also tests max_definition_level = 1
   std::shared_ptr<Array> values;
 
@@ -2055,10 +2055,6 @@ TEST(TestArrowReadWrite, 
ParquetVersionTimestampDifferences) {
                                           .version(ParquetVersion::PARQUET_1_0)
                                           ->build();
   ARROW_SUPPRESS_DEPRECATION_WARNING
-  auto parquet_version_2_0_properties = ::parquet::WriterProperties::Builder()
-                                            
.version(ParquetVersion::PARQUET_2_0)
-                                            ->build();
-  ARROW_UNSUPPRESS_DEPRECATION_WARNING
   auto parquet_version_2_4_properties = ::parquet::WriterProperties::Builder()
                                             
.version(ParquetVersion::PARQUET_2_4)
                                             ->build();
@@ -2066,8 +2062,8 @@ TEST(TestArrowReadWrite, 
ParquetVersionTimestampDifferences) {
                                             
.version(ParquetVersion::PARQUET_2_6)
                                             ->build();
   const std::vector<std::shared_ptr<WriterProperties>> all_properties = {
-      parquet_version_1_properties, parquet_version_2_0_properties,
-      parquet_version_2_4_properties, parquet_version_2_6_properties};
+      parquet_version_1_properties, parquet_version_2_4_properties,
+      parquet_version_2_6_properties};
 
   {
     // Using Parquet version 1.0 and 2.4 defaults, seconds should be coerced to
@@ -2081,13 +2077,11 @@ TEST(TestArrowReadWrite, 
ParquetVersionTimestampDifferences) {
                                                      
parquet_version_2_4_properties));
   }
   {
-    // Using Parquet version 2.0 and 2.6 defaults, seconds should be coerced to
+    // Using Parquet version 2.6 defaults, seconds should be coerced to
     // milliseconds and nanoseconds should be retained
     auto expected_schema = schema({field("ts:s", t_ms), field("ts:ms", t_ms),
                                    field("ts:us", t_us), field("ts:ns", 
t_ns)});
     auto expected_table = Table::Make(expected_schema, {a_ms, a_ms, a_us, 
a_ns});
-    ASSERT_NO_FATAL_FAILURE(CheckConfiguredRoundtrip(input_table, 
expected_table,
-                                                     
parquet_version_2_0_properties));
     ASSERT_NO_FATAL_FAILURE(CheckConfiguredRoundtrip(input_table, 
expected_table,
                                                      
parquet_version_2_6_properties));
   }
@@ -2133,9 +2127,8 @@ TEST(TestArrowReadWrite, 
ParquetVersionTimestampDifferences) {
                              CreateOutputStream(), input_table->num_rows(), 
properties,
                              arrow_coerce_to_nanos_properties));
   }
-  // Using Parquet versions "2.0" and 2.6, coercing to (int64) nanoseconds is 
allowed
-  for (const auto& properties :
-       {parquet_version_2_0_properties, parquet_version_2_6_properties}) {
+  // Using Parquet version 2.6, coercing to (int64) nanoseconds is allowed
+  for (const auto& properties : {parquet_version_2_6_properties}) {
     ARROW_SCOPED_TRACE("format = ", 
ParquetVersionToString(properties->version()));
     auto expected_schema = schema({field("ts:s", t_ns), field("ts:ms", t_ns),
                                    field("ts:us", t_ns), field("ts:ns", 
t_ns)});
diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc
index 9b53da021f..398ff761bd 100644
--- a/cpp/src/parquet/metadata.cc
+++ b/cpp/src/parquet/metadata.cc
@@ -76,9 +76,6 @@ std::string ParquetVersionToString(ParquetVersion::type ver) {
     case ParquetVersion::PARQUET_1_0:
       return "1.0";
       ARROW_SUPPRESS_DEPRECATION_WARNING
-    case ParquetVersion::PARQUET_2_0:
-      return "pseudo-2.0";
-      ARROW_UNSUPPRESS_DEPRECATION_WARNING
     case ParquetVersion::PARQUET_2_4:
       return "2.4";
     case ParquetVersion::PARQUET_2_6:
diff --git a/cpp/src/parquet/type_fwd.h b/cpp/src/parquet/type_fwd.h
index cda0dc5a77..c2e902c41f 100644
--- a/cpp/src/parquet/type_fwd.h
+++ b/cpp/src/parquet/type_fwd.h
@@ -38,12 +38,6 @@ struct ParquetVersion {
     /// corresponding converted type.
     PARQUET_1_0,
 
-    /// DEPRECATED: Enable Parquet format 2.6 features
-    ///
-    /// This misleadingly named enum value is roughly similar to PARQUET_2_6.
-    PARQUET_2_0 ARROW_DEPRECATED_ENUM_VALUE("use PARQUET_2_4 or PARQUET_2_6 "
-                                            "for fine-grained feature 
selection"),
-
     /// Enable Parquet format 2.4 and earlier features when writing
     ///
     /// This enables UINT32 as well as logical types which don't have
diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd
index 1e3c89e4e7..e6de9712f8 100644
--- a/python/pyarrow/_parquet.pxd
+++ b/python/pyarrow/_parquet.pxd
@@ -134,7 +134,6 @@ cdef extern from "parquet/api/schema.h" namespace "parquet" 
nogil:
 
     enum ParquetVersion" parquet::ParquetVersion::type":
         ParquetVersion_V1" parquet::ParquetVersion::PARQUET_1_0"
-        ParquetVersion_V2_0" parquet::ParquetVersion::PARQUET_2_0"
         ParquetVersion_V2_4" parquet::ParquetVersion::PARQUET_2_4"
         ParquetVersion_V2_6" parquet::ParquetVersion::PARQUET_2_6"
 
diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index 6bc77ed795..55c2866243 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -991,8 +991,6 @@ cdef class FileMetaData(_Weakrefable):
         cdef ParquetVersion version = self._metadata.version()
         if version == ParquetVersion_V1:
             return '1.0'
-        elif version == ParquetVersion_V2_0:
-            return 'pseudo-2.0'
         elif version == ParquetVersion_V2_4:
             return '2.4'
         elif version == ParquetVersion_V2_6:
@@ -1888,12 +1886,6 @@ cdef shared_ptr[WriterProperties] 
_create_writer_properties(
     if version is not None:
         if version == "1.0":
             props.version(ParquetVersion_V1)
-        elif version in ("2.0", "pseudo-2.0"):
-            warnings.warn(
-                "Parquet format '2.0' pseudo version is deprecated, use "
-                "'2.4' or '2.6' for fine-grained feature selection",
-                FutureWarning, stacklevel=2)
-            props.version(ParquetVersion_V2_0)
         elif version == "2.4":
             props.version(ParquetVersion_V2_4)
         elif version == "2.6":
diff --git a/python/pyarrow/tests/parquet/test_data_types.py 
b/python/pyarrow/tests/parquet/test_data_types.py
index 1428f80239..855d5952b9 100644
--- a/python/pyarrow/tests/parquet/test_data_types.py
+++ b/python/pyarrow/tests/parquet/test_data_types.py
@@ -58,7 +58,7 @@ pytestmark = pytest.mark.parquet
 
 @pytest.mark.pandas
 @pytest.mark.parametrize('chunk_size', [None, 1000])
-def test_parquet_2_0_roundtrip(tempdir, chunk_size):
+def test_parquet_2_6_roundtrip(tempdir, chunk_size):
     df = alltypes_sample(size=10000, categorical=True)
 
     filename = tempdir / 'pandas_roundtrip.parquet'
diff --git a/python/pyarrow/tests/parquet/test_metadata.py 
b/python/pyarrow/tests/parquet/test_metadata.py
index cf17f830f2..b3340d93e4 100644
--- a/python/pyarrow/tests/parquet/test_metadata.py
+++ b/python/pyarrow/tests/parquet/test_metadata.py
@@ -554,7 +554,7 @@ def test_write_metadata(tempdir):
         assert b'ARROW:schema' not in schema_as_arrow.metadata
 
     # pass through writer keyword arguments
-    for version in ["1.0", "2.0", "2.4", "2.6"]:
+    for version in ["1.0", "2.4", "2.6"]:
         pq.write_metadata(schema, path, version=version)
         parquet_meta = pq.read_metadata(path)
         # The version is stored as a single integer in the Parquet metadata,
diff --git a/python/pyarrow/tests/parquet/test_pandas.py 
b/python/pyarrow/tests/parquet/test_pandas.py
index 2ea2f46873..58a39c1967 100644
--- a/python/pyarrow/tests/parquet/test_pandas.py
+++ b/python/pyarrow/tests/parquet/test_pandas.py
@@ -121,7 +121,7 @@ def test_pandas_parquet_column_multiindex(tempdir):
 
 
 @pytest.mark.pandas
-def test_pandas_parquet_2_0_roundtrip_read_pandas_no_index_written(tempdir):
+def test_pandas_parquet_2_roundtrip_read_pandas_no_index_written(tempdir):
     df = alltypes_sample(size=10000)
 
     filename = tempdir / 'pandas_roundtrip.parquet'
@@ -270,14 +270,12 @@ def test_pandas_parquet_configuration_options(tempdir):
 
 
 @pytest.mark.pandas
-@pytest.mark.filterwarnings("ignore:Parquet format '2.0':FutureWarning")
 def test_spark_flavor_preserves_pandas_metadata():
     df = _test_dataframe(size=100)
     df.index = np.arange(0, 10 * len(df), 10)
     df.index.name = 'foo'
 
-    result = _roundtrip_pandas_dataframe(df, {'version': '2.0',
-                                              'flavor': 'spark'})
+    result = _roundtrip_pandas_dataframe(df, {'flavor': 'spark'})
     tm.assert_frame_equal(result, df)
 
 
diff --git a/python/pyarrow/tests/test_pandas.py 
b/python/pyarrow/tests/test_pandas.py
index 4ad04c9ad1..39757c1f4c 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -4892,14 +4892,13 @@ def make_df_with_timestamps():
 
 
 @pytest.mark.parquet
-@pytest.mark.filterwarnings("ignore:Parquet format '2.0':FutureWarning")
 def test_timestamp_as_object_parquet(tempdir):
     # Timestamps can be stored as Parquet and reloaded into Pandas with no loss
     # of information if the timestamp_as_object option is True.
     df = make_df_with_timestamps()
     table = pa.Table.from_pandas(df)
     filename = tempdir / "timestamps_from_pandas.parquet"
-    pq.write_table(table, filename, version="2.0")
+    pq.write_table(table, filename)
     result = pq.read_table(filename)
     df2 = result.to_pandas(timestamp_as_object=True)
     tm.assert_frame_equal(df, df2)
diff --git a/r/R/enums.R b/r/R/enums.R
index 7dd5f2f858..98995b2a2e 100644
--- a/r/R/enums.R
+++ b/r/R/enums.R
@@ -129,7 +129,7 @@ FileType <- enum("FileType",
 #' @export
 #' @rdname enums
 ParquetVersionType <- enum("ParquetVersionType",
-  PARQUET_1_0 = 0L, PARQUET_2_0 = 1L, PARQUET_2_4 = 2L, PARQUET_2_6 = 3L
+  PARQUET_1_0 = 0L, PARQUET_2_4 = 2L, PARQUET_2_6 = 3L
 )
 
 #' @export
diff --git a/r/R/parquet.R b/r/R/parquet.R
index 88ce1c7712..91ddfc63a2 100644
--- a/r/R/parquet.R
+++ b/r/R/parquet.R
@@ -98,8 +98,8 @@ read_parquet <- function(file,
 #'    the number of columns and number of rows), though if the data has fewer
 #'    than 250 million cells (rows x cols), then the total number of rows is
 #'    used.
-#' @param version parquet version: "1.0", "2.0" (deprecated), "2.4" (default),
-#'    "2.6", or "latest" (currently equivalent to 2.6). Numeric values are
+#' @param version parquet version: "1.0", "2.4" (default), "2.6", or
+#'    "latest" (currently equivalent to 2.6). Numeric values are
 #'    coerced to character.
 #' @param compression compression algorithm. Default "snappy". See details.
 #' @param compression_level compression level. Meaning depends on compression
@@ -232,7 +232,6 @@ ParquetArrowWriterProperties$create <- 
function(use_deprecated_int96_timestamps
 
 valid_parquet_version <- c(
   "1.0" = ParquetVersionType$PARQUET_1_0,
-  "2.0" = ParquetVersionType$PARQUET_2_0,
   "2.4" = ParquetVersionType$PARQUET_2_4,
   "2.6" = ParquetVersionType$PARQUET_2_6,
   "latest" = ParquetVersionType$PARQUET_2_6
@@ -252,15 +251,7 @@ make_valid_parquet_version <- function(version, 
valid_versions = valid_parquet_v
       call. = FALSE
     )
   }
-  out <- valid_versions[[arg_match(version, values = names(valid_versions))]]
-
-  if (identical(out, ParquetVersionType$PARQUET_2_0)) {
-    warning(
-      'Parquet format version "2.0" is deprecated. Use "2.4" or "2.6" to 
select format features.',
-      call. = FALSE
-    )
-  }
-  out
+  valid_versions[[arg_match(version, values = names(valid_versions))]]
 }
 
 #' @title ParquetWriterProperties class
diff --git a/r/man/enums.Rd b/r/man/enums.Rd
index dd0ca944b8..e2f50dfc86 100644
--- a/r/man/enums.Rd
+++ b/r/man/enums.Rd
@@ -36,7 +36,7 @@ An object of class \code{Compression::type} (inherits from 
\code{arrow-enum}) of
 
 An object of class \code{FileType} (inherits from \code{arrow-enum}) of length 
4.
 
-An object of class \code{ParquetVersionType} (inherits from \code{arrow-enum}) 
of length 4.
+An object of class \code{ParquetVersionType} (inherits from \code{arrow-enum}) 
of length 3.
 
 An object of class \code{MetadataVersion} (inherits from \code{arrow-enum}) of 
length 5.
 
diff --git a/r/man/write_parquet.Rd b/r/man/write_parquet.Rd
index 954c692dad..859006571a 100644
--- a/r/man/write_parquet.Rd
+++ b/r/man/write_parquet.Rd
@@ -32,8 +32,8 @@ the number of columns and number of rows), though if the data 
has fewer
 than 250 million cells (rows x cols), then the total number of rows is
 used.}
 
-\item{version}{parquet version: "1.0", "2.0" (deprecated), "2.4" (default),
-"2.6", or "latest" (currently equivalent to 2.6). Numeric values are
+\item{version}{parquet version: "1.0", "2.4" (default), "2.6", or
+"latest" (currently equivalent to 2.6). Numeric values are
 coerced to character.}
 
 \item{compression}{compression algorithm. Default "snappy". See details.}
diff --git a/r/tests/testthat/test-parquet.R b/r/tests/testthat/test-parquet.R
index edca48e92d..e769aa9ea0 100644
--- a/r/tests/testthat/test-parquet.R
+++ b/r/tests/testthat/test-parquet.R
@@ -134,12 +134,6 @@ test_that("make_valid_parquet_version()", {
     make_valid_parquet_version("1.0"),
     ParquetVersionType$PARQUET_1_0
   )
-  expect_deprecated(
-    expect_equal(
-      make_valid_parquet_version("2.0"),
-      ParquetVersionType$PARQUET_2_0
-    )
-  )
   expect_equal(
     make_valid_parquet_version("2.4"),
     ParquetVersionType$PARQUET_2_4
@@ -154,9 +148,6 @@ test_that("make_valid_parquet_version()", {
   )
 
   expect_equal(make_valid_parquet_version(1), ParquetVersionType$PARQUET_1_0)
-  expect_deprecated(
-    expect_equal(make_valid_parquet_version(2), ParquetVersionType$PARQUET_2_0)
-  )
   expect_equal(make_valid_parquet_version(1.0), ParquetVersionType$PARQUET_1_0)
   expect_equal(make_valid_parquet_version(2.4), ParquetVersionType$PARQUET_2_4)
 })

Reply via email to