kevinjqliu commented on code in PR #2601: URL: https://github.com/apache/iceberg-python/pull/2601#discussion_r2508338735
########## pyproject.toml: ########## @@ -14,147 +14,133 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -[tool.poetry] +[project] name = "pyiceberg" version = "0.10.0" -readme = "README.md" -homepage = "https://py.iceberg.apache.org/" -repository = "https://github.com/apache/iceberg-python" description = "Apache Iceberg is an open table format for huge analytic datasets" -authors = ["Apache Software Foundation <[email protected]>"] -license = "Apache License 2.0" +authors = [{ name = "Apache Software Foundation", email = "[email protected]" }] +requires-python = ">=3.10.0,<4.0.0" +readme = "README.md" +license = "Apache-2.0" +license-files = ["LICEN[CS]E*", "NOTICE*"] classifiers = [ - "License :: OSI Approved :: Apache Software License", - "Operating System :: OS Independent", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] +dependencies = [ + "mmh3>=4.0.0,<6.0.0", + "requests>=2.20.0,<3.0.0", + "click>=7.1.1,<9.0.0", + "rich>=10.11.0,<15.0.0", + "strictyaml>=1.7.0,<2.0.0", # CVE-2020-14343 was fixed in 5.4. 
+ "pydantic>=2.0,<3.0,!=2.4.0,!=2.4.1,!=2.12.0,!=2.12.1", # 2.4.0, 2.4.1, 2.12.0, 2.12.1 has a critical bug + "sortedcontainers==2.4.0", + "fsspec>=2023.1.0", + "pyparsing>=3.1.0,<4.0.0", + "tenacity>=8.2.3,<10.0.0", + "pyroaring>=1.0.0,<2.0.0", + "cachetools>=5.5,<7.0", + "zstandard>=0.13.0,<1.0.0" +] + +[project.optional-dependencies] +pyarrow = [ + "pyarrow>=17.0.0", + "pyiceberg-core>=0.5.1,<0.8.0", +] +pandas = [ + "pyiceberg[pyarrow]", + "pandas>=1.0.0,<3.0.0", +] +duckdb = [ + "pyiceberg[pyarrow]", + "duckdb>=0.5.0,<2.0.0", +] +ray = [ + "pyiceberg[pyarrow]", + "ray>=2.10.0,<=2.44.0", + "pandas>=1.0.0,<3.0.0", ] -packages = [ - { include = "pyiceberg" }, - { from = "vendor", include = "fb303" }, - { from = "vendor", include = "hive_metastore" }, - { include = "tests", format = "sdist" }, - { include = "Makefile", format = "sdist" }, - { include = "NOTICE", format = [ - "sdist", - "wheel", - ] }, +bodo = ["bodo>=2025.7.4"] +daft = ["daft>=0.5.0"] +polars = ["polars>=1.21.0,<2"] +snappy = ["python-snappy>=0.6.0,<1.0.0"] +hive = ["thrift>=0.13.0,<1.0.0"] +hive-kerberos = [ + "thrift>=0.13.0,<1.0.0", + "thrift-sasl>=0.4.3", + "kerberos>=1.3.1,<2", ] -include = [ - { path = "dev", format = "sdist" }, - { path = "pyiceberg/**/*.so", format = "wheel" }, - { path = "pyiceberg/**/*.pyd", format = "wheel" }, - { path = "poetry.lock", format = "sdist" }, +s3fs = ["s3fs>=2023.1.0"] +glue = ["boto3>=1.24.59"] +adlfs = ["adlfs>=2024.7.0"] +dynamodb = ["boto3>=1.24.59"] +bigquery = ["google-cloud-bigquery>=3.33.0,<4"] +sql-postgres = [ + "sqlalchemy>=2.0.18,<3", + "psycopg2-binary>=2.9.6", ] +sql-sqlite = ["sqlalchemy>=2.0.18,<3"] +gcsfs = ["gcsfs>=2023.1.0"] +rest-sigv4 = ["boto3>=1.24.59"] +hf = ["huggingface-hub>=0.24.0"] +pyiceberg-core = ["pyiceberg-core>=0.5.1,<0.7.0"] Review Comment: ```suggestion pyiceberg-core = ["pyiceberg-core>=0.5.1,<0.8.0"] ``` ########## dev/.rat-excludes: ########## @@ -3,5 +3,9 @@ build .git .gitignore -poetry.lock +uv.lock mkdocs/* 
+setup.cfg +(^|.*/)[^/]*\.egg-info(/.*)?$ +# Cython generated files +decoder_fast.c Review Comment: nit: should we exclude `setup.cfg` and `pyiceberg.egg-info` from the sdist instead? `decoder_fast.c` has the Apache copyright headers, so we can safely remove it from this list ########## pyproject.toml: ########## @@ -14,147 +14,133 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -[tool.poetry] +[project] name = "pyiceberg" version = "0.10.0" -readme = "README.md" -homepage = "https://py.iceberg.apache.org/" -repository = "https://github.com/apache/iceberg-python" description = "Apache Iceberg is an open table format for huge analytic datasets" -authors = ["Apache Software Foundation <[email protected]>"] -license = "Apache License 2.0" +authors = [{ name = "Apache Software Foundation", email = "[email protected]" }] +requires-python = ">=3.10.0,<4.0.0" +readme = "README.md" +license = "Apache-2.0" +license-files = ["LICEN[CS]E*", "NOTICE*"] classifiers = [ - "License :: OSI Approved :: Apache Software License", - "Operating System :: OS Independent", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] +dependencies = [ + "mmh3>=4.0.0,<6.0.0", + "requests>=2.20.0,<3.0.0", + "click>=7.1.1,<9.0.0", + "rich>=10.11.0,<15.0.0", + "strictyaml>=1.7.0,<2.0.0", # CVE-2020-14343 was fixed in 5.4. 
+ "pydantic>=2.0,<3.0,!=2.4.0,!=2.4.1,!=2.12.0,!=2.12.1", # 2.4.0, 2.4.1, 2.12.0, 2.12.1 has a critical bug + "sortedcontainers==2.4.0", + "fsspec>=2023.1.0", + "pyparsing>=3.1.0,<4.0.0", + "tenacity>=8.2.3,<10.0.0", + "pyroaring>=1.0.0,<2.0.0", + "cachetools>=5.5,<7.0", + "zstandard>=0.13.0,<1.0.0" +] + +[project.optional-dependencies] +pyarrow = [ + "pyarrow>=17.0.0", + "pyiceberg-core>=0.5.1,<0.8.0", +] +pandas = [ + "pyiceberg[pyarrow]", + "pandas>=1.0.0,<3.0.0", +] +duckdb = [ + "pyiceberg[pyarrow]", + "duckdb>=0.5.0,<2.0.0", +] +ray = [ + "pyiceberg[pyarrow]", Review Comment: nit: i think `pyiceberg[pyarrow]` here will install `pyiceberg-core` as well. For correctness, i think it might be better to go back to using `pyarrow>=17.0.0`. Even with the duplication, i think uv will edit all the instances at once ########## pyproject.toml: ########## @@ -14,147 +14,133 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
-[tool.poetry] +[project] name = "pyiceberg" version = "0.10.0" -readme = "README.md" -homepage = "https://py.iceberg.apache.org/" -repository = "https://github.com/apache/iceberg-python" description = "Apache Iceberg is an open table format for huge analytic datasets" -authors = ["Apache Software Foundation <[email protected]>"] -license = "Apache License 2.0" +authors = [{ name = "Apache Software Foundation", email = "[email protected]" }] +requires-python = ">=3.10.0,<4.0.0" +readme = "README.md" +license = "Apache-2.0" +license-files = ["LICEN[CS]E*", "NOTICE*"] classifiers = [ - "License :: OSI Approved :: Apache Software License", - "Operating System :: OS Independent", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] +dependencies = [ + "mmh3>=4.0.0,<6.0.0", + "requests>=2.20.0,<3.0.0", + "click>=7.1.1,<9.0.0", + "rich>=10.11.0,<15.0.0", + "strictyaml>=1.7.0,<2.0.0", # CVE-2020-14343 was fixed in 5.4. 
+ "pydantic>=2.0,<3.0,!=2.4.0,!=2.4.1,!=2.12.0,!=2.12.1", # 2.4.0, 2.4.1, 2.12.0, 2.12.1 has a critical bug + "sortedcontainers==2.4.0", + "fsspec>=2023.1.0", + "pyparsing>=3.1.0,<4.0.0", + "tenacity>=8.2.3,<10.0.0", + "pyroaring>=1.0.0,<2.0.0", + "cachetools>=5.5,<7.0", + "zstandard>=0.13.0,<1.0.0" +] + +[project.optional-dependencies] +pyarrow = [ + "pyarrow>=17.0.0", + "pyiceberg-core>=0.5.1,<0.8.0", +] +pandas = [ + "pyiceberg[pyarrow]", + "pandas>=1.0.0,<3.0.0", +] +duckdb = [ + "pyiceberg[pyarrow]", + "duckdb>=0.5.0,<2.0.0", +] +ray = [ + "pyiceberg[pyarrow]", + "ray>=2.10.0,<=2.44.0", + "pandas>=1.0.0,<3.0.0", ] -packages = [ - { include = "pyiceberg" }, - { from = "vendor", include = "fb303" }, - { from = "vendor", include = "hive_metastore" }, - { include = "tests", format = "sdist" }, - { include = "Makefile", format = "sdist" }, - { include = "NOTICE", format = [ - "sdist", - "wheel", - ] }, +bodo = ["bodo>=2025.7.4"] +daft = ["daft>=0.5.0"] +polars = ["polars>=1.21.0,<2"] +snappy = ["python-snappy>=0.6.0,<1.0.0"] +hive = ["thrift>=0.13.0,<1.0.0"] +hive-kerberos = [ + "thrift>=0.13.0,<1.0.0", + "thrift-sasl>=0.4.3", + "kerberos>=1.3.1,<2", ] -include = [ - { path = "dev", format = "sdist" }, - { path = "pyiceberg/**/*.so", format = "wheel" }, - { path = "pyiceberg/**/*.pyd", format = "wheel" }, - { path = "poetry.lock", format = "sdist" }, +s3fs = ["s3fs>=2023.1.0"] +glue = ["boto3>=1.24.59"] +adlfs = ["adlfs>=2024.7.0"] +dynamodb = ["boto3>=1.24.59"] +bigquery = ["google-cloud-bigquery>=3.33.0,<4"] +sql-postgres = [ + "sqlalchemy>=2.0.18,<3", + "psycopg2-binary>=2.9.6", ] +sql-sqlite = ["sqlalchemy>=2.0.18,<3"] +gcsfs = ["gcsfs>=2023.1.0"] +rest-sigv4 = ["boto3>=1.24.59"] +hf = ["huggingface-hub>=0.24.0"] +pyiceberg-core = ["pyiceberg-core>=0.5.1,<0.7.0"] Review Comment: also need to update the lock file ``` uv lock --upgrade-package pyiceberg-core==0.7.0 ``` ########## MANIFEST.in: ########## @@ -15,4 +15,18 @@ # specific language governing permissions 
and limitations # under the License. -graft src +# Include Cython source files for building from source +recursive-include pyiceberg *.pyx *.c + +# Include test files in sdist +recursive-include tests *.py Review Comment: A diff between nightly and this branch shows that the `tests/table/bitmaps/*` files are missing, so maybe we should include everything in tests/ ``` Only in nightly/pyiceberg-0.10.0.dev20251109003122/tests/table: bitmaps ``` ```suggestion recursive-include tests ``` ########## MANIFEST.in: ########## @@ -15,4 +15,18 @@ # specific language governing permissions and limitations # under the License. -graft src +# Include Cython source files for building from source +recursive-include pyiceberg *.pyx *.c + +# Include test files in sdist +recursive-include tests *.py + +# Include development files +include Makefile +recursive-include dev * + +# Exclude build artifacts +prune .venv +prune build +prune dist +prune .pytest_cache Review Comment: ```suggestion prune .pytest_cache prune pyiceberg.egg-info prune setup.cfg ``` Can we remove `pyiceberg.egg-info` and `setup.cfg` from the sdist? These are auto-generated, and we can then also remove them from `dev/.rat-excludes` ########## MANIFEST.in: ########## @@ -15,4 +15,18 @@ # specific language governing permissions and limitations # under the License. -graft src +# Include Cython source files for building from source +recursive-include pyiceberg *.pyx *.c Review Comment: A diff between nightly and this branch shows that `decoder_fast.c` is newly added. I think we can get rid of `*.pyx *.c` ```suggestion recursive-include pyiceberg ``` ########## pyproject.toml: ########## @@ -14,147 +14,133 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
-[tool.poetry] +[project] name = "pyiceberg" version = "0.10.0" -readme = "README.md" -homepage = "https://py.iceberg.apache.org/" -repository = "https://github.com/apache/iceberg-python" description = "Apache Iceberg is an open table format for huge analytic datasets" -authors = ["Apache Software Foundation <[email protected]>"] -license = "Apache License 2.0" +authors = [{ name = "Apache Software Foundation", email = "[email protected]" }] +requires-python = ">=3.10.0,<4.0.0" +readme = "README.md" +license = "Apache-2.0" +license-files = ["LICEN[CS]E*", "NOTICE*"] Review Comment: ```suggestion license-files = ["LICENSE", "NOTICE"] ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
