korbit-ai[bot] commented on code in PR #32556:
URL: https://github.com/apache/superset/pull/32556#discussion_r1986517126


##########
superset/db_engine_specs/odps.py:
##########
@@ -0,0 +1,192 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import logging
+from typing import Any, Optional, TYPE_CHECKING
+
+from sqlalchemy import select, text
+from sqlalchemy.engine.base import Engine
+
+from superset.databases.schemas import (
+    TableMetadataColumnsResponse,
+    TableMetadataResponse,
+)
+from superset.databases.utils import (
+    get_col_type,
+    get_foreign_keys_metadata,
+    get_indexes_metadata,
+)
+from superset.db_engine_specs.base import BaseEngineSpec, BasicParametersMixin
+from superset.sql.parse import Partition, SQLScript
+from superset.sql_parse import Table
+from superset.superset_typing import ResultSetColumnType
+
+if TYPE_CHECKING:
+    from superset.models.core import Database
+
+logger = logging.getLogger(__name__)
+
+
+class OdpsBaseEngineSpec(BaseEngineSpec):
+    @classmethod
+    def get_table_metadata(
+        cls,
+        database: Database,
+        table: Table,
+        partition: Optional[Partition] = None,
+    ) -> TableMetadataResponse:
+        """
+        Returns basic table metadata
+        :param database: Database instance
+        :param table: A Table instance
+        :param partition: A Table partition info
+        :return: Basic table metadata
+        """
+        return cls.get_table_metadata(database, table, partition)
+
+
+class OdpsEngineSpec(BasicParametersMixin, OdpsBaseEngineSpec):
+    default_driver = "odps"
+
+    @classmethod
+    def get_table_metadata(
+        cls, database: Any, table: Table, partition: Optional[Partition] = None
+    ) -> TableMetadataResponse:
+        """
+        Get table metadata information, including type, pk, fks.
+        This function raises SQLAlchemyError when a schema is not found.
+
+        :param partition: The table's partition info
+        :param database: The database model
+        :param table: Table instance
+        :return: Dict table metadata ready for API response
+        """
+        keys = []
+        columns = database.get_columns(table)
+        primary_key = database.get_pk_constraint(table)
+        if primary_key and primary_key.get("constrained_columns"):
+            primary_key["column_names"] = 
primary_key.pop("constrained_columns")
+            primary_key["type"] = "pk"
+            keys += [primary_key]
+        foreign_keys = get_foreign_keys_metadata(database, table)
+        indexes = get_indexes_metadata(database, table)
+        keys += foreign_keys + indexes
+        payload_columns: list[TableMetadataColumnsResponse] = []
+        table_comment = database.get_table_comment(table)
+        for col in columns:
+            dtype = get_col_type(col)
+            payload_columns.append(
+                {
+                    "name": col["column_name"],
+                    "type": dtype.split("(")[0] if "(" in dtype else dtype,
+                    "longType": dtype,
+                    "keys": [
+                        k for k in keys if col["column_name"] in 
k["column_names"]
+                    ],
+                    "comment": col.get("comment"),
+                }
+            )
+
+        with database.get_sqla_engine(
+            catalog=table.catalog, schema=table.schema
+        ) as engine:
+            return {
+                "name": table.table,
+                "columns": payload_columns,
+                "selectStar": cls.select_star(
+                    database=database,
+                    table=table,
+                    engine=engine,
+                    limit=100,
+                    show_cols=False,
+                    indent=True,
+                    latest_partition=True,
+                    cols=columns,
+                    partition=partition,
+                ),
+                "primaryKey": primary_key,
+                "foreignKeys": foreign_keys,
+                "indexes": keys,
+                "comment": table_comment,
+            }
+
+    @classmethod
+    def select_star(  # pylint: disable=too-many-arguments
+        cls,
+        database: Database,
+        table: Table,
+        engine: Engine,
+        limit: int = 100,
+        show_cols: bool = False,
+        indent: bool = True,
+        latest_partition: bool = True,
+        cols: list[ResultSetColumnType] | None = None,
+        partition: Optional[Partition] = None,
+    ) -> str:
+        """
+        Generate a "SELECT * from [schema.]table_name" query with appropriate 
limit.
+
+        WARNING: expects only unquoted table and schema names.
+
+        :param partition: The table's partition info
+        :param database: Database instance
+        :param table: Table instance
+        :param engine: SqlAlchemy Engine instance
+        :param limit: limit to impose on query
+        :param show_cols: Show columns in query; otherwise use "*"
+        :param indent: Add indentation to query
+        :param latest_partition: Only query the latest partition
+        :param cols: Columns to include in query
+        :return: SQL query
+        """
+        # pylint: disable=redefined-outer-name
+        fields: str | list[Any] = "*"
+        cols = cols or []
+        if (show_cols or latest_partition) and not cols:
+            cols = database.get_columns(table)

Review Comment:
   ### Redundant Column Metadata Fetch <sub>![category 
Performance](https://img.shields.io/badge/Performance-4f46e5)</sub>
   
   <details>
     <summary>Tell me more</summary>
   
   ###### What is the issue?
   Database column metadata is fetched redundantly in select_star() when cols 
are already available from the get_table_metadata() call
   
   ###### Why this matters
   This causes an unnecessary extra database round-trip to fetch the same 
column metadata that was already retrieved earlier in the call chain
   
   ###### Suggested change ∙ *Feature Preview*
   Pass the previously fetched columns from get_table_metadata() to 
select_star() instead of retrieving them again. Modify the code to:
   ```python
   cols = cols or columns  # Use columns passed from get_table_metadata
   ```
   
   
   </details>
   
   <sub>
   
   [![Report a problem with this 
comment](https://img.shields.io/badge/Report%20a%20problem%20with%20this%20comment-gray.svg?logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9IiNmNWVjMDAiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIiBjbGFzcz0ibHVjaWRlIGx1Y2lkZS10cmlhbmdsZS1hbGVydCI+PHBhdGggZD0ibTIxLjczIDE4LTgtMTRhMiAyIDAgMCAwLTMuNDggMGwtOCAxNEEyIDIgMCAwIDAgNCAyMWgxNmEyIDIgMCAwIDAgMS43My0zIi8+PHBhdGggZD0iTTEyIDl2NCIvPjxwYXRoIGQ9Ik0xMiAxN2guMDEiLz48L3N2Zz4=)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/2e6f02d0-9773-4c14-9cd5-0f13a1db6614?suggestedFixEnabled=true)
   
   💬 Looking for more details? Reply to this comment to chat with Korbit.
   </sub>
   
   <!--- korbi internal id:7c56619e-2b94-4a9e-b599-2058c9518235 -->
   



##########
superset/databases/api.py:
##########
@@ -1055,15 +1057,21 @@ def table_metadata(self, pk: int) -> FlaskResponse:
             parameters = QualifiedTableSchema().load(request.args)
         except ValidationError as ex:
             raise InvalidPayloadSchemaError(ex) from ex
-
-        table = Table(parameters["name"], parameters["schema"], 
parameters["catalog"])
+        table_name = str(parameters["name"])
+        table = Table(table_name, parameters["schema"], parameters["catalog"])
+        is_partitioned_table, partition_fields = 
DatabaseDAO.is_odps_partitioned_table(
+            database, table_name
+        )
         try:
             security_manager.raise_for_access(database=database, table=table)
         except SupersetSecurityException as ex:
             # instead of raising 403, raise 404 to hide table existence
             raise TableNotFoundException("No such table") from ex
-
-        payload = database.db_engine_spec.get_table_metadata(database, table)
+        partition = Partition(is_partitioned_table, partition_fields)
+        if is_partitioned_table:
+            payload = OdpsEngineSpec.get_table_metadata(database, table, 
partition)
+        else:
+            payload = database.db_engine_spec.get_table_metadata(database, 
table)

Review Comment:
   ### Tight coupling with concrete engine implementation <sub>![category 
Design](https://img.shields.io/badge/Design-0d9488)</sub>
   
   <details>
     <summary>Tell me more</summary>
   
   ###### What is the issue?
   Direct usage of OdpsEngineSpec creates tight coupling and violates the 
Open-Closed Principle. The code specifically checks for a partition case and 
directly calls a concrete implementation.
   
   ###### Why this matters
   If new partition handling is needed for other database engines, this code 
will have to be modified. This makes the code harder to extend and maintain.
   
   ###### Suggested change ∙ *Feature Preview*
   Move partition handling logic into the engine spec base class:
   ```python
   payload = database.db_engine_spec.get_table_metadata(database, table, 
partition=partition)
   ```
   And let each engine spec implementation handle partitions appropriately.
   
   
   </details>
   
   <sub>
   
   [![Report a problem with this 
comment](https://img.shields.io/badge/Report%20a%20problem%20with%20this%20comment-gray.svg?logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9IiNmNWVjMDAiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIiBjbGFzcz0ibHVjaWRlIGx1Y2lkZS10cmlhbmdsZS1hbGVydCI+PHBhdGggZD0ibTIxLjczIDE4LTgtMTRhMiAyIDAgMCAwLTMuNDggMGwtOCAxNEEyIDIgMCAwIDAgNCAyMWgxNmEyIDIgMCAwIDAgMS43My0zIi8+PHBhdGggZD0iTTEyIDl2NCIvPjxwYXRoIGQ9Ik0xMiAxN2guMDEiLz48L3N2Zz4=)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/02f013e6-11d8-48d1-811b-1cf88a554771?suggestedFixEnabled=true)
   
   💬 Looking for more details? Reply to this comment to chat with Korbit.
   </sub>
   
   <!--- korbi internal id:966f872d-64f3-4090-bce5-0829e9205a9c -->
   



##########
superset/daos/database.py:
##########
@@ -166,6 +170,37 @@ def get_ssh_tunnel(cls, database_id: int) -> SSHTunnel | 
None:
 
         return ssh_tunnel
 
+    @classmethod
+    def is_odps_partitioned_table(
+        cls, database: Database, table_name: str
+    ) -> Tuple[bool, List[str]]:
+        """
+        This function is used to determine and retrieve
+        partition information of the odsp table.
+        The return values are whether the partition
+        table is partitioned and the names of all partition fields.
+        """
+        if not database:
+            raise ValueError("Database not found")
+        uri = database.sqlalchemy_uri
+        access_key = database.password
+        pattern = re.compile(
+            
r"odps://(?P<username>[^:]+):(?P<password>[^@]+)@(?P<project>[^/]+)/(?:\?endpoint=(?P<endpoint>[^&]+))"
+        )
+        if match := pattern.match(unquote(uri)):

Review Comment:
   ### Overly Strict ODPS URI Pattern Matching <sub>![category 
Functionality](https://img.shields.io/badge/Functionality-0284c7)</sub>
   
   <details>
     <summary>Tell me more</summary>
   
   ###### What is the issue?
   The regex pattern only matches ODPS URIs with a specific format, but does 
not handle variations in the URI structure that might include additional 
parameters or different orderings.
   
   ###### Why this matters
   This could cause the function to fail for valid ODPS URIs that have 
different parameter orderings or additional query parameters, leading to false 
negatives in partition detection.
   
   ###### Suggested change ∙ *Feature Preview*
   Modify the regex pattern to be more flexible and handle different URI 
formats:
   ```python
   pattern = re.compile(
       
r'odps://(?P<username>[^:]+):(?P<password>[^@]+)@(?P<project>[^/?]+).*?[?&]endpoint=(?P<endpoint>[^&]+)'
   )
   ```
   
   
   </details>
   
   <sub>
   
   [![Report a problem with this 
comment](https://img.shields.io/badge/Report%20a%20problem%20with%20this%20comment-gray.svg?logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9IiNmNWVjMDAiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIiBjbGFzcz0ibHVjaWRlIGx1Y2lkZS10cmlhbmdsZS1hbGVydCI+PHBhdGggZD0ibTIxLjczIDE4LTgtMTRhMiAyIDAgMCAwLTMuNDggMGwtOCAxNEEyIDIgMCAwIDAgNCAyMWgxNmEyIDIgMCAwIDAgMS43My0zIi8+PHBhdGggZD0iTTEyIDl2NCIvPjxwYXRoIGQ9Ik0xMiAxN2guMDEiLz48L3N2Zz4=)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/eb1367b3-93b1-4494-8ddb-22e926e62814?suggestedFixEnabled=true)
   
   💬 Looking for more details? Reply to this comment to chat with Korbit.
   </sub>
   
   <!--- korbi internal id:a34aa7df-2ac7-4599-bb57-c6fbb397eb4a -->
   



##########
superset/databases/api.py:
##########
@@ -1055,15 +1057,21 @@
             parameters = QualifiedTableSchema().load(request.args)
         except ValidationError as ex:
             raise InvalidPayloadSchemaError(ex) from ex
-
-        table = Table(parameters["name"], parameters["schema"], 
parameters["catalog"])
+        table_name = str(parameters["name"])
+        table = Table(table_name, parameters["schema"], parameters["catalog"])
+        is_partitioned_table, partition_fields = 
DatabaseDAO.is_odps_partitioned_table(
+            database, table_name
+        )

Review Comment:
   ### Database-specific logic in generic DAO <sub>![category 
Design](https://img.shields.io/badge/Design-0d9488)</sub>
   
   <details>
     <summary>Tell me more</summary>
   
   ###### What is the issue?
   The method name suggests it's specific to ODPS but resides in the generic 
DatabaseDAO class
   
   ###### Why this matters
   Mixing database-specific logic in a generic DAO violates the Single 
Responsibility Principle and makes the code less maintainable.
   
   ###### Suggested change ∙ *Feature Preview*
   Move database-specific logic to appropriate specialized classes:
   ```python
   class OdpsDAO(DatabaseDAO):
       def is_partitioned_table(self, database, table_name):
           # ODPS specific logic
   ```
   
   
   </details>
   
   <sub>
   
   [![Report a problem with this 
comment](https://img.shields.io/badge/Report%20a%20problem%20with%20this%20comment-gray.svg?logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9IiNmNWVjMDAiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIiBjbGFzcz0ibHVjaWRlIGx1Y2lkZS10cmlhbmdsZS1hbGVydCI+PHBhdGggZD0ibTIxLjczIDE4LTgtMTRhMiAyIDAgMCAwLTMuNDggMGwtOCAxNEEyIDIgMCAwIDAgNCAyMWgxNmEyIDIgMCAwIDAgMS43My0zIi8+PHBhdGggZD0iTTEyIDl2NCIvPjxwYXRoIGQ9Ik0xMiAxN2guMDEiLz48L3N2Zz4=)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/c4f81959-3282-47a9-8a87-b16846be3d95?suggestedFixEnabled=true)
   
   💬 Looking for more details? Reply to this comment to chat with Korbit.
   </sub>
   
   <!--- korbi internal id:fb190ae4-139f-409b-95c1-42b084c15f84 -->
   



##########
superset/db_engine_specs/odps.py:
##########
@@ -0,0 +1,192 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import logging
+from typing import Any, Optional, TYPE_CHECKING
+
+from sqlalchemy import select, text
+from sqlalchemy.engine.base import Engine
+
+from superset.databases.schemas import (
+    TableMetadataColumnsResponse,
+    TableMetadataResponse,
+)
+from superset.databases.utils import (
+    get_col_type,
+    get_foreign_keys_metadata,
+    get_indexes_metadata,
+)
+from superset.db_engine_specs.base import BaseEngineSpec, BasicParametersMixin
+from superset.sql.parse import Partition, SQLScript
+from superset.sql_parse import Table
+from superset.superset_typing import ResultSetColumnType
+
+if TYPE_CHECKING:
+    from superset.models.core import Database
+
+logger = logging.getLogger(__name__)
+
+
+class OdpsBaseEngineSpec(BaseEngineSpec):
+    @classmethod
+    def get_table_metadata(
+        cls,
+        database: Database,
+        table: Table,
+        partition: Optional[Partition] = None,
+    ) -> TableMetadataResponse:
+        """
+        Returns basic table metadata
+        :param database: Database instance
+        :param table: A Table instance
+        :param partition: A Table partition info
+        :return: Basic table metadata
+        """
+        return cls.get_table_metadata(database, table, partition)
+
+
+class OdpsEngineSpec(BasicParametersMixin, OdpsBaseEngineSpec):
+    default_driver = "odps"
+
+    @classmethod
+    def get_table_metadata(
+        cls, database: Any, table: Table, partition: Optional[Partition] = None
+    ) -> TableMetadataResponse:
+        """
+        Get table metadata information, including type, pk, fks.
+        This function raises SQLAlchemyError when a schema is not found.
+
+        :param partition: The table's partition info
+        :param database: The database model
+        :param table: Table instance
+        :return: Dict table metadata ready for API response
+        """
+        keys = []
+        columns = database.get_columns(table)
+        primary_key = database.get_pk_constraint(table)
+        if primary_key and primary_key.get("constrained_columns"):
+            primary_key["column_names"] = 
primary_key.pop("constrained_columns")
+            primary_key["type"] = "pk"
+            keys += [primary_key]
+        foreign_keys = get_foreign_keys_metadata(database, table)
+        indexes = get_indexes_metadata(database, table)
+        keys += foreign_keys + indexes
+        payload_columns: list[TableMetadataColumnsResponse] = []
+        table_comment = database.get_table_comment(table)
+        for col in columns:
+            dtype = get_col_type(col)
+            payload_columns.append(
+                {
+                    "name": col["column_name"],
+                    "type": dtype.split("(")[0] if "(" in dtype else dtype,
+                    "longType": dtype,
+                    "keys": [
+                        k for k in keys if col["column_name"] in 
k["column_names"]
+                    ],
+                    "comment": col.get("comment"),
+                }
+            )
+
+        with database.get_sqla_engine(
+            catalog=table.catalog, schema=table.schema
+        ) as engine:
+            return {
+                "name": table.table,
+                "columns": payload_columns,
+                "selectStar": cls.select_star(
+                    database=database,
+                    table=table,
+                    engine=engine,
+                    limit=100,
+                    show_cols=False,
+                    indent=True,
+                    latest_partition=True,
+                    cols=columns,
+                    partition=partition,
+                ),
+                "primaryKey": primary_key,
+                "foreignKeys": foreign_keys,
+                "indexes": keys,
+                "comment": table_comment,
+            }
+
+    @classmethod
+    def select_star(  # pylint: disable=too-many-arguments
+        cls,
+        database: Database,
+        table: Table,
+        engine: Engine,
+        limit: int = 100,
+        show_cols: bool = False,
+        indent: bool = True,
+        latest_partition: bool = True,
+        cols: list[ResultSetColumnType] | None = None,
+        partition: Optional[Partition] = None,
+    ) -> str:
+        """
+        Generate a "SELECT * from [schema.]table_name" query with appropriate 
limit.
+
+        WARNING: expects only unquoted table and schema names.
+
+        :param partition: The table's partition info
+        :param database: Database instance
+        :param table: Table instance
+        :param engine: SqlAlchemy Engine instance
+        :param limit: limit to impose on query
+        :param show_cols: Show columns in query; otherwise use "*"
+        :param indent: Add indentation to query
+        :param latest_partition: Only query the latest partition
+        :param cols: Columns to include in query
+        :return: SQL query
+        """
+        # pylint: disable=redefined-outer-name
+        fields: str | list[Any] = "*"
+        cols = cols or []
+        if (show_cols or latest_partition) and not cols:
+            cols = database.get_columns(table)
+
+        if show_cols:
+            fields = cls._get_fields(cols)
+        print()
+        full_table_name = cls.quote_table(table, engine.dialect)
+        qry = select(fields).select_from(text(full_table_name))
+        if database.backend == "odps":
+            if (
+                partition is not None
+                and partition.is_partitioned_table
+                and partition.partition_column is not None
+                and len(partition.partition_column) > 0
+            ):
+                partition_str = partition.partition_column[0]
+                partition_str_where = f"CAST({partition_str} AS STRING) LIKE 
'%'"

Review Comment:
   ### Ineffective Partition Filtering <sub>![category 
Functionality](https://img.shields.io/badge/Functionality-0284c7)</sub>
   
   <details>
     <summary>Tell me more</summary>
   
   ###### What is the issue?
   The partition filtering logic uses a LIKE '%' condition which will always be 
true, effectively not filtering the partitions at all.
   
   ###### Why this matters
   This defeats the purpose of partition filtering and may return more data 
than intended, potentially causing performance issues.
   
   ###### Suggested change ∙ *Feature Preview*
   Implement proper partition filtering logic that actually restricts the data 
based on the partition value:
   ```python
   partition_str = partition.partition_column[0]
   partition_value = partition.partition_values[0] if 
partition.partition_values else None
   if partition_value:
       partition_str_where = f"{partition_str} = '{partition_value}'"
       qry = qry.where(text(partition_str_where))
   ```
   
   
   </details>
   
   <sub>
   
   [![Report a problem with this 
comment](https://img.shields.io/badge/Report%20a%20problem%20with%20this%20comment-gray.svg?logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9IiNmNWVjMDAiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIiBjbGFzcz0ibHVjaWRlIGx1Y2lkZS10cmlhbmdsZS1hbGVydCI+PHBhdGggZD0ibTIxLjczIDE4LTgtMTRhMiAyIDAgMCAwLTMuNDggMGwtOCAxNEEyIDIgMCAwIDAgNCAyMWgxNmEyIDIgMCAwIDAgMS43My0zIi8+PHBhdGggZD0iTTEyIDl2NCIvPjxwYXRoIGQ9Ik0xMiAxN2guMDEiLz48L3N2Zz4=)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/26838762-5fa1-41e9-9343-3b771dfb1ddc?suggestedFixEnabled=true)
   
   💬 Looking for more details? Reply to this comment to chat with Korbit.
   </sub>
   
   <!--- korbi internal id:dd72d5f1-ac3e-4810-bad2-719bad7a70df -->
   



##########
superset/db_engine_specs/odps.py:
##########
@@ -0,0 +1,192 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import logging
+from typing import Any, Optional, TYPE_CHECKING
+
+from sqlalchemy import select, text
+from sqlalchemy.engine.base import Engine
+
+from superset.databases.schemas import (
+    TableMetadataColumnsResponse,
+    TableMetadataResponse,
+)
+from superset.databases.utils import (
+    get_col_type,
+    get_foreign_keys_metadata,
+    get_indexes_metadata,
+)
+from superset.db_engine_specs.base import BaseEngineSpec, BasicParametersMixin
+from superset.sql.parse import Partition, SQLScript
+from superset.sql_parse import Table
+from superset.superset_typing import ResultSetColumnType
+
+if TYPE_CHECKING:
+    from superset.models.core import Database
+
+logger = logging.getLogger(__name__)
+
+
+class OdpsBaseEngineSpec(BaseEngineSpec):
+    @classmethod
+    def get_table_metadata(
+        cls,
+        database: Database,
+        table: Table,
+        partition: Optional[Partition] = None,
+    ) -> TableMetadataResponse:
+        """
+        Returns basic table metadata
+        :param database: Database instance
+        :param table: A Table instance
+        :param partition: A Table partition info
+        :return: Basic table metadata
+        """
+        return cls.get_table_metadata(database, table, partition)
+
+
+class OdpsEngineSpec(BasicParametersMixin, OdpsBaseEngineSpec):
+    default_driver = "odps"
+
+    @classmethod
+    def get_table_metadata(
+        cls, database: Any, table: Table, partition: Optional[Partition] = None
+    ) -> TableMetadataResponse:
+        """
+        Get table metadata information, including type, pk, fks.
+        This function raises SQLAlchemyError when a schema is not found.
+
+        :param partition: The table's partition info
+        :param database: The database model
+        :param table: Table instance
+        :return: Dict table metadata ready for API response
+        """
+        keys = []
+        columns = database.get_columns(table)
+        primary_key = database.get_pk_constraint(table)
+        if primary_key and primary_key.get("constrained_columns"):
+            primary_key["column_names"] = 
primary_key.pop("constrained_columns")
+            primary_key["type"] = "pk"
+            keys += [primary_key]
+        foreign_keys = get_foreign_keys_metadata(database, table)
+        indexes = get_indexes_metadata(database, table)
+        keys += foreign_keys + indexes
+        payload_columns: list[TableMetadataColumnsResponse] = []
+        table_comment = database.get_table_comment(table)
+        for col in columns:
+            dtype = get_col_type(col)
+            payload_columns.append(
+                {
+                    "name": col["column_name"],
+                    "type": dtype.split("(")[0] if "(" in dtype else dtype,
+                    "longType": dtype,
+                    "keys": [
+                        k for k in keys if col["column_name"] in 
k["column_names"]
+                    ],
+                    "comment": col.get("comment"),
+                }
+            )
+
+        with database.get_sqla_engine(
+            catalog=table.catalog, schema=table.schema
+        ) as engine:
+            return {
+                "name": table.table,
+                "columns": payload_columns,
+                "selectStar": cls.select_star(
+                    database=database,
+                    table=table,
+                    engine=engine,
+                    limit=100,
+                    show_cols=False,
+                    indent=True,
+                    latest_partition=True,
+                    cols=columns,
+                    partition=partition,
+                ),
+                "primaryKey": primary_key,
+                "foreignKeys": foreign_keys,
+                "indexes": keys,
+                "comment": table_comment,
+            }
+
+    @classmethod
+    def select_star(  # pylint: disable=too-many-arguments
+        cls,
+        database: Database,
+        table: Table,
+        engine: Engine,
+        limit: int = 100,
+        show_cols: bool = False,
+        indent: bool = True,
+        latest_partition: bool = True,
+        cols: list[ResultSetColumnType] | None = None,
+        partition: Optional[Partition] = None,
+    ) -> str:
+        """
+        Generate a "SELECT * from [schema.]table_name" query with appropriate 
limit.
+
+        WARNING: expects only unquoted table and schema names.
+
+        :param partition: The table's partition info
+        :param database: Database instance
+        :param table: Table instance
+        :param engine: SqlAlchemy Engine instance
+        :param limit: limit to impose on query
+        :param show_cols: Show columns in query; otherwise use "*"
+        :param indent: Add indentation to query
+        :param latest_partition: Only query the latest partition
+        :param cols: Columns to include in query
+        :return: SQL query
+        """
+        # pylint: disable=redefined-outer-name
+        fields: str | list[Any] = "*"
+        cols = cols or []
+        if (show_cols or latest_partition) and not cols:
+            cols = database.get_columns(table)
+
+        if show_cols:
+            fields = cls._get_fields(cols)
+        print()

Review Comment:
   ### Unnecessary Debug Print Statement <sub>![category 
Logging](https://img.shields.io/badge/Logging-4f46e5)</sub>
   
   <details>
     <summary>Tell me more</summary>
   
   ###### What is the issue?
   There is a stray print() statement in the select_star method that serves no 
purpose.
   
   ###### Why this matters
   This will unnecessarily print a blank line to stdout every time a query is 
generated, potentially cluttering logs and console output.
   
   ###### Suggested change ∙ *Feature Preview*
   Remove the print() statement:
   ```python
   full_table_name = cls.quote_table(table, engine.dialect)
   ```
   
   
   </details>
   
   <sub>
   
   [![Report a problem with this 
comment](https://img.shields.io/badge/Report%20a%20problem%20with%20this%20comment-gray.svg?logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9IiNmNWVjMDAiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIiBjbGFzcz0ibHVjaWRlIGx1Y2lkZS10cmlhbmdsZS1hbGVydCI+PHBhdGggZD0ibTIxLjczIDE4LTgtMTRhMiAyIDAgMCAwLTMuNDggMGwtOCAxNEEyIDIgMCAwIDAgNCAyMWgxNmEyIDIgMCAwIDAgMS43My0zIi8+PHBhdGggZD0iTTEyIDl2NCIvPjxwYXRoIGQ9Ik0xMiAxN2guMDEiLz48L3N2Zz4=)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/1d876a0a-8509-4b9d-bcda-2a79b18ab53e?suggestedFixEnabled=true)
   
   💬 Looking for more details? Reply to this comment to chat with Korbit.
   </sub>
   
   <!--- korbi internal id:a23b8cfc-ed7a-44ae-96a5-3b53b9ea2fc0 -->
   



##########
superset/db_engine_specs/odps.py:
##########
@@ -0,0 +1,192 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import logging
+from typing import Any, Optional, TYPE_CHECKING
+
+from sqlalchemy import select, text
+from sqlalchemy.engine.base import Engine
+
+from superset.databases.schemas import (
+    TableMetadataColumnsResponse,
+    TableMetadataResponse,
+)
+from superset.databases.utils import (
+    get_col_type,
+    get_foreign_keys_metadata,
+    get_indexes_metadata,
+)
+from superset.db_engine_specs.base import BaseEngineSpec, BasicParametersMixin
+from superset.sql.parse import Partition, SQLScript
+from superset.sql_parse import Table
+from superset.superset_typing import ResultSetColumnType
+
+if TYPE_CHECKING:
+    from superset.models.core import Database
+
+logger = logging.getLogger(__name__)
+
+
class OdpsBaseEngineSpec(BaseEngineSpec):
    """Base engine spec shared by ODPS (MaxCompute) engine specs."""

    @classmethod
    def get_table_metadata(
        cls,
        database: Database,
        table: Table,
        partition: Optional[Partition] = None,
    ) -> TableMetadataResponse:
        """
        Return basic table metadata.

        :param database: Database instance
        :param table: A Table instance
        :param partition: Optional table partition info
        :return: Basic table metadata
        """
        # BUG FIX: the original body called ``cls.get_table_metadata(...)`` —
        # i.e. itself — so every invocation recursed without a base case and
        # raised RecursionError. Delegate to the parent implementation instead;
        # concrete subclasses (e.g. OdpsEngineSpec) override this method with
        # the real ODPS-specific behavior.
        # NOTE(review): assumes BaseEngineSpec exposes a compatible
        # ``get_table_metadata(database, table, partition)`` — confirm; if it
        # does not, this base method should raise NotImplementedError instead.
        return super().get_table_metadata(database, table, partition)
+
+
+class OdpsEngineSpec(BasicParametersMixin, OdpsBaseEngineSpec):
+    default_driver = "odps"
+
+    @classmethod
+    def get_table_metadata(
+        cls, database: Any, table: Table, partition: Optional[Partition] = None
+    ) -> TableMetadataResponse:
+        """
+        Get table metadata information, including type, pk, fks.
+        This function raises SQLAlchemyError when a schema is not found.
+
+        :param partition: The table's partition info
+        :param database: The database model
+        :param table: Table instance
+        :return: Dict table metadata ready for API response
+        """
+        keys = []
+        columns = database.get_columns(table)
+        primary_key = database.get_pk_constraint(table)
+        if primary_key and primary_key.get("constrained_columns"):
+            primary_key["column_names"] = 
primary_key.pop("constrained_columns")
+            primary_key["type"] = "pk"
+            keys += [primary_key]
+        foreign_keys = get_foreign_keys_metadata(database, table)
+        indexes = get_indexes_metadata(database, table)
+        keys += foreign_keys + indexes
+        payload_columns: list[TableMetadataColumnsResponse] = []
+        table_comment = database.get_table_comment(table)
+        for col in columns:
+            dtype = get_col_type(col)
+            payload_columns.append(
+                {
+                    "name": col["column_name"],
+                    "type": dtype.split("(")[0] if "(" in dtype else dtype,
+                    "longType": dtype,
+                    "keys": [
+                        k for k in keys if col["column_name"] in 
k["column_names"]
+                    ],
+                    "comment": col.get("comment"),
+                }
+            )
+
+        with database.get_sqla_engine(
+            catalog=table.catalog, schema=table.schema
+        ) as engine:
+            return {
+                "name": table.table,
+                "columns": payload_columns,
+                "selectStar": cls.select_star(
+                    database=database,
+                    table=table,
+                    engine=engine,
+                    limit=100,
+                    show_cols=False,
+                    indent=True,
+                    latest_partition=True,
+                    cols=columns,
+                    partition=partition,
+                ),
+                "primaryKey": primary_key,
+                "foreignKeys": foreign_keys,
+                "indexes": keys,
+                "comment": table_comment,
+            }
+
+    @classmethod
+    def select_star(  # pylint: disable=too-many-arguments
+        cls,
+        database: Database,
+        table: Table,
+        engine: Engine,
+        limit: int = 100,
+        show_cols: bool = False,
+        indent: bool = True,
+        latest_partition: bool = True,
+        cols: list[ResultSetColumnType] | None = None,
+        partition: Optional[Partition] = None,
+    ) -> str:
+        """
+        Generate a "SELECT * from [schema.]table_name" query with appropriate 
limit.
+
+        WARNING: expects only unquoted table and schema names.
+
+        :param partition: The table's partition info
+        :param database: Database instance
+        :param table: Table instance
+        :param engine: SqlAlchemy Engine instance
+        :param limit: limit to impose on query
+        :param show_cols: Show columns in query; otherwise use "*"
+        :param indent: Add indentation to query
+        :param latest_partition: Only query the latest partition
+        :param cols: Columns to include in query
+        :return: SQL query
+        """
+        # pylint: disable=redefined-outer-name
+        fields: str | list[Any] = "*"
+        cols = cols or []
+        if (show_cols or latest_partition) and not cols:
+            cols = database.get_columns(table)
+
+        if show_cols:
+            fields = cls._get_fields(cols)
+        print()
+        full_table_name = cls.quote_table(table, engine.dialect)
+        qry = select(fields).select_from(text(full_table_name))
+        if database.backend == "odps":
+            if (
+                partition is not None
+                and partition.is_partitioned_table
+                and partition.partition_column is not None
+                and len(partition.partition_column) > 0
+            ):
+                partition_str = partition.partition_column[0]
+                partition_str_where = f"CAST({partition_str} AS STRING) LIKE 
'%'"
+                qry = qry.where(text(partition_str_where))

Review Comment:
   ### SQL Injection Risk in Partition Query Construction <sub>![category 
Security](https://img.shields.io/badge/Security-e11d48)</sub>
   
   <details>
     <summary>Tell me more</summary>
   
   ###### What is the issue?
   Direct string interpolation of partition_str into an SQL query creates a 
potential SQL injection vulnerability.
   
   ###### Why this matters
   If partition_str contains malicious SQL code, it could be executed since the 
value is directly interpolated into the query string without proper escaping or 
parameterization.
   
   ###### Suggested change ∙ *Feature Preview*
   Use SQLAlchemy's parameterized queries:
   ```python
   partition_str = partition.partition_column[0]
   qry = qry.where(text("CAST(:partition_str AS STRING) LIKE 
'%'").bindparams(partition_str=partition_str))
   ```
   
   
   </details>
   
   <sub>
   
   [![Report a problem with this 
comment](https://img.shields.io/badge/Report%20a%20problem%20with%20this%20comment-gray.svg?logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9IiNmNWVjMDAiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIiBjbGFzcz0ibHVjaWRlIGx1Y2lkZS10cmlhbmdsZS1hbGVydCI+PHBhdGggZD0ibTIxLjczIDE4LTgtMTRhMiAyIDAgMCAwLTMuNDggMGwtOCAxNEEyIDIgMCAwIDAgNCAyMWgxNmEyIDIgMCAwIDAgMS43My0zIi8+PHBhdGggZD0iTTEyIDl2NCIvPjxwYXRoIGQ9Ik0xMiAxN2guMDEiLz48L3N2Zz4=)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/71e6b494-1f9a-48e1-9159-2caa26f342e6?suggestedFixEnabled=true)
   
   💬 Looking for more details? Reply to this comment to chat with Korbit.
   </sub>
   
   <!--- korbi internal id:95453f4c-adc4-4270-9287-4ed9758ebb40 -->
   



##########
superset/daos/database.py:
##########
@@ -166,6 +170,37 @@
 
         return ssh_tunnel
 
+    @classmethod
+    def is_odps_partitioned_table(
+        cls, database: Database, table_name: str
+    ) -> Tuple[bool, List[str]]:
+        """
+        This function is used to determine and retrieve
+        partition information of the odps table.
+        The return values are whether the partition
+        table is partitioned and the names of all partition fields.
+        """
+        if not database:
+            raise ValueError("Database not found")
+        uri = database.sqlalchemy_uri
+        access_key = database.password
+        pattern = re.compile(
+            
r"odps://(?P<username>[^:]+):(?P<password>[^@]+)@(?P<project>[^/]+)/(?:\?endpoint=(?P<endpoint>[^&]+))"
+        )

Review Comment:
   ### Undocumented Complex Regex Pattern <sub>![category 
Readability](https://img.shields.io/badge/Readability-0284c7)</sub>
   
   <details>
     <summary>Tell me more</summary>
   
   ###### What is the issue?
   Complex regex pattern without inline comments explaining the pattern 
components and their purpose
   
   ###### Why this matters
   Without explanation, future maintainers will need to decipher the regex 
pattern, which increases cognitive load and maintenance time
   
   ###### Suggested change ∙ *Feature Preview*
   ```python
   # Pattern to extract ODPS connection components from URI
   # Format: odps://username:password@project/?endpoint=endpoint_url
   pattern = re.compile(
       r"odps://"               # Protocol
       r"(?P<username>[^:]+)"   # Username until ':'
       r":"
       r"(?P<password>[^@]+)"   # Password until '@'
       r"@"
       r"(?P<project>[^/]+)"    # Project name until '/'
       r"/"
       r"(?:\?endpoint="       # Endpoint parameter
       r"(?P<endpoint>[^&]+))"  # Endpoint value until '&' or end
   )
   ```
   
   
   </details>
   
   <sub>
   
   [![Report a problem with this 
comment](https://img.shields.io/badge/Report%20a%20problem%20with%20this%20comment-gray.svg?logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9IiNmNWVjMDAiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIiBjbGFzcz0ibHVjaWRlIGx1Y2lkZS10cmlhbmdsZS1hbGVydCI+PHBhdGggZD0ibTIxLjczIDE4LTgtMTRhMiAyIDAgMCAwLTMuNDggMGwtOCAxNEEyIDIgMCAwIDAgNCAyMWgxNmEyIDIgMCAwIDAgMS43My0zIi8+PHBhdGggZD0iTTEyIDl2NCIvPjxwYXRoIGQ9Ik0xMiAxN2guMDEiLz48L3N2Zz4=)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/1296b488-4131-4336-b580-a053b89fad4d?suggestedFixEnabled=true)
   
   💬 Looking for more details? Reply to this comment to chat with Korbit.
   </sub>
   
   <!--- korbi internal id:fb3b8162-ce56-43ce-a64b-9e51910621cb -->
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to