luizotavio32 commented on code in PR #34763:
URL: https://github.com/apache/superset/pull/34763#discussion_r2323226002


##########
superset/commands/database/uploaders/csv_reader.py:
##########
@@ -123,6 +123,96 @@ def _select_optimal_engine() -> str:
             )
             return "c"
 
+    @staticmethod
+    def _find_invalid_values_numeric(df: pd.DataFrame, column: str) -> 
pd.Series:
+        """Find invalid values for numeric type conversion."""
+        converted = pd.to_numeric(df[column], errors="coerce")
+        return converted.isna() & df[column].notna()
+
+    @staticmethod
+    def _find_invalid_values_non_numeric(
+        df: pd.DataFrame, column: str, dtype: str
+    ) -> pd.Series:
+        """Find invalid values for non-numeric type conversion."""
+        invalid_mask = pd.Series([False] * len(df), index=df.index)
+        for idx, value in df[column].items():
+            if pd.notna(value):
+                try:
+                    pd.Series([value]).astype(dtype)
+                except (ValueError, TypeError):
+                    invalid_mask[idx] = True
+                    break

Review Comment:
   Maybe we should define a limit on how many errors are output to the user. 
Imagine a column with hundreds of wrong-type values.
    



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to