kevinjqliu commented on code in PR #590:
URL: https://github.com/apache/iceberg-python/pull/590#discussion_r1560374756


##########
column_name_test.py:
##########
@@ -0,0 +1,87 @@
+import os.path
+
+from pyiceberg.catalog.sql import SqlCatalog
+import pyarrow as pa
+import pandas as pd
+
+
+def sanitize_ch_names(ch_name: str) -> str:
+    """ Helper func to sanitize the column/channel names """
+    chars_to_replace = [":", ".", "-", "/"]
+    sanitized = ch_name
+    for char in chars_to_replace:
+        sanitized = sanitized.replace(char, "_")
+    sanitized = sanitized.lower()
+    return sanitized
+
+
+""" 
+Simple logic to create dataframe and save it to iceberg table.
+Showcases issues with column name in pyarrow unless sanitized
+"""
+
+# Verify warehouse folder exists
+if not os.path.exists("warehouse"):
+    os.mkdir("warehouse")
+
+data = {
+    'TEST:A1B2.RAW.ABC-GG-1-A': [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0],
+    'TEST:A1B2.RAW.ABC-GG-1-B': [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9],
+    'TEST:A1B2.RAW.ABC-GG-1-C': [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9],
+    'time': [
+        1702090722998897808,
+        1702090722998947809,
+        1702090722998997809,
+        1702090722999047809,
+        1702090722999097809,
+        1702090722999147809,
+        1702090722999197809,
+        1702090722999247809,
+        1702090722999297809,
+        1702090722999347809
+    ]
+}
+
+df = pd.DataFrame(data)
+pa_data = pa.Table.from_pandas(df)
+
+"""
+Uncomment to sanitize the channel names and make it work.
+Delete the contents in warehouse folder and rerun.
+"""
+# ch_name_swap = dict()
+# for ch_name in pa_data.column_names:
+#     ch_name_swap[ch_name] = sanitize_ch_names(ch_name)
+# pa_data = pa_data.rename_columns(ch_name_swap.values())

Review Comment:
   I just included this as a way to reproduce #584, and added a new test for
writing a Parquet file with special characters in the column names.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to