gwindes commented on code in PR #590:
URL: https://github.com/apache/iceberg-python/pull/590#discussion_r1558403974


##########
column_name_test.py:
##########
@@ -0,0 +1,87 @@
+import os.path
+
+from pyiceberg.catalog.sql import SqlCatalog
+import pyarrow as pa
+import pandas as pd
+
+
+def sanitize_ch_names(ch_name: str) -> str:
+    """ Helper func to sanitize the column/channel names """
+    chars_to_replace = [":", ".", "-", "/"]
+    sanitized = ch_name
+    for char in chars_to_replace:
+        sanitized = sanitized.replace(char, "_")
+    sanitized = sanitized.lower()
+    return sanitized
+
+
+""" 
+Simple logic to create dataframe and save it to iceberg table.
+Showcases issues with column name in pyarrow unless sanitized
+"""
+
+# Verify warehouse folder exists
+if not os.path.exists("warehouse"):
+    os.mkdir("warehouse")
+
+data = {
+    'TEST:A1B2.RAW.ABC-GG-1-A': [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0],
+    'TEST:A1B2.RAW.ABC-GG-1-B': [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9],
+    'TEST:A1B2.RAW.ABC-GG-1-C': [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9],
+    'time': [
+        1702090722998897808,
+        1702090722998947809,
+        1702090722998997809,
+        1702090722999047809,
+        1702090722999097809,
+        1702090722999147809,
+        1702090722999197809,
+        1702090722999247809,
+        1702090722999297809,
+        1702090722999347809
+    ]
+}
+
+df = pd.DataFrame(data)
+pa_data = pa.Table.from_pandas(df)
+
+"""
+Uncomment to sanitize the channel names and make it work.
+Delete the contents in warehouse folder and rerun.
+"""
+# ch_name_swap = dict()
+# for ch_name in pa_data.column_names:
+#     ch_name_swap[ch_name] = sanitize_ch_names(ch_name)
+# pa_data = pa_data.rename_columns(ch_name_swap.values())

Review Comment:
   It might be safe to remove this commented-out snippet if it is not needed,
unless it is being kept for general context?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to