Fokko commented on code in PR #369:
URL: https://github.com/apache/iceberg-go/pull/369#discussion_r2021749556


##########
table/internal/parquet_files.go:
##########
@@ -170,6 +172,86 @@ func (parquetFormat) PrimitiveTypeToPhysicalType(typ 
iceberg.PrimitiveType) stri
        }
 }
 
+func (parquetFormat) GetWriteProperties(props iceberg.Properties) any {
+       writerProps := []parquet.WriterProperty{
+               parquet.WithDictionaryDefault(false),
+               
parquet.WithMaxRowGroupLength(int64(props.GetInt("write.parquet.row-group-limit",
 1048576))),
+               
parquet.WithDataPageSize(int64(props.GetInt("write.parquet.page-size-bytes", 
1024*1024))),
+               parquet.WithDataPageVersion(parquet.DataPageV2),
+               
parquet.WithBatchSize(int64(props.GetInt("write.parquet.page-row-limit", 
20000))),
+               
parquet.WithDictionaryPageSizeLimit(int64(props.GetInt("write.parquet.dict-size-bytes",
 2*1024*1024))),

Review Comment:
   Should we move these in constants? Is that common in Go?



##########
table/table_test.go:
##########
@@ -835,6 +839,271 @@ func (t *TableWritingTestSuite) TestReplaceDataFiles() {
        }, staged.CurrentSnapshot().Summary)
 }
 
+func (t *TableWritingTestSuite) TestWriteSpecialCharacterColumn() {
+       ident := table.Identifier{"default", "write_special_character_column"}
+       colNameWithSpecialChar := "letter/abc"
+
+       s := iceberg.NewSchema(0,
+               iceberg.NestedField{ID: 1, Name: colNameWithSpecialChar, Type: 
iceberg.PrimitiveTypes.String},
+               iceberg.NestedField{ID: 2, Name: "id", Type: 
iceberg.PrimitiveTypes.Int32},
+               iceberg.NestedField{ID: 3, Name: "name", Type: 
iceberg.PrimitiveTypes.String, Required: true},
+               iceberg.NestedField{ID: 4, Name: "address", Required: true, 
Type: &iceberg.StructType{
+                       FieldList: []iceberg.NestedField{
+                               {ID: 5, Name: "street", Type: 
iceberg.PrimitiveTypes.String, Required: true},
+                               {ID: 6, Name: "city", Type: 
iceberg.PrimitiveTypes.String, Required: true},
+                               {ID: 7, Name: "zip", Type: 
iceberg.PrimitiveTypes.Int32, Required: true},
+                               {ID: 8, Name: colNameWithSpecialChar, Type: 
iceberg.PrimitiveTypes.String, Required: true},
+                       },
+               }})
+
+       arrowSchema := arrow.NewSchema([]arrow.Field{
+               {Name: colNameWithSpecialChar, Type: arrow.BinaryTypes.String, 
Nullable: true},
+               {Name: "id", Type: arrow.PrimitiveTypes.Int32, Nullable: true},
+               {Name: "name", Type: arrow.BinaryTypes.String},
+               {Name: "address", Type: arrow.StructOf(
+                       arrow.Field{Name: "street", Type: 
arrow.BinaryTypes.String},
+                       arrow.Field{Name: "city", Type: 
arrow.BinaryTypes.String},
+                       arrow.Field{Name: "zip", Type: 
arrow.PrimitiveTypes.Int32},
+                       arrow.Field{Name: colNameWithSpecialChar, Type: 
arrow.BinaryTypes.String},
+               )},
+       }, nil)
+
+       arrowTable, err := array.TableFromJSON(memory.DefaultAllocator, 
arrowSchema, []string{
+               `[
+                       {
+                               "letter/abc": "a",
+                               "id": 1,
+                               "name": "AB",
+                               "address": {"street": "123", "city": "SFO", 
"zip": 12345, "letter/abc": "a"}
+                       },
+                       {
+                               "letter/abc": null,
+                               "id": 2,
+                               "name": "CD",
+                               "address": {"street": "456", "city": "SW", 
"zip": 67890, "letter/abc": "b"}
+                       },
+                       {
+                               "letter/abc": "z",
+                               "id": 3,
+                               "name": "EF",
+                               "address": {"street": "789", "city": "Random", 
"zip": 10112, "letter/abc": "c"}
+                       }
+                ]`,
+       })
+       t.Require().NoError(err)
+       defer arrowTable.Release()
+
+       tbl := t.createTable(ident, t.formatVersion, 
*iceberg.UnpartitionedSpec, s)
+       rdr := array.NewTableReader(arrowTable, 1)
+       defer rdr.Release()
+
+       tx := tbl.NewTransaction()
+       t.Require().NoError(tx.Append(t.ctx, rdr, nil))
+
+       scan, err := tx.Scan()
+       t.Require().NoError(err)
+
+       result, err := scan.ToArrowTable(t.ctx)
+       t.Require().NoError(err)
+       defer result.Release()
+
+       t.True(array.TableEqual(arrowTable, result), "expected:\n %s\ngot:\n 
%s", arrowTable, result)
+}
+
+func (t *TableWritingTestSuite) getInMemCatalog() catalog.Catalog {
+       cat, err := catalog.Load(context.Background(), "default", 
iceberg.Properties{
+               "uri":          ":memory:",
+               "type":         "sql",
+               sql.DriverKey:  sqliteshim.ShimName,
+               sql.DialectKey: string(sql.SQLite),
+               "warehouse":    "file://" + t.location,
+       })
+       t.Require().NoError(err)
+
+       return cat
+}
+
+func (t *TableWritingTestSuite) createTableWithProps(identifier 
table.Identifier, props iceberg.Properties, sc *iceberg.Schema) *table.Table {
+       cat := t.getInMemCatalog()
+       cat.DropTable(t.ctx, identifier)
+       cat.DropNamespace(t.ctx, catalog.NamespaceFromIdent(identifier))
+
+       t.Require().NoError(cat.CreateNamespace(t.ctx, 
catalog.NamespaceFromIdent(identifier), nil))
+       tbl, err := cat.CreateTable(t.ctx, identifier, sc, 
catalog.WithProperties(props),
+               catalog.WithLocation(t.location))
+
+       t.Require().NoError(err)
+
+       return tbl
+}
+
+func tableSchema() *iceberg.Schema {
+       return iceberg.NewSchema(0,
+               iceberg.NestedField{ID: 1, Name: "bool", Type: 
iceberg.PrimitiveTypes.Bool},
+               iceberg.NestedField{ID: 2, Name: "string", Type: 
iceberg.PrimitiveTypes.String},
+               iceberg.NestedField{ID: 3, Name: "string_long", Type: 
iceberg.PrimitiveTypes.String},
+               iceberg.NestedField{ID: 4, Name: "int", Type: 
iceberg.PrimitiveTypes.Int32},
+               iceberg.NestedField{ID: 5, Name: "long", Type: 
iceberg.PrimitiveTypes.Int64},
+               iceberg.NestedField{ID: 6, Name: "float", Type: 
iceberg.PrimitiveTypes.Float32},
+               iceberg.NestedField{ID: 7, Name: "double", Type: 
iceberg.PrimitiveTypes.Float64},
+               iceberg.NestedField{ID: 8, Name: "time", Type: 
iceberg.PrimitiveTypes.Time},
+               iceberg.NestedField{ID: 9, Name: "timestamp", Type: 
iceberg.PrimitiveTypes.Timestamp},
+               iceberg.NestedField{ID: 10, Name: "timestamptz", Type: 
iceberg.PrimitiveTypes.TimestampTz},
+               iceberg.NestedField{ID: 11, Name: "date", Type: 
iceberg.PrimitiveTypes.Date},
+               iceberg.NestedField{ID: 12, Name: "uuid", Type: 
iceberg.PrimitiveTypes.UUID},
+               iceberg.NestedField{ID: 13, Name: "binary", Type: 
iceberg.PrimitiveTypes.Binary},
+               iceberg.NestedField{ID: 14, Name: "fixed", Type: 
iceberg.FixedTypeOf(16)},
+       )

Review Comment:
   Should we add decimal as well?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to