tanmayrauth commented on code in PR #963:
URL: https://github.com/apache/iceberg-go/pull/963#discussion_r3189716023
##########
catalog/hadoop/hadoop.go:
##########
@@ -265,26 +300,139 @@ func (c *Catalog) CheckTableExists(_ context.Context, _
table.Identifier) (bool,
return false, errors.New("hadoop catalog: CheckTableExists not yet
implemented")
}
-func (c *Catalog) ListNamespaces(_ context.Context, _ table.Identifier)
([]table.Identifier, error) {
- return nil, errors.New("hadoop catalog: ListNamespaces not yet
implemented")
+func (c *Catalog) CreateNamespace(_ context.Context, ns table.Identifier,
props iceberg.Properties) error {
+ if len(ns) == 0 {
+ return errors.New("hadoop catalog: namespace identifier must
not be empty")
+ }
+
+ if err := validateIdentifier(ns); err != nil {
+ return err
+ }
+
+ if len(props) > 0 {
+ return errors.New("hadoop catalog: namespace properties are not
supported")
+ }
+
+ path := c.namespaceToPath(ns)
+
+ if err := os.Mkdir(path, 0o755); err != nil {
+ if os.IsExist(err) {
+ return fmt.Errorf("%w: %s",
catalog.ErrNamespaceAlreadyExists, strings.Join(ns, "."))
+ }
+
+ return fmt.Errorf("hadoop catalog: failed to create namespace:
%w", err)
Review Comment:
Went with the stricter "parent must exist" approach, detecting
fs.ErrNotExist from os.Mkdir and returning ErrNoSuchNamespace.
##########
catalog/hadoop/hadoop.go:
##########
@@ -52,6 +52,31 @@ var _ catalog.Catalog = (*Catalog)(nil)
// v1.metadata.json, v42.metadata.json, v1.gz.metadata.json, etc.
var versionPattern = regexp.MustCompile(`^v([0-9]+)(?:\.gz)?\.metadata\.json$`)
+// uuidMetadataPattern matches UUID-style metadata filenames produced by
+// Java/PyIceberg catalogs: 00000-<uuid>.metadata.json, etc.
+var uuidMetadataPattern =
regexp.MustCompile(`^[0-9]+-[0-9a-fA-F-]+\.metadata\.json$`)
+
+// validateIdentifier checks that each component of an identifier is safe for
+// use as a path segment. It rejects empty parts, path traversal sequences,
+// and components containing path separators.
+func validateIdentifier(ident table.Identifier) error {
+ for _, part := range ident {
+ if part == "" {
+ return errors.New("hadoop catalog: identifier component
must not be empty")
+ }
+
+ if part == "." || part == ".." {
+ return fmt.Errorf("hadoop catalog: invalid identifier
component %q", part)
+ }
+
+ if strings.ContainsAny(part, "/\\") {
+ return fmt.Errorf("hadoop catalog: identifier component
must not contain path separators: %q", part)
+ }
+ }
+
+ return nil
+}
Review Comment:
Moved the empty check into validateIdentifier wrapping with
ErrNoSuchNamespace. Added POSIX-best-effort doc note.
##########
catalog/hadoop/hadoop.go:
##########
@@ -52,6 +52,31 @@ var _ catalog.Catalog = (*Catalog)(nil)
// v1.metadata.json, v42.metadata.json, v1.gz.metadata.json, etc.
var versionPattern = regexp.MustCompile(`^v([0-9]+)(?:\.gz)?\.metadata\.json$`)
+// uuidMetadataPattern matches UUID-style metadata filenames produced by
+// Java/PyIceberg catalogs: 00000-<uuid>.metadata.json, etc.
+var uuidMetadataPattern =
regexp.MustCompile(`^[0-9]+-[0-9a-fA-F-]+\.metadata\.json$`)
Review Comment:
Tightened to 5-digit sequence + canonical 8-4-4-4-12 UUID + optional .gz
variant.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]