tanmayrauth commented on code in PR #963:
URL: https://github.com/apache/iceberg-go/pull/963#discussion_r3189714617
##########
catalog/hadoop/hadoop_test.go:
##########
@@ -386,3 +386,318 @@ func (s *HadoopCatalogTestSuite)
TestFindVersionMixedGzipAndPlain() {
s.Require().NoError(err)
s.Equal(3, ver)
}
+
+// CreateNamespace tests
+
+func (s *HadoopCatalogTestSuite) TestCreateNamespace() {
+ err := s.cat.CreateNamespace(context.Background(), []string{"ns"}, nil)
+ s.Require().NoError(err)
+
+ info, err := os.Stat(filepath.Join(s.warehouse, "ns"))
+ s.Require().NoError(err)
+ s.True(info.IsDir())
+}
+
+func (s *HadoopCatalogTestSuite) TestCreateNamespaceAlreadyExists() {
+ s.Require().NoError(os.Mkdir(filepath.Join(s.warehouse, "ns"), 0o755))
+
+ err := s.cat.CreateNamespace(context.Background(), []string{"ns"}, nil)
+ s.ErrorIs(err, catalog.ErrNamespaceAlreadyExists)
+}
+
+func (s *HadoopCatalogTestSuite) TestCreateNamespaceNested() {
+ // Parent namespaces must exist first with atomic os.Mkdir.
+ s.Require().NoError(os.Mkdir(filepath.Join(s.warehouse, "a"), 0o755))
+ s.Require().NoError(os.Mkdir(filepath.Join(s.warehouse, "a", "b"),
0o755))
+
+ err := s.cat.CreateNamespace(context.Background(), []string{"a", "b",
"c"}, nil)
+ s.Require().NoError(err)
+
+ info, err := os.Stat(filepath.Join(s.warehouse, "a", "b", "c"))
+ s.Require().NoError(err)
+ s.True(info.IsDir())
+}
+
+func (s *HadoopCatalogTestSuite) TestCreateNamespaceNestedParentMissing() {
+ // Creating a nested namespace without parent should fail.
+ err := s.cat.CreateNamespace(context.Background(), []string{"a", "b",
"c"}, nil)
+ s.Require().Error(err)
+ s.Contains(err.Error(), "failed to create namespace")
Review Comment:
Agreed. Switched to ErrorIs with sentinels. Only kept Contains for the
non-sentinel "properties not supported" case.
##########
catalog/hadoop/hadoop.go:
##########
@@ -265,26 +300,139 @@ func (c *Catalog) CheckTableExists(_ context.Context, _
table.Identifier) (bool,
return false, errors.New("hadoop catalog: CheckTableExists not yet
implemented")
}
-func (c *Catalog) ListNamespaces(_ context.Context, _ table.Identifier)
([]table.Identifier, error) {
- return nil, errors.New("hadoop catalog: ListNamespaces not yet
implemented")
+func (c *Catalog) CreateNamespace(_ context.Context, ns table.Identifier,
props iceberg.Properties) error {
+ if len(ns) == 0 {
+ return errors.New("hadoop catalog: namespace identifier must
not be empty")
+ }
+
+ if err := validateIdentifier(ns); err != nil {
+ return err
+ }
+
+ if len(props) > 0 {
+ return errors.New("hadoop catalog: namespace properties are not
supported")
+ }
+
+ path := c.namespaceToPath(ns)
+
+ if err := os.Mkdir(path, 0o755); err != nil {
+ if os.IsExist(err) {
+ return fmt.Errorf("%w: %s",
catalog.ErrNamespaceAlreadyExists, strings.Join(ns, "."))
+ }
+
+ return fmt.Errorf("hadoop catalog: failed to create namespace:
%w", err)
+ }
+
+ return nil
}
-func (c *Catalog) CreateNamespace(_ context.Context, _ table.Identifier, _
iceberg.Properties) error {
- return errors.New("hadoop catalog: CreateNamespace not yet implemented")
+func (c *Catalog) DropNamespace(_ context.Context, ns table.Identifier) error {
+ if len(ns) == 0 {
+ return errors.New("hadoop catalog: namespace identifier must
not be empty")
+ }
+
+ if err := validateIdentifier(ns); err != nil {
+ return err
+ }
+
+ path := c.namespaceToPath(ns)
+
+ if _, err := os.Stat(path); os.IsNotExist(err) {
+ return fmt.Errorf("%w: %s", catalog.ErrNoSuchNamespace,
strings.Join(ns, "."))
+ }
+
+ entries, err := os.ReadDir(path)
+ if err != nil {
+ return fmt.Errorf("hadoop catalog: failed to read namespace
directory: %w", err)
+ }
+
+ if len(entries) > 0 {
+ return fmt.Errorf("%w: %s", catalog.ErrNamespaceNotEmpty,
strings.Join(ns, "."))
+ }
+
+ return os.Remove(path)
}
-func (c *Catalog) DropNamespace(_ context.Context, _ table.Identifier) error {
- return errors.New("hadoop catalog: DropNamespace not yet implemented")
+func (c *Catalog) CheckNamespaceExists(_ context.Context, ns table.Identifier)
(bool, error) {
+ if err := validateIdentifier(ns); err != nil {
+ return false, err
+ }
+
+ path := c.namespaceToPath(ns)
+
+ info, err := os.Stat(path)
+ if os.IsNotExist(err) {
+ return false, nil
+ }
+
+ if err != nil {
+ return false, err
+ }
+
+ return info.IsDir(), nil
}
-func (c *Catalog) CheckNamespaceExists(_ context.Context, _ table.Identifier)
(bool, error) {
- return false, errors.New("hadoop catalog: CheckNamespaceExists not yet
implemented")
+func (c *Catalog) ListNamespaces(_ context.Context, parent table.Identifier)
([]table.Identifier, error) {
+ if len(parent) > 0 {
+ if err := validateIdentifier(parent); err != nil {
+ return nil, err
+ }
+ }
+
+ var path string
+
+ if len(parent) == 0 {
+ path = c.warehouse
+ } else {
+ path = c.namespaceToPath(parent)
+
+ info, err := os.Stat(path)
+ if os.IsNotExist(err) || (err == nil && !info.IsDir()) {
+ return nil, fmt.Errorf("%w: %s",
catalog.ErrNoSuchNamespace, strings.Join(parent, "."))
+ }
+ }
+
+ entries, err := os.ReadDir(path)
+ if err != nil {
+ return nil, fmt.Errorf("hadoop catalog: failed to read
directory: %w", err)
+ }
+
+ result := []table.Identifier{}
+ for _, e := range entries {
+ if !e.IsDir() {
+ continue
+ }
+
+ child := filepath.Join(path, e.Name())
+ if isTableDir(child) {
+ continue
+ }
+
+ result = append(result, table.Identifier{e.Name()})
+ }
+
+ return result, nil
}
-func (c *Catalog) LoadNamespaceProperties(_ context.Context, _
table.Identifier) (iceberg.Properties, error) {
- return nil, errors.New("hadoop catalog: LoadNamespaceProperties not yet
implemented")
+func (c *Catalog) LoadNamespaceProperties(_ context.Context, ns
table.Identifier) (iceberg.Properties, error) {
+ if err := validateIdentifier(ns); err != nil {
+ return nil, err
+ }
+
+ path := c.namespaceToPath(ns)
+
+ info, err := os.Stat(path)
+ if os.IsNotExist(err) || (err == nil && !info.IsDir()) {
+ return nil, fmt.Errorf("%w: %s", catalog.ErrNoSuchNamespace,
strings.Join(ns, "."))
+ }
+
+ if err != nil {
+ return nil, fmt.Errorf("hadoop catalog: failed to stat
namespace: %w", err)
+ }
+
+ return iceberg.Properties{"location": "file://" + path}, nil
Review Comment:
Fair point. Building via (&url.URL{Scheme: "file", Path: path}).String()
after normalizing warehouse to absolute in NewCatalog. Added doc comment noting
this is a Go-only convenience.
##########
catalog/hadoop/hadoop.go:
##########
@@ -265,26 +300,139 @@ func (c *Catalog) CheckTableExists(_ context.Context, _
table.Identifier) (bool,
return false, errors.New("hadoop catalog: CheckTableExists not yet
implemented")
}
-func (c *Catalog) ListNamespaces(_ context.Context, _ table.Identifier)
([]table.Identifier, error) {
- return nil, errors.New("hadoop catalog: ListNamespaces not yet
implemented")
+func (c *Catalog) CreateNamespace(_ context.Context, ns table.Identifier,
props iceberg.Properties) error {
+ if len(ns) == 0 {
+ return errors.New("hadoop catalog: namespace identifier must
not be empty")
+ }
+
+ if err := validateIdentifier(ns); err != nil {
+ return err
+ }
+
+ if len(props) > 0 {
+ return errors.New("hadoop catalog: namespace properties are not
supported")
+ }
+
+ path := c.namespaceToPath(ns)
+
+ if err := os.Mkdir(path, 0o755); err != nil {
+ if os.IsExist(err) {
+ return fmt.Errorf("%w: %s",
catalog.ErrNamespaceAlreadyExists, strings.Join(ns, "."))
+ }
+
+ return fmt.Errorf("hadoop catalog: failed to create namespace:
%w", err)
+ }
+
+ return nil
}
-func (c *Catalog) CreateNamespace(_ context.Context, _ table.Identifier, _
iceberg.Properties) error {
- return errors.New("hadoop catalog: CreateNamespace not yet implemented")
+func (c *Catalog) DropNamespace(_ context.Context, ns table.Identifier) error {
+ if len(ns) == 0 {
+ return errors.New("hadoop catalog: namespace identifier must
not be empty")
+ }
+
+ if err := validateIdentifier(ns); err != nil {
+ return err
+ }
+
+ path := c.namespaceToPath(ns)
+
+ if _, err := os.Stat(path); os.IsNotExist(err) {
Review Comment:
DropNamespace now relies on ReadDir alone (closes TOCTTOU). Switched all
os.IsNotExist/os.IsExist to errors.Is.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]