szehon-ho commented on code in PR #9852: URL: https://github.com/apache/iceberg/pull/9852#discussion_r1520430836
########## hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java: ########## @@ -222,24 +220,144 @@ public boolean dropTable(TableIdentifier identifier, boolean purge) { @Override public void renameTable(TableIdentifier from, TableIdentifier originalTo) { - if (!isValidIdentifier(from)) { - throw new NoSuchTableException("Invalid identifier: %s", from); + renameContent(from, originalTo, HiveOperationsBase.ContentType.TABLE); + } + + @Override + public boolean dropView(TableIdentifier identifier) { + if (!isValidIdentifier(identifier)) { + return false; } - TableIdentifier to = removeCatalogName(originalTo); - Preconditions.checkArgument(isValidIdentifier(to), "Invalid identifier: %s", to); + try { + String database = identifier.namespace().level(0); + String viewName = identifier.name(); + Table table = clients.run(client -> client.getTable(database, viewName)); + HiveOperationsBase.validateTableIsIcebergView( + table, CatalogUtil.fullTableName(name, identifier)); + + HiveViewOperations ops = (HiveViewOperations) newViewOps(identifier); + ViewMetadata lastViewMetadata = null; + + try { + lastViewMetadata = ops.current(); + } catch (NotFoundException e) { + LOG.warn("Failed to load view metadata for view: {}", identifier, e); + } + + clients.run( + client -> { + client.dropTable(database, viewName); + return null; + }); + + if (lastViewMetadata != null) { + CatalogUtil.dropViewMetadata(ops.io(), lastViewMetadata); + } + + LOG.info("Dropped View: {}", identifier); + return true; + + } catch (NoSuchViewException | NoSuchObjectException e) { + LOG.info("Skipping drop, view does not exist: {}", identifier, e); + return false; + } catch (TException e) { + throw new RuntimeException("Failed to drop " + identifier, e); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException("Interrupted in call to dropView", e); + } + } + + @Override + public List<TableIdentifier> listViews(Namespace namespace) { + Preconditions.checkArgument( + isValidateNamespace(namespace), "Missing database in namespace: %s", namespace); + + try { + String database = namespace.level(0); + List<String> tableNames = + clients.run(client -> client.getTables(database, "*", TableType.VIRTUAL_VIEW)); + + // Retrieving the Table objects from HMS in batches to avoid OOM + List<TableIdentifier> filteredTableIdentifiers = Lists.newArrayList(); + Iterable<List<String>> tableNameSets = Iterables.partition(tableNames, 100); + + for (List<String> tableNameSet : tableNameSets) { + filteredTableIdentifiers.addAll( + filterIcebergEntities( + tableNameSet, namespace, HiveOperationsBase.ICEBERG_VIEW_TYPE_VALUE)); + } + + return filteredTableIdentifiers; + } catch (UnknownDBException e) { + throw new NoSuchNamespaceException("Namespace does not exist: %s", namespace); + + } catch (TException e) { + throw new RuntimeException("Failed to list all views under namespace " + namespace, e); + + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException("Interrupted in call to listViews", e); + } + } + + private List<TableIdentifier> filterIcebergEntities( + List<String> tableNames, Namespace namespace, String tableTypeProp) + throws TException, InterruptedException { + List<Table> tableObjects = + clients.run(client -> client.getTableObjectsByName(namespace.level(0), tableNames)); + return tableObjects.stream() + .filter( + table -> + table.getParameters() != null + && tableTypeProp.equalsIgnoreCase( + table.getParameters().get(BaseMetastoreTableOperations.TABLE_TYPE_PROP))) + .map(table -> TableIdentifier.of(namespace, table.getTableName())) + .collect(Collectors.toList()); + } + + @Override + @SuppressWarnings("FormatStringAnnotation") + public void renameView(TableIdentifier from, TableIdentifier to) { if (!namespaceExists(to.namespace())) { throw new NoSuchNamespaceException( "Cannot rename %s to %s. Namespace does not exist: %s", from, to, to.namespace()); } + renameContent(from, to, HiveOperationsBase.ContentType.VIEW); + } + + private void renameContent( + TableIdentifier fromIdentifier, + TableIdentifier toIdentifier, + HiveOperationsBase.ContentType contentType) { + if (!isValidIdentifier(fromIdentifier)) { + throw new NoSuchViewException("Invalid identifier: %s", fromIdentifier); + } + + TableIdentifier to = removeCatalogName(toIdentifier); + Preconditions.checkArgument(isValidIdentifier(to), "Invalid identifier: %s", to); + if (!namespaceExists(to.namespace())) { + throw new NoSuchNamespaceException( + "Cannot rename %s to %s. Namespace does not exist: %s", + fromIdentifier, to, to.namespace()); + } String toDatabase = to.namespace().level(0); - String fromDatabase = from.namespace().level(0); - String fromName = from.name(); + String fromDatabase = fromIdentifier.namespace().level(0); + String fromName = fromIdentifier.name(); try { Table table = clients.run(client -> client.getTable(fromDatabase, fromName)); - HiveOperationsBase.validateTableIsIceberg(table, fullTableName(name, from)); + switch (contentType) { Review Comment: what do you think to pushdown the switch to a method in HiveOperationsBase called validateTable()? (pass in content). The switch statement there should choose the right string for the error message. ########## core/src/main/java/org/apache/iceberg/BaseMetastoreTableOperations.java: ########## @@ -309,65 +304,20 @@ protected enum CommitStatus { * @return Commit Status of Success, Failure or Unknown */ protected CommitStatus checkCommitStatus(String newMetadataLocation, TableMetadata config) { - int maxAttempts = - PropertyUtil.propertyAsInt( - config.properties(), COMMIT_NUM_STATUS_CHECKS, COMMIT_NUM_STATUS_CHECKS_DEFAULT); - long minWaitMs = - PropertyUtil.propertyAsLong( - config.properties(), - COMMIT_STATUS_CHECKS_MIN_WAIT_MS, - COMMIT_STATUS_CHECKS_MIN_WAIT_MS_DEFAULT); - long maxWaitMs = - PropertyUtil.propertyAsLong( - config.properties(), - COMMIT_STATUS_CHECKS_MAX_WAIT_MS, - COMMIT_STATUS_CHECKS_MAX_WAIT_MS_DEFAULT); - long totalRetryMs = - PropertyUtil.propertyAsLong( - config.properties(), - COMMIT_STATUS_CHECKS_TOTAL_WAIT_MS, - COMMIT_STATUS_CHECKS_TOTAL_WAIT_MS_DEFAULT); - - AtomicReference<CommitStatus> status = new AtomicReference<>(CommitStatus.UNKNOWN); - - Tasks.foreach(newMetadataLocation) - .retry(maxAttempts) - .suppressFailureWhenFinished() - .exponentialBackoff(minWaitMs, maxWaitMs, totalRetryMs, 2.0) - .onFailure( - (location, checkException) -> - LOG.error("Cannot check if commit to {} exists.", tableName(), checkException)) - .run( - location -> { - TableMetadata metadata = refresh(); - String currentMetadataFileLocation = metadata.metadataFileLocation(); - boolean commitSuccess = - currentMetadataFileLocation.equals(newMetadataLocation) - || metadata.previousFiles().stream() - .anyMatch(log -> log.file().equals(newMetadataLocation)); - if (commitSuccess) { - LOG.info( - "Commit status check: Commit to {} of {} succeeded", - tableName(), - newMetadataLocation); - status.set(CommitStatus.SUCCESS); - } else { - LOG.warn( - "Commit status check: Commit to {} of {} unknown, new metadata location is not current " - + "or in history", - tableName(), - newMetadataLocation); - } - }); - - if (status.get() == CommitStatus.UNKNOWN) { - LOG.error( - "Cannot determine commit state to {}. Failed during checking {} times. " - + "Treating commit state as unknown.", - tableName(), - maxAttempts); - } - return status.get(); + return checkCommitStatus( + tableName(), newMetadataLocation, config.properties(), this::loadMetadataLocations); + } + + protected List<String> loadMetadataLocations() { + TableMetadata metadata = refresh(); + ImmutableList.Builder<String> builder = ImmutableList.builder(); + return builder + .add(metadata.metadataFileLocation()) + .addAll( + metadata.previousFiles().stream() Review Comment: Hm, it seems even in the lock mode, we get a lock around the time we commit and the time we checkCommitStatus(). So I wonder if the tables logic is for the new mode (non-lock mode). @pvary do you know the context ? I think ViewMetadata already has this model (history() and ViewHistoryEntry()). It seems it should be set in general then, though it is true it is a bit hacky way to do it in Hive (even for tables), i just dont know any alternative atm. ########## hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java: ########## @@ -250,29 +368,53 @@ public void renameTable(TableIdentifier from, TableIdentifier originalTo) { return null; }); - LOG.info("Renamed table from {}, to {}", from, to); + LOG.info("Renamed {} from {}, to {}", contentType.name(), fromIdentifier, to); } catch (NoSuchObjectException e) { - throw new NoSuchTableException("Table does not exist: %s", from); + switch (contentType) { + case TABLE: + throw new NoSuchTableException( + "Cannot rename %s to %s. Table does not exist", fromIdentifier, to); + case VIEW: + throw new NoSuchViewException( + "Cannot rename %s to %s. View does not exist", fromIdentifier, to); + } } catch (InvalidOperationException e) { if (e.getMessage() != null && e.getMessage().contains(String.format("new table %s already exists", to))) { - throw new org.apache.iceberg.exceptions.AlreadyExistsException( - "Table already exists: %s", to); + throwErrorForExistedToContent(fromIdentifier, removeCatalogName(toIdentifier)); } else { - throw new RuntimeException("Failed to rename " + from + " to " + to, e); + throw new RuntimeException("Failed to rename " + fromIdentifier + " to " + to, e); } } catch (TException e) { - throw new RuntimeException("Failed to rename " + from + " to " + to, e); + throw new RuntimeException("Failed to rename " + fromIdentifier + " to " + to, e); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new RuntimeException("Interrupted in call to rename", e); } } + private void throwErrorForExistedToContent(TableIdentifier from, TableIdentifier to) { + String toDatabase = to.namespace().level(0); + try { + Table table = clients.run(client -> client.getTable(toDatabase, to.name())); Review Comment: Why do we have to fetch the table again to throw an errro? ########## .palantir/revapi.yml: ########## @@ -968,6 +968,9 @@ acceptedBreaks: - code: "java.class.removed" old: "class org.apache.iceberg.rest.requests.UpdateTableRequest.Builder" justification: "Removing deprecated code" + - code: "java.class.removed" + old: "enum org.apache.iceberg.BaseMetastoreTableOperations.CommitStatus" Review Comment: @nk1506 can we make a public enum in the BaseMetastoreEnum one and deprecate this one then? Its not perfect to have it public now but I dont think we can avoid it because Java cannot double-inherit from two base classes. ########## hive-metastore/src/main/java/org/apache/iceberg/hive/HiveOperationsBase.java: ########## @@ -62,6 +81,57 @@ interface HiveOperationsBase { String table(); + String catalogName(); + + String entityType(); + + BaseMetastoreOperations.CommitStatus validateNewLocationAndReturnCommitStatus( + BaseMetadata metadata, String newMetadataLocation); + + default Table loadHmsTable() throws TException, InterruptedException { + try { + return metaClients().run(client -> client.getTable(database(), table())); + } catch (NoSuchObjectException nte) { + LOG.trace("{} not found {}", entityType(), fullName(), nte); + return null; + } + } + + void setHmsParameters( + BaseMetadata metadata, Review Comment: I guess we can explitily make this method take in everything that it needs to set, will that be better? (instead of BaseMetadata, pass in Schema, UUID, snapshot, etc). There is a non-trivial number of params though. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org