nastra commented on code in PR #12892:
URL: https://github.com/apache/iceberg/pull/12892#discussion_r2266012309
##########
aws/src/main/java/org/apache/iceberg/aws/glue/GlueCatalog.java:
##########
@@ -278,9 +295,11 @@ protected String defaultWarehouseLocation(TableIdentifier
tableIdentifier) {
tableIdentifier,
awsProperties.glueCatalogSkipNameValidation()))
.build());
String dbLocationUri = response.database().locationUri();
+ String tableNameComponent =
+ LocationUtil.getTableNameComponent(tableIdentifier,
uniqueTableLocation);
Review Comment:
we try to avoid using `get` prefixes in methods. Maybe name this
`tableLocation()`
##########
aws/src/test/java/org/apache/iceberg/aws/dynamodb/TestDynamoDbCatalog.java:
##########
@@ -103,4 +104,37 @@ public void testDefaultWarehouseLocationNoNamespace() {
.isInstanceOf(NoSuchNamespaceException.class)
.hasMessageContaining("Cannot find default warehouse location:");
}
+
+ @Test
+ public void testDefaultWarehouseLocationUniqueNoDbUri() throws Exception {
Review Comment:
```suggestion
public void testDefaultWarehouseLocationUniqueWithoutDbUri() throws
Exception {
```
##########
core/src/main/java/org/apache/iceberg/CatalogProperties.java:
##########
@@ -157,6 +157,9 @@ private CatalogProperties() {}
public static final String APP_ID = "app-id";
public static final String USER = "user";
+ public static final String UNIQUE_TABLE_LOCATION = "unique-table-location";
Review Comment:
we should make sure to also update the docs around catalog properties
##########
aws/src/test/java/org/apache/iceberg/aws/dynamodb/TestDynamoDbCatalog.java:
##########
@@ -103,4 +104,37 @@ public void testDefaultWarehouseLocationNoNamespace() {
.isInstanceOf(NoSuchNamespaceException.class)
.hasMessageContaining("Cannot find default warehouse location:");
}
+
+ @Test
+ public void testDefaultWarehouseLocationUniqueNoDbUri() throws Exception {
+ try (DynamoDbCatalog catalog = new DynamoDbCatalog()) {
+ catalog.initialize(CATALOG_NAME, WAREHOUSE_PATH, new AwsProperties(),
dynamo, null, true);
+
Mockito.doReturn(GetItemResponse.builder().item(Maps.newHashMap()).build())
+ .when(dynamo)
+ .getItem(any(GetItemRequest.class));
+
+ String defaultWarehouseLocation =
catalog.defaultWarehouseLocation(TABLE_IDENTIFIER);
+ assertThat(defaultWarehouseLocation).matches(WAREHOUSE_PATH +
"/db.db/table-[a-z0-9]{32}");
+ }
+ }
+
+ @Test
+ public void testDefaultWarehouseLocationUniqueDbUri() throws Exception {
Review Comment:
```suggestion
public void testDefaultWarehouseLocationUniqueWithDbUri() throws Exception
{
```
##########
core/src/main/java/org/apache/iceberg/util/LocationUtil.java:
##########
@@ -33,4 +35,26 @@ public static String stripTrailingSlash(String path) {
}
return result;
}
+
+ /**
+ * Returns a path component derived from the {@code tableIdentifier}, used
as part of the table
+ * location URI.
+ *
+ * <p>If {@code unique} is {@code true}, the returned component will include
a random UUID suffix.
+ * Otherwise, the plain table name is returned.
+ *
+ * @param tableIdentifier Iceberg table identifier
+ * @param unique whether to ensure uniqueness
+ * @return a string representing the table name component for a location URI
+ */
+ public static String getTableNameComponent(TableIdentifier tableIdentifier,
boolean unique) {
Review Comment:
```suggestion
public static String tableLocation(TableIdentifier tableIdentifier,
boolean useUniqueLocation) {
```
##########
core/src/main/java/org/apache/iceberg/inmemory/InMemoryCatalog.java:
##########
@@ -104,8 +112,9 @@ protected TableOperations newTableOps(TableIdentifier
tableIdentifier) {
@Override
protected String defaultWarehouseLocation(TableIdentifier tableIdentifier) {
- return SLASH.join(
- defaultNamespaceLocation(tableIdentifier.namespace()),
tableIdentifier.name());
+ String tableNameComponent =
Review Comment:
```suggestion
String tableLocation =
```
please also update this in all the other places that use the same naming
##########
spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestUniqueLocation.java:
##########
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.spark.sql;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assumptions.assumeThat;
+
+import java.util.UUID;
+import org.apache.iceberg.CatalogProperties;
+import org.apache.iceberg.ParameterizedTestExtension;
+import org.apache.iceberg.Parameters;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.actions.DeleteOrphanFiles;
+import org.apache.iceberg.catalog.Namespace;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.exceptions.NotFoundException;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+import org.apache.iceberg.spark.CatalogTestBase;
+import org.apache.iceberg.spark.SparkCatalogConfig;
+import org.apache.iceberg.spark.actions.SparkActions;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.TestTemplate;
+import org.junit.jupiter.api.extension.ExtendWith;
+
+@ExtendWith(ParameterizedTestExtension.class)
+public class TestUniqueLocation extends CatalogTestBase {
Review Comment:
```suggestion
public class TestUniqueTableLocation extends CatalogTestBase {
```
##########
core/src/main/java/org/apache/iceberg/util/LocationUtil.java:
##########
@@ -33,4 +35,26 @@ public static String stripTrailingSlash(String path) {
}
return result;
}
+
+ /**
+ * Returns a path component derived from the {@code tableIdentifier}, used
as part of the table
+ * location URI.
+ *
+ * <p>If {@code unique} is {@code true}, the returned component will include
a random UUID suffix.
+ * Otherwise, the plain table name is returned.
+ *
+ * @param tableIdentifier Iceberg table identifier
+ * @param unique whether to ensure uniqueness
+ * @return a string representing the table name component for a location URI
+ */
+ public static String getTableNameComponent(TableIdentifier tableIdentifier,
boolean unique) {
Review Comment:
please also add some tests to `TestLocationUtil` where the identifier is
null and the flag is true/false
##########
spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestUniqueLocation.java:
##########
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.spark.sql;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assumptions.assumeThat;
+
+import java.util.UUID;
+import org.apache.iceberg.CatalogProperties;
+import org.apache.iceberg.ParameterizedTestExtension;
+import org.apache.iceberg.Parameters;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.actions.DeleteOrphanFiles;
+import org.apache.iceberg.catalog.Namespace;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.exceptions.NotFoundException;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+import org.apache.iceberg.spark.CatalogTestBase;
+import org.apache.iceberg.spark.SparkCatalogConfig;
+import org.apache.iceberg.spark.actions.SparkActions;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.TestTemplate;
+import org.junit.jupiter.api.extension.ExtendWith;
+
+@ExtendWith(ParameterizedTestExtension.class)
+public class TestUniqueLocation extends CatalogTestBase {
+
+ private String renamedTableName;
+ private TableIdentifier renamedIdent;
+
+ @Parameters(name = "catalogName = {0}, implementation = {1}, config = {2}")
+ protected static Object[][] parameters() {
+ return new Object[][] {
+ {
+ SparkCatalogConfig.HIVE_WITH_UNIQUE_LOCATION.catalogName(),
+ SparkCatalogConfig.HIVE_WITH_UNIQUE_LOCATION.implementation(),
+ SparkCatalogConfig.HIVE_WITH_UNIQUE_LOCATION.properties()
+ },
+ {
+ SparkCatalogConfig.REST_WITH_UNIQUE_LOCATION.catalogName(),
+ SparkCatalogConfig.REST_WITH_UNIQUE_LOCATION.implementation(),
+ ImmutableMap.builder()
+ .putAll(SparkCatalogConfig.REST_WITH_UNIQUE_LOCATION.properties())
+ .put(CatalogProperties.URI,
restCatalog.properties().get(CatalogProperties.URI))
+ .build()
+ },
+ {
+ SparkCatalogConfig.SPARK_SESSION_WITH_UNIQUE_LOCATION.catalogName(),
+ SparkCatalogConfig.SPARK_SESSION_WITH_UNIQUE_LOCATION.implementation(),
+ SparkCatalogConfig.SPARK_SESSION_WITH_UNIQUE_LOCATION.properties()
+ },
+ };
+ }
+
+ @BeforeEach
+ public void initTableName() {
+ renamedTableName = tableName("table_2");
+ renamedIdent = TableIdentifier.of(Namespace.of("default"), "table_2");
+ }
+
+ @AfterEach
+ public void dropTestTable() {
+ try {
+ sql("DROP TABLE IF EXISTS %s", tableName);
+ sql("DROP TABLE IF EXISTS %s", renamedTableName);
+ } catch (NotFoundException ignore) {
+ // Swallow FNF exception in case of corrupted table so test failure
reason is clearer
+ }
+ }
+
+ @TestTemplate
+ public void testNoCollisionAfterRename() {
+ assumeThat(uniqueTableLocation()).isTrue();
+
+ assertThat(validationCatalog.tableExists(tableIdent))
+ .as("Precondition: %s should not exist", tableIdent)
+ .isFalse();
+ assertThat(validationCatalog.tableExists(renamedIdent))
+ .as("Precondition: %s should not exist", renamedIdent)
+ .isFalse();
+
+ sql("CREATE TABLE %s (id BIGINT NOT NULL, data STRING) USING iceberg",
tableName);
+
+ sql("ALTER TABLE %s RENAME TO %s", tableName, renamedIdent.name());
+
+ sql("CREATE TABLE %s (id BIGINT NOT NULL, data STRING) USING iceberg",
tableName);
+
+ Table table = validationCatalog.loadTable(tableIdent);
+ Table renamedTable = validationCatalog.loadTable(renamedIdent);
+
+ assertThat(table.location())
+ .as(
+ "After rename+recreate, %s and %s must have different locations",
+ tableName, renamedTableName)
+ .isNotEqualTo(renamedTable.location());
+ }
+
+ @TestTemplate
+ public void testDropDoesntCorruptTable() {
+ assumeThat(uniqueTableLocation()).isTrue();
+
+ assertThat(validationCatalog.tableExists(tableIdent))
+ .as("Precondition: %s should not exist", tableIdent)
Review Comment:
nit: I think you can remove `Precondition` from all of these
##########
core/src/main/java/org/apache/iceberg/util/LocationUtil.java:
##########
@@ -33,4 +35,26 @@ public static String stripTrailingSlash(String path) {
}
return result;
}
+
+ /**
+ * Returns a path component derived from the {@code tableIdentifier}, used
as part of the table
+ * location URI.
+ *
+ * <p>If {@code unique} is {@code true}, the returned component will include
a random UUID suffix.
+ * Otherwise, the plain table name is returned.
+ *
+ * @param tableIdentifier Iceberg table identifier
+ * @param unique whether to ensure uniqueness
+ * @return a string representing the table name component for a location URI
+ */
+ public static String getTableNameComponent(TableIdentifier tableIdentifier,
boolean unique) {
+ Preconditions.checkNotNull(tableIdentifier, "tableIdentifier must not be
null");
Review Comment:
```suggestion
Preconditions.checkArgument(null != tableIdentifier, "Invalid
identifier: null");
```
##########
spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestUniqueLocation.java:
##########
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.spark.sql;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assumptions.assumeThat;
+
+import java.util.UUID;
+import org.apache.iceberg.CatalogProperties;
+import org.apache.iceberg.ParameterizedTestExtension;
+import org.apache.iceberg.Parameters;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.actions.DeleteOrphanFiles;
+import org.apache.iceberg.catalog.Namespace;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.exceptions.NotFoundException;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+import org.apache.iceberg.spark.CatalogTestBase;
+import org.apache.iceberg.spark.SparkCatalogConfig;
+import org.apache.iceberg.spark.actions.SparkActions;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.TestTemplate;
+import org.junit.jupiter.api.extension.ExtendWith;
+
+@ExtendWith(ParameterizedTestExtension.class)
+public class TestUniqueLocation extends CatalogTestBase {
+
+ private String renamedTableName;
+ private TableIdentifier renamedIdent;
+
+ @Parameters(name = "catalogName = {0}, implementation = {1}, config = {2}")
+ protected static Object[][] parameters() {
+ return new Object[][] {
+ {
+ SparkCatalogConfig.HIVE_WITH_UNIQUE_LOCATION.catalogName(),
+ SparkCatalogConfig.HIVE_WITH_UNIQUE_LOCATION.implementation(),
+ SparkCatalogConfig.HIVE_WITH_UNIQUE_LOCATION.properties()
+ },
+ {
+ SparkCatalogConfig.REST_WITH_UNIQUE_LOCATION.catalogName(),
+ SparkCatalogConfig.REST_WITH_UNIQUE_LOCATION.implementation(),
+ ImmutableMap.builder()
+ .putAll(SparkCatalogConfig.REST_WITH_UNIQUE_LOCATION.properties())
+ .put(CatalogProperties.URI,
restCatalog.properties().get(CatalogProperties.URI))
+ .build()
+ },
+ {
+ SparkCatalogConfig.SPARK_SESSION_WITH_UNIQUE_LOCATION.catalogName(),
+ SparkCatalogConfig.SPARK_SESSION_WITH_UNIQUE_LOCATION.implementation(),
+ SparkCatalogConfig.SPARK_SESSION_WITH_UNIQUE_LOCATION.properties()
+ },
+ };
+ }
+
+ @BeforeEach
+ public void initTableName() {
+ renamedTableName = tableName("table_2");
+ renamedIdent = TableIdentifier.of(Namespace.of("default"), "table_2");
+ }
+
+ @AfterEach
+ public void dropTestTable() {
+ try {
+ sql("DROP TABLE IF EXISTS %s", tableName);
+ sql("DROP TABLE IF EXISTS %s", renamedTableName);
+ } catch (NotFoundException ignore) {
+ // Swallow FNF exception in case of corrupted table so test failure
reason is clearer
+ }
+ }
+
+ @TestTemplate
+ public void testNoCollisionAfterRename() {
+ assumeThat(uniqueTableLocation()).isTrue();
+
+ assertThat(validationCatalog.tableExists(tableIdent))
+ .as("Precondition: %s should not exist", tableIdent)
+ .isFalse();
+ assertThat(validationCatalog.tableExists(renamedIdent))
+ .as("Precondition: %s should not exist", renamedIdent)
+ .isFalse();
+
+ sql("CREATE TABLE %s (id BIGINT NOT NULL, data STRING) USING iceberg",
tableName);
+
+ sql("ALTER TABLE %s RENAME TO %s", tableName, renamedIdent.name());
+
+ sql("CREATE TABLE %s (id BIGINT NOT NULL, data STRING) USING iceberg",
tableName);
+
+ Table table = validationCatalog.loadTable(tableIdent);
+ Table renamedTable = validationCatalog.loadTable(renamedIdent);
+
+ assertThat(table.location())
+ .as(
+ "After rename+recreate, %s and %s must have different locations",
+ tableName, renamedTableName)
+ .isNotEqualTo(renamedTable.location());
+ }
+
+ @TestTemplate
+ public void testDropDoesntCorruptTable() {
+ assumeThat(uniqueTableLocation()).isTrue();
+
+ assertThat(validationCatalog.tableExists(tableIdent))
+ .as("Precondition: %s should not exist", tableIdent)
+ .isFalse();
+ assertThat(validationCatalog.tableExists(renamedIdent))
+ .as("Precondition: %s should not exist", renamedIdent)
+ .isFalse();
+
+ sql("CREATE TABLE %s (id BIGINT NOT NULL, data STRING) USING iceberg",
tableName);
+ sql("INSERT INTO %s VALUES(0, '%s')", tableName,
UUID.randomUUID().toString());
+
+ sql("ALTER TABLE %s RENAME TO %s", tableName, renamedIdent.name());
+
+ sql("CREATE TABLE %s (id BIGINT NOT NULL, data STRING) USING iceberg",
tableName);
+ sql("INSERT INTO %s VALUES(1, '%s')", tableName,
UUID.randomUUID().toString());
+
+ sql("DROP TABLE %s PURGE", renamedTableName);
Review Comment:
maybe we should also test the drop part in `CatalogTests`?
##########
spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/SparkCatalogConfig.java:
##########
@@ -37,7 +37,9 @@ public enum SparkCatalogConfig {
REST(
"testrest",
SparkCatalog.class.getName(),
- ImmutableMap.of("type", "rest", "cache-enabled", "false")),
+ ImmutableMap.of(
Review Comment:
nit: unrelated change
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]