diqiu50 commented on code in PR #10383:
URL: https://github.com/apache/gravitino/pull/10383#discussion_r2958835625


##########
catalogs-contrib/catalog-jdbc-clickhouse/src/main/java/org/apache/gravitino/catalog/clickhouse/operations/ClickHouseDatabaseOperations.java:
##########
@@ -133,12 +180,14 @@ protected void dropDatabase(String databaseName, boolean 
cascade) {
           if (rs.next()) {
             throw new IllegalStateException(
                 String.format(

Review Comment:
   Should NoOpException



##########
catalogs-contrib/catalog-jdbc-clickhouse/src/main/java/org/apache/gravitino/catalog/clickhouse/operations/ClickHouseTableOperations.java:
##########
@@ -798,14 +804,19 @@ protected String generateAlterTableSql(
       String newComment = updateComment.getNewComment();
       if (null == StringIdentifier.fromComment(newComment)) {
         // Detect and add Gravitino id.
-        JdbcTable jdbcTable = getOrCreateTable(databaseName, tableName, 
lazyLoadTable);
-        StringIdentifier identifier = 
StringIdentifier.fromComment(jdbcTable.comment());
+        lazyLoadTable = getOrCreateTable(databaseName, tableName, 
lazyLoadTable);
+        StringIdentifier identifier = 
StringIdentifier.fromComment(lazyLoadTable.comment());
         if (null != identifier) {
           newComment = StringIdentifier.addToComment(identifier, newComment);
         }
       }
-      String escapedComment = newComment.replace("'", "''");
-      alterSql.add(" MODIFY COMMENT '%s'".formatted(escapedComment));
+      // Re-embed cluster metadata so it is not lost when the comment is 
updated.
+      lazyLoadTable = getOrCreateTable(databaseName, tableName, lazyLoadTable);

Review Comment:
   Why we call the getOrCreateTable twice



##########
catalogs-contrib/catalog-jdbc-clickhouse/src/main/java/org/apache/gravitino/catalog/clickhouse/operations/ClickHouseClusterUtils.java:
##########
@@ -0,0 +1,111 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing,
+ *  software distributed under the License is distributed on an
+ *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *  KIND, either express or implied.  See the License for the
+ *  specific language governing permissions and limitations
+ *  under the License.
+ */
+package org.apache.gravitino.catalog.clickhouse.operations;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.commons.lang3.StringUtils;
+
+/**
+ * Utilities for embedding and extracting ClickHouse cluster metadata in 
object COMMENT fields.
+ *
+ * <p><b>Why COMMENT?</b>
+ *
+ * <p>ClickHouse does not persist {@code ON CLUSTER} information in any 
queryable system table for
+ * non-Replicated objects:
+ *
+ * <ul>
+ *   <li>{@code SHOW CREATE DATABASE} omits the {@code ON CLUSTER} clause.
+ *   <li>{@code SHOW CREATE TABLE} omits the {@code ON CLUSTER} clause (each 
node stores the local
+ *       DDL without the distribution directive).
+ *   <li>{@code system.databases.cluster} is only populated for {@code 
Replicated}-engine databases.
+ * </ul>
+ *
+ * <p>Gravitino therefore embeds the cluster name inside the object's COMMENT 
field at creation time
+ * using a non-printable SOH separator ({@code \u0001}). The metadata is 
invisible to end users
+ * because Gravitino strips it before surfacing the comment.
+ *
+ * <p><b>Stored format:</b> {@code userComment\u0001ch.cluster=clusterName}
+ *
+ * <p><b>Limitation:</b> This mechanism only works for databases and tables 
created through
+ * Gravitino. If a database or table was created directly in ClickHouse 
(bypassing Gravitino),
+ * Gravitino has no way to determine whether it was created {@code ON CLUSTER} 
or which cluster name
+ * was used. In that case {@link #extractClusterFromComment} returns {@code 
null} and the {@code
+ * on-cluster} / {@code cluster-name} properties reported by Gravitino will be 
absent or inaccurate.
+ */

Review Comment:
   Give an example for the comment of `show create table` command 



##########
catalogs-contrib/catalog-jdbc-clickhouse/src/main/java/org/apache/gravitino/catalog/clickhouse/operations/ClickHouseTableOperations.java:
##########
@@ -798,14 +804,19 @@ protected String generateAlterTableSql(
       String newComment = updateComment.getNewComment();
       if (null == StringIdentifier.fromComment(newComment)) {
         // Detect and add Gravitino id.
-        JdbcTable jdbcTable = getOrCreateTable(databaseName, tableName, 
lazyLoadTable);
-        StringIdentifier identifier = 
StringIdentifier.fromComment(jdbcTable.comment());
+        lazyLoadTable = getOrCreateTable(databaseName, tableName, 
lazyLoadTable);
+        StringIdentifier identifier = 
StringIdentifier.fromComment(lazyLoadTable.comment());
         if (null != identifier) {
           newComment = StringIdentifier.addToComment(identifier, newComment);
         }
       }
-      String escapedComment = newComment.replace("'", "''");
-      alterSql.add(" MODIFY COMMENT '%s'".formatted(escapedComment));
+      // Re-embed cluster metadata so it is not lost when the comment is 
updated.
+      lazyLoadTable = getOrCreateTable(databaseName, tableName, lazyLoadTable);
+      String clusterName = 
lazyLoadTable.properties().get(ClusterConstants.CLUSTER_NAME);
+      if (StringUtils.isNotBlank(clusterName)) {
+        newComment = ClickHouseClusterUtils.embedClusterInComment(newComment, 
clusterName);
+      }
+      alterSql.add(" MODIFY COMMENT '%s'".formatted(newComment.replace("'", 
"''")));

Review Comment:
   Call the unEscapeComment function is better



##########
catalogs-contrib/catalog-jdbc-clickhouse/src/main/java/org/apache/gravitino/catalog/clickhouse/operations/ClickHouseDatabaseOperations.java:
##########
@@ -94,10 +104,12 @@ protected String generateCreateDatabaseSql(
     if (onCluster(properties)) {
       String clusterName = properties.get(ClusterConstants.CLUSTER_NAME);
       createDatabaseSql.append(String.format(" ON CLUSTER `%s`", clusterName));
-    }
-
-    if (StringUtils.isNotEmpty(originComment)) {
-      createDatabaseSql.append(String.format(" COMMENT '%s'", originComment));
+      // Embed the cluster name into the COMMENT so it can be retrieved later 
(e.g., at DROP time).
+      // ClickHouse does not persist ON CLUSTER info in SHOW CREATE DATABASE 
for Atomic databases.
+      String storedComment = embedClusterInComment(originComment, clusterName);
+      createDatabaseSql.append(String.format(" COMMENT '%s'", 
storedComment.replace("'", "''")));
+    } else if (StringUtils.isNotEmpty(originComment)) {
+      createDatabaseSql.append(String.format(" COMMENT '%s'", 
originComment.replace("'", "''")));
     }

Review Comment:
   Add the functions escapeComments and unescapeComments to handle here



##########
catalogs-contrib/catalog-jdbc-clickhouse/src/main/java/org/apache/gravitino/catalog/clickhouse/operations/ClickHouseDatabaseOperations.java:
##########
@@ -124,12 +127,65 @@ public void create(String databaseName, String comment, 
Map<String, String> prop
   protected void dropDatabase(String databaseName, boolean cascade) {
     try (final Connection connection = getConnection()) {
       connection.setCatalog(createSysDatabaseNameSet().iterator().next());
-      JdbcConnectorUtils.executeUpdate(connection, 
generateDropDatabaseSql(databaseName, cascade));
+      if (!cascade) {
+        try (Statement stmt = connection.createStatement();
+            ResultSet rs = stmt.executeQuery(String.format("SHOW TABLES IN 
`%s`", databaseName))) {

Review Comment:
   May be the table is just not in this host, if you don't want handle this 
case, add some comments



##########
catalogs-contrib/catalog-jdbc-clickhouse/src/main/java/org/apache/gravitino/catalog/clickhouse/operations/ClickHouseDatabaseOperations.java:
##########
@@ -124,12 +127,65 @@ public void create(String databaseName, String comment, 
Map<String, String> prop
   protected void dropDatabase(String databaseName, boolean cascade) {
     try (final Connection connection = getConnection()) {
       connection.setCatalog(createSysDatabaseNameSet().iterator().next());
-      JdbcConnectorUtils.executeUpdate(connection, 
generateDropDatabaseSql(databaseName, cascade));
+      if (!cascade) {
+        try (Statement stmt = connection.createStatement();
+            ResultSet rs = stmt.executeQuery(String.format("SHOW TABLES IN 
`%s`", databaseName))) {
+          if (rs.next()) {
+            throw new IllegalStateException(
+                String.format(
+                    "Database %s is not empty, the value of cascade should be 
true.",
+                    databaseName));
+          }
+        }
+      }
+      String clusterName = getDatabaseClusterName(connection, databaseName);
+      JdbcConnectorUtils.executeUpdate(
+          connection, generateDropDatabaseSql(databaseName, clusterName));
     } catch (final SQLException se) {
       throw this.exceptionMapper.toGravitinoException(se);
     }
   }
 
+  /**
+   * Generates the SQL statement to drop a ClickHouse database. If the 
database was created with ON
+   * CLUSTER, the DROP statement includes {@code ON CLUSTER `clusterName` 
SYNC} to propagate the
+   * operation across all cluster nodes synchronously.
+   *
+   * @param databaseName The name of the database to drop.
+   * @param clusterName The cluster name extracted from the database's CREATE 
SQL, or {@code null}
+   *     if the database is not on a cluster.
+   * @return The DROP DATABASE SQL statement.
+   */
+  @VisibleForTesting
+  String generateDropDatabaseSql(String databaseName, String clusterName) {
+    StringBuilder sql = new StringBuilder(String.format("DROP DATABASE `%s`", 
databaseName));
+    if (StringUtils.isNotBlank(clusterName)) {
+      sql.append(String.format(" ON CLUSTER `%s` SYNC", clusterName));

Review Comment:
   In a normal ClickHouse cluster, the database should not be visible after 
being dropped.



##########
catalogs-contrib/catalog-jdbc-clickhouse/src/main/java/org/apache/gravitino/catalog/clickhouse/operations/ClickHouseClusterUtils.java:
##########
@@ -0,0 +1,111 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing,
+ *  software distributed under the License is distributed on an
+ *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *  KIND, either express or implied.  See the License for the
+ *  specific language governing permissions and limitations
+ *  under the License.
+ */
+package org.apache.gravitino.catalog.clickhouse.operations;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.commons.lang3.StringUtils;
+
+/**
+ * Utilities for embedding and extracting ClickHouse cluster metadata in 
object COMMENT fields.
+ *
+ * <p><b>Why COMMENT?</b>
+ *
+ * <p>ClickHouse does not persist {@code ON CLUSTER} information in any 
queryable system table for
+ * non-Replicated objects:
+ *
+ * <ul>
+ *   <li>{@code SHOW CREATE DATABASE} omits the {@code ON CLUSTER} clause.
+ *   <li>{@code SHOW CREATE TABLE} omits the {@code ON CLUSTER} clause (each 
node stores the local
+ *       DDL without the distribution directive).
+ *   <li>{@code system.databases.cluster} is only populated for {@code 
Replicated}-engine databases.
+ * </ul>
+ *
+ * <p>Gravitino therefore embeds the cluster name inside the object's COMMENT 
field at creation time
+ * using a non-printable SOH separator ({@code \u0001}). The metadata is 
invisible to end users
+ * because Gravitino strips it before surfacing the comment.
+ *
+ * <p><b>Stored format:</b> {@code userComment\u0001ch.cluster=clusterName}
+ *
+ * <p><b>Limitation:</b> This mechanism only works for databases and tables 
created through
+ * Gravitino. If a database or table was created directly in ClickHouse 
(bypassing Gravitino),
+ * Gravitino has no way to determine whether it was created {@code ON CLUSTER} 
or which cluster name
+ * was used. In that case {@link #extractClusterFromComment} returns {@code 
null} and the {@code
+ * on-cluster} / {@code cluster-name} properties reported by Gravitino will be 
absent or inaccurate.
+ */
+public final class ClickHouseClusterUtils {
+
+  /**
+   * Separator character between the user comment and the cluster metadata 
token. SOH (U+0001) is a
+   * non-printable control character that will not appear in normal 
user-supplied comments.
+   */
+  @VisibleForTesting public static final char CLUSTER_META_SEP = '\u0001';

Review Comment:
   The separator is invisible to users, so it cannot be manually fixed when 
have some bugs.



##########
catalogs-contrib/catalog-jdbc-clickhouse/src/main/java/org/apache/gravitino/catalog/clickhouse/operations/ClickHouseTableOperations.java:
##########
@@ -190,10 +188,15 @@ protected String generateCreateTableSql(
 
     appendPartitionClause(partitioning, sqlBuilder, engine);
 
-    // Add table comment if specified
-    if (StringUtils.isNotEmpty(comment)) {
-      String escapedComment = comment.replace("'", "''");
-      sqlBuilder.append(" COMMENT '%s'".formatted(escapedComment));
+    // Add table comment; embed cluster name so it can be recovered at 
DROP/ALTER time.
+    // ClickHouse does not persist ON CLUSTER in SHOW CREATE TABLE (see 
ClickHouseClusterUtils).
+    String storedComment =
+        onCluster
+            ? ClickHouseClusterUtils.embedClusterInComment(
+                comment, notNullProperties.get(ClusterConstants.CLUSTER_NAME))
+            : comment;
+    if (StringUtils.isNotEmpty(storedComment)) {
+      sqlBuilder.append(" COMMENT '%s'".formatted(storedComment.replace("'", 
"''")));

Review Comment:
   The same problem



##########
catalogs-contrib/catalog-jdbc-clickhouse/src/main/java/org/apache/gravitino/catalog/clickhouse/operations/ClickHouseClusterUtils.java:
##########
@@ -0,0 +1,111 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing,
+ *  software distributed under the License is distributed on an
+ *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *  KIND, either express or implied.  See the License for the
+ *  specific language governing permissions and limitations
+ *  under the License.
+ */
+package org.apache.gravitino.catalog.clickhouse.operations;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.commons.lang3.StringUtils;
+
+/**
+ * Utilities for embedding and extracting ClickHouse cluster metadata in 
object COMMENT fields.
+ *
+ * <p><b>Why COMMENT?</b>
+ *
+ * <p>ClickHouse does not persist {@code ON CLUSTER} information in any 
queryable system table for
+ * non-Replicated objects:
+ *
+ * <ul>
+ *   <li>{@code SHOW CREATE DATABASE} omits the {@code ON CLUSTER} clause.
+ *   <li>{@code SHOW CREATE TABLE} omits the {@code ON CLUSTER} clause (each 
node stores the local
+ *       DDL without the distribution directive).
+ *   <li>{@code system.databases.cluster} is only populated for {@code 
Replicated}-engine databases.
+ * </ul>
+ *
+ * <p>Gravitino therefore embeds the cluster name inside the object's COMMENT 
field at creation time
+ * using a non-printable SOH separator ({@code \u0001}). The metadata is 
invisible to end users
+ * because Gravitino strips it before surfacing the comment.
+ *
+ * <p><b>Stored format:</b> {@code userComment\u0001ch.cluster=clusterName}
+ *
+ * <p><b>Limitation:</b> This mechanism only works for databases and tables 
created through
+ * Gravitino. If a database or table was created directly in ClickHouse 
(bypassing Gravitino),
+ * Gravitino has no way to determine whether it was created {@code ON CLUSTER} 
or which cluster name
+ * was used. In that case {@link #extractClusterFromComment} returns {@code 
null} and the {@code
+ * on-cluster} / {@code cluster-name} properties reported by Gravitino will be 
absent or inaccurate.
+ */
+public final class ClickHouseClusterUtils {
+
+  /**
+   * Separator character between the user comment and the cluster metadata 
token. SOH (U+0001) is a
+   * non-printable control character that will not appear in normal 
user-supplied comments.
+   */
+  @VisibleForTesting public static final char CLUSTER_META_SEP = '\u0001';

Review Comment:
   Add a hit to mark it's generate by Gravitino server



##########
catalogs-contrib/catalog-jdbc-clickhouse/src/test/java/org/apache/gravitino/catalog/clickhouse/operations/TestClickHouseTableOperationsCluster.java:
##########
@@ -254,6 +259,107 @@ void 
testGenerateDropTableSqlOnClusterWithoutClusterName() {
     Assertions.assertEquals("DROP TABLE `orders`", sql);
   }
 
+  // 
---------------------------------------------------------------------------
+  // Cluster metadata embedded in COMMENT
+  // 
---------------------------------------------------------------------------
+
+  /**
+   * When a MergeTree table is created ON CLUSTER, the cluster name must be 
embedded in the stored
+   * COMMENT so it can be recovered at DROP/load time (SHOW CREATE TABLE omits 
ON CLUSTER).
+   */
+  @Test
+  void testCreateTableOnClusterEmbedsCluterNameInComment() {

Review Comment:
   Cluster
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to