This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new fca9c91193f [fix](restore) Add restore_reset_index_id config #45283 (#45574) fca9c91193f is described below commit fca9c91193ffd917d1157e562ae584bde963ff03 Author: walter <maoch...@selectdb.com> AuthorDate: Wed Dec 18 22:45:53 2024 +0800 [fix](restore) Add restore_reset_index_id config #45283 (#45574) cherry pick from #45283 --- .../main/java/org/apache/doris/common/Config.java | 9 ++ .../apache/doris/alter/SchemaChangeHandler.java | 10 ++ .../java/org/apache/doris/catalog/OlapTable.java | 14 +- .../test_backup_restore_inverted_idx.groovy | 174 +++++++++++++++++++++ 4 files changed, 206 insertions(+), 1 deletion(-) diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 1b5e412437b..9fead6e6f6e 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -1574,6 +1574,15 @@ public class Config extends ConfigBase { @ConfField(mutable = false) public static boolean enable_restore_snapshot_rpc_compression = true; + /** + * A internal config, to indicate whether to reset the index id when restore olap table. + * + * The inverted index saves the index id in the file path/header, so the index id between + * two clusters must be the same. + */ + @ConfField(mutable = true, masterOnly = true) + public static boolean restore_reset_index_id = true; + /** * Control the max num of tablets per backup job involved. */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java index ebad5f24f4f..117297b0a79 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java @@ -2569,6 +2569,7 @@ public class SchemaChangeHandler extends AlterHandler { IndexDef indexDef = alterClause.getIndexDef(); Set<String> newColset = Sets.newTreeSet(String.CASE_INSENSITIVE_ORDER); newColset.addAll(indexDef.getColumns()); + Set<Long> existedIndexIdSet = Sets.newHashSet(); for (Index existedIdx : existedIndexes) { if (existedIdx.getIndexName().equalsIgnoreCase(indexDef.getIndexName())) { if (indexDef.isSetIfNotExists()) { @@ -2585,7 +2586,16 @@ public class SchemaChangeHandler extends AlterHandler { indexDef.getIndexType() + " index for columns (" + String.join(",", indexDef.getColumns()) + " ) already exist."); } + existedIndexIdSet.add(existedIdx.getIndexId()); } + + // The restored olap table may not reset the index id, which comes from the upstream, + // so we need to check and reset the index id here, to avoid confliction. + // See OlapTable.resetIdsForRestore for details. + while (existedIndexIdSet.contains(alterIndex.getIndexId())) { + alterIndex.setIndexId(Env.getCurrentEnv().getNextId()); + } + boolean disableInvertedIndexV1ForVariant = olapTable.getInvertedIndexStorageFormat() == TInvertedIndexStorageFormat.V1 && ConnectContext.get().getSessionVariable() .getDisableInvertedIndexV1ForVaraint(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index d64b8febc7d..4f85a989e46 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -833,7 +833,19 @@ public class OlapTable extends Table implements MTMVRelatedTableIf { if (this.indexes != null) { List<Index> indexes = this.indexes.getIndexes(); for (Index idx : indexes) { - idx.setIndexId(env.getNextId()); + long newIdxId; + if (Config.restore_reset_index_id) { + newIdxId = env.getNextId(); + } else { + // The index id from the upstream is used, if restore_reset_index_id is not set. + // + // This is because the index id is used as a part of inverted file name/header + // in BE. During restore, the inverted file is copied from the upstream to the + // downstream. If the index id is changed, it might cause the BE to fail to find + // the inverted files. + newIdxId = idx.getIndexId(); + } + idx.setIndexId(newIdxId); } for (Map.Entry<Long, MaterializedIndexMeta> entry : indexIdToMeta.entrySet()) { entry.getValue().setIndexes(indexes); diff --git a/regression-test/suites/backup_restore/test_backup_restore_inverted_idx.groovy b/regression-test/suites/backup_restore/test_backup_restore_inverted_idx.groovy new file mode 100644 index 00000000000..0dadc99dd21 --- /dev/null +++ b/regression-test/suites/backup_restore/test_backup_restore_inverted_idx.groovy @@ -0,0 +1,174 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_backup_restore_inverted_idx", "backup_restore") { + String suiteName = "test_backup_restore_inverted_idx" + String dbName = "${suiteName}_db" + String repoName = "repo_" + UUID.randomUUID().toString().replace("-", "") + String snapshotName = "${suiteName}_snapshot" + String tableName = "${suiteName}_table" + + def syncer = getSyncer() + syncer.createS3Repository(repoName) + sql "CREATE DATABASE IF NOT EXISTS ${dbName}" + + sql "DROP TABLE IF EXISTS ${dbName}.${tableName}" + sql """ + CREATE TABLE ${dbName}.${tableName} ( + `id` LARGEINT NOT NULL, + `value` STRING DEFAULT "", + `value1` STRING DEFAULT "", + INDEX `idx_value` (`value`) USING INVERTED PROPERTIES ("parser" = "english") + ) + UNIQUE KEY(`id`) + PARTITION BY RANGE(`id`) + ( + PARTITION p1 VALUES LESS THAN ("10"), + PARTITION p2 VALUES LESS THAN ("20"), + PARTITION p3 VALUES LESS THAN ("30"), + PARTITION p4 VALUES LESS THAN ("40"), + PARTITION p5 VALUES LESS THAN ("50"), + PARTITION p6 VALUES LESS THAN ("60"), + PARTITION p7 VALUES LESS THAN ("70") + ) + DISTRIBUTED BY HASH(`id`) BUCKETS 2 + PROPERTIES + ( + "replication_num" = "1" + ) + """ + List<String> values = [] + int numRows = 6; + for (int j = 0; j <= numRows; ++j) { + values.add("(${j}1, \"${j} ${j*10} ${j*100}\", \"${j*11} ${j*12}\")") + } + sql "INSERT INTO ${dbName}.${tableName} VALUES ${values.join(",")}" + + def indexes = sql_return_maparray "SHOW INDEX FROM ${dbName}.${tableName}" + logger.info("current indexes: ${indexes}") + assertTrue(indexes.any { it.Key_name == "idx_value" && it.Index_type == "INVERTED" }) + + def query_index_id = { indexName -> + def res = sql_return_maparray "SHOW TABLETS FROM ${dbName}.${tableName}" + def tabletId = res[0].TabletId + res = sql_return_maparray "SHOW TABLET ${tabletId}" + def dbId = res[0].DbId + def tableId = res[0].TableId + res = sql_return_maparray """ SHOW PROC "/dbs/${dbId}/${tableId}/indexes" """ + for (def record in res) { + if (record.KeyName == indexName) { + return record.IndexId + } + } + throw new Exception("index ${indexName} is not exists") + } + + try { + sql """ ADMIN SET FRONTEND CONFIG ("restore_reset_index_id" = "false") """ + sql """ + BACKUP SNAPSHOT ${dbName}.${snapshotName} + TO `${repoName}` + ON (`${tableName}`) + """ + + syncer.waitSnapshotFinish(dbName) + + def snapshot = syncer.getSnapshotTimestamp(repoName, snapshotName) + assertTrue(snapshot != null) + + def indexId = query_index_id("idx_value") + logger.info("the exists index id is ${indexId}") + + sql "DROP TABLE ${dbName}.${tableName}" + + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + ON (`${tableName}`) + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true" + ) + """ + + syncer.waitAllRestoreFinish(dbName) + + indexes = sql_return_maparray "SHOW INDEX FROM ${dbName}.${tableName}" + logger.info("current indexes: ${indexes}") + assertTrue(indexes.any { it.Key_name == "idx_value" && it.Index_type == "INVERTED" }) + + def newIndexId = query_index_id("idx_value") + assertTrue(newIndexId == indexId, "old index id ${indexId}, new index id ${newIndexId}") + + // 1. query with inverted index + sql """ set enable_match_without_inverted_index = false """ + def res = sql """ SELECT /*+ SET_VAR(inverted_index_skip_threshold = 0) */ * FROM ${dbName}.${tableName} WHERE value MATCH_ANY "10" """ + assertTrue(res.size() > 0) + + // 2. add partition and query + sql """ ALTER TABLE ${dbName}.${tableName} ADD PARTITION p8 VALUES LESS THAN ("80") """ + sql """ INSERT INTO ${dbName}.${tableName} VALUES (75, "75 750", "76 77") """ + res = sql """ SELECT /*+ SET_VAR(inverted_index_skip_threshold = 0) */ * FROM ${dbName}.${tableName} WHERE value MATCH_ANY "75" """ + assertTrue(res.size() > 0) + + // 3. add new index + sql """ ALTER TABLE ${dbName}.${tableName} + ADD INDEX idx_value1(value1) USING INVERTED PROPERTIES("parser" = "english") """ + + indexes = sql_return_maparray """ SHOW INDEX FROM ${dbName}.${tableName} """ + logger.info("current indexes: ${indexes}") + assertTrue(indexes.any { it.Key_name == "idx_value1" && it.Index_type == "INVERTED" }) + + // 4. drop old index + sql """ ALTER TABLE ${dbName}.${tableName} DROP INDEX idx_value""" + indexes = sql_return_maparray """ SHOW INDEX FROM ${dbName}.${tableName} """ + logger.info("current indexes: ${indexes}") + assertFalse(indexes.any { it.Key_name == "idx_value" && it.Index_type == "INVERTED" }) + + // 5. query new index with inverted idx + sql """ INSERT INTO ${dbName}.${tableName} VALUES(76, "76 760", "12321 121") """ + sql """ BUILD INDEX idx_value1 ON ${dbName}.${tableName} """ + def build_index_finished = false + for (int i = 0; i < 100; i++) { + def build_status = sql_return_maparray """ + SHOW BUILD INDEX FROM ${dbName} WHERE TableName = "${tableName}" """ + if (!(build_status.any { it.State != 'FINISHED' })) { + build_index_finished = true + break + } + sleep(1000) + } + if (!build_index_finished) { + def build_status = sql_return_maparray """ + SHOW BUILD INDEX FROM ${dbName} WHERE TableName = "${tableName}" """ + logger.info("the build index status: ${build_status}") + assertTrue(false) + } + res = sql """ SELECT /*+ SET_VAR(inverted_index_skip_threshold = 0) */ * FROM ${dbName}.${tableName} WHERE value1 MATCH_ANY "12321" """ + assertTrue(res.size() > 0) + + } finally { + sql """ set enable_match_without_inverted_index = true """ + sql """ ADMIN SET FRONTEND CONFIG ("restore_reset_index_id" = "true") """ + } + + sql "DROP TABLE ${dbName}.${tableName} FORCE" + sql "DROP DATABASE ${dbName} FORCE" + sql "DROP REPOSITORY `${repoName}`" +} + --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org