anmolnar commented on code in PR #6887: URL: https://github.com/apache/hbase/pull/6887#discussion_r2056588724
########## hbase-protocol-shaded/src/main/protobuf/server/ActiveClusterSuffix.proto: ########## @@ -0,0 +1,33 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +syntax = "proto2"; +// This file contains protocol buffers that are shared throughout HBase +package hbase.pb; + +option java_package = "org.apache.hadoop.hbase.shaded.protobuf.generated"; +option java_outer_classname = "ActiveClusterSuffixProtos"; +option java_generate_equals_and_hash = true; +option optimize_for = SPEED; + +/** + * Content of the '/hbase/active_cluster_suffix.id' file to indicate the active cluster. + */ +message ActiveClusterSuffix { + // This is the active cluster suffix set by the user in the config, as a String + required string active_cluster_suffix = 1; Review Comment: Are you going to store only the suffix? Suffix is usually empty for the active cluster, so you'll write an empty file in that case. I'd suggest to add the cluster id as well: "clusterId_metasuffix". ########## hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java: ########## @@ -382,4 +392,106 @@ public void stop() { public void logFileSystemState(Logger log) throws IOException { CommonFSUtils.logFileSystemState(fs, rootdir, log); } + + // This API should be to be called on the 'old active cluster' before the switch over, + // to delete the associated active_cluster.id file. + public boolean prepareForSwitchOver() { + boolean status = true; + final Path idFile = new Path(rootdir, HConstants.ACTIVE_CLUSTER_SUFFIX_FILE_NAME); + try { + LOG.info("Deleting active cluster ID file"); + FSUtils.removeFile(fs, idFile); + } catch (IOException ioe) { + LOG.warn("Failed to delete the active cluster ID file due to IOException", ioe); + status = false; + } + return status; + } + + // This API should be called on the 'new active cluster' to complete the process of switch over, + // to create the new active_cluster.id file. + public boolean performSwitchOver() { Review Comment: Where do you use this method? ########## hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java: ########## @@ -382,4 +392,106 @@ public void stop() { public void logFileSystemState(Logger log) throws IOException { CommonFSUtils.logFileSystemState(fs, rootdir, log); } + + // This API should be to be called on the 'old active cluster' before the switch over, + // to delete the associated active_cluster.id file. + public boolean prepareForSwitchOver() { Review Comment: Where do you use this method? ########## hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java: ########## @@ -382,4 +392,106 @@ public void stop() { public void logFileSystemState(Logger log) throws IOException { CommonFSUtils.logFileSystemState(fs, rootdir, log); } + + // This API should be to be called on the 'old active cluster' before the switch over, + // to delete the associated active_cluster.id file. + public boolean prepareForSwitchOver() { + boolean status = true; + final Path idFile = new Path(rootdir, HConstants.ACTIVE_CLUSTER_SUFFIX_FILE_NAME); + try { + LOG.info("Deleting active cluster ID file"); + FSUtils.removeFile(fs, idFile); + } catch (IOException ioe) { + LOG.warn("Failed to delete the active cluster ID file due to IOException", ioe); + status = false; + } + return status; + } + + // This API should be called on the 'new active cluster' to complete the process of switch over, + // to create the new active_cluster.id file. + public boolean performSwitchOver() { + LOG.info("Creating new cluster ID file during switchover"); + boolean status = true; + int wait = conf.getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000); + try { + String ms = conf.get(HConstants.HBASE_META_TABLE_SUFFIX, + HConstants.HBASE_META_TABLE_SUFFIX_DEFAULT_VALUE); + if ( + !FSUtils.checkFileExistsInHbaseRootDir(fs, rootdir, + HConstants.ACTIVE_CLUSTER_SUFFIX_FILE_NAME, wait) + ) { + FSUtils.setActiveClusterSuffix(fs, rootdir, new ActiveClusterSuffix(ms), wait); + } + activeClusterSuffix = FSUtils.getActiveClusterSuffix(fs, rootdir); + } catch (IOException ioe) { + // reset the cluster id + status = false; + LOG.error("Failed to create active cluster id file during switchover, exception", ioe); + } + return status; + } + + private void negotiateActiveClusterIdFile(long wait) throws IOException { + LOG.info("Active cluster Suffix file"); + String suffix_file_name = HConstants.ACTIVE_CLUSTER_SUFFIX_FILE_NAME; + boolean activeClusterSuffixFileExists = + FSUtils.checkFileExistsInHbaseRootDir(fs, rootdir, suffix_file_name, wait); + + if (!isReadOnlyModeEnabled(conf)) { + // this is the active cluster, create active cluster suffix file if it does not exist + if (!activeClusterSuffixFileExists) { + FSUtils.setActiveClusterSuffix(fs, rootdir, + new ActiveClusterSuffix(getActiveClusterSuffixFromConfig(conf)), wait); + } else { + // verify the contents against the config set + ActiveClusterSuffix acs = FSUtils.getActiveClusterSuffix(fs, rootdir); + if ( + acs.equals(getActiveClusterSuffix()) + || (acs.toString().equals("null") && getActiveClusterSuffix().toString().isEmpty()) Review Comment: We should add the clusterId to the file contents, in which case we could verify both: clusterId matches and suffix matches. ########## hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java: ########## @@ -382,4 +392,106 @@ public void stop() { public void logFileSystemState(Logger log) throws IOException { CommonFSUtils.logFileSystemState(fs, rootdir, log); } + + // This API should be to be called on the 'old active cluster' before the switch over, + // to delete the associated active_cluster.id file. + public boolean prepareForSwitchOver() { + boolean status = true; + final Path idFile = new Path(rootdir, HConstants.ACTIVE_CLUSTER_SUFFIX_FILE_NAME); + try { + LOG.info("Deleting active cluster ID file"); + FSUtils.removeFile(fs, idFile); + } catch (IOException ioe) { + LOG.warn("Failed to delete the active cluster ID file due to IOException", ioe); + status = false; + } + return status; + } + + // This API should be called on the 'new active cluster' to complete the process of switch over, + // to create the new active_cluster.id file. + public boolean performSwitchOver() { + LOG.info("Creating new cluster ID file during switchover"); + boolean status = true; + int wait = conf.getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000); + try { + String ms = conf.get(HConstants.HBASE_META_TABLE_SUFFIX, + HConstants.HBASE_META_TABLE_SUFFIX_DEFAULT_VALUE); + if ( + !FSUtils.checkFileExistsInHbaseRootDir(fs, rootdir, + HConstants.ACTIVE_CLUSTER_SUFFIX_FILE_NAME, wait) + ) { + FSUtils.setActiveClusterSuffix(fs, rootdir, new ActiveClusterSuffix(ms), wait); + } + activeClusterSuffix = FSUtils.getActiveClusterSuffix(fs, rootdir); + } catch (IOException ioe) { + // reset the cluster id + status = false; + LOG.error("Failed to create active cluster id file during switchover, exception", ioe); + } + return status; + } + + private void negotiateActiveClusterIdFile(long wait) throws IOException { + LOG.info("Active cluster Suffix file"); + String suffix_file_name = HConstants.ACTIVE_CLUSTER_SUFFIX_FILE_NAME; + boolean activeClusterSuffixFileExists = + FSUtils.checkFileExistsInHbaseRootDir(fs, rootdir, suffix_file_name, wait); + + if (!isReadOnlyModeEnabled(conf)) { + // this is the active cluster, create active cluster suffix file if it does not exist + if (!activeClusterSuffixFileExists) { + FSUtils.setActiveClusterSuffix(fs, rootdir, + new ActiveClusterSuffix(getActiveClusterSuffixFromConfig(conf)), wait); + } else { + // verify the contents against the config set + ActiveClusterSuffix acs = FSUtils.getActiveClusterSuffix(fs, rootdir); + if ( + acs.equals(getActiveClusterSuffix()) + || (acs.toString().equals("null") && getActiveClusterSuffix().toString().isEmpty()) + ) { + LOG.info("Active cluster is being restarted, Suffix {}", acs); Review Comment: I'm not sure if 'restart' is always the case. Instead I'd log something like: > I'm the active cluster on this storage location ########## hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java: ########## @@ -382,4 +392,106 @@ public void stop() { public void logFileSystemState(Logger log) throws IOException { CommonFSUtils.logFileSystemState(fs, rootdir, log); } + + // This API should be to be called on the 'old active cluster' before the switch over, + // to delete the associated active_cluster.id file. + public boolean prepareForSwitchOver() { + boolean status = true; + final Path idFile = new Path(rootdir, HConstants.ACTIVE_CLUSTER_SUFFIX_FILE_NAME); + try { + LOG.info("Deleting active cluster ID file"); + FSUtils.removeFile(fs, idFile); + } catch (IOException ioe) { + LOG.warn("Failed to delete the active cluster ID file due to IOException", ioe); + status = false; + } + return status; + } + + // This API should be called on the 'new active cluster' to complete the process of switch over, + // to create the new active_cluster.id file. + public boolean performSwitchOver() { + LOG.info("Creating new cluster ID file during switchover"); + boolean status = true; + int wait = conf.getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000); + try { + String ms = conf.get(HConstants.HBASE_META_TABLE_SUFFIX, + HConstants.HBASE_META_TABLE_SUFFIX_DEFAULT_VALUE); + if ( + !FSUtils.checkFileExistsInHbaseRootDir(fs, rootdir, + HConstants.ACTIVE_CLUSTER_SUFFIX_FILE_NAME, wait) + ) { + FSUtils.setActiveClusterSuffix(fs, rootdir, new ActiveClusterSuffix(ms), wait); + } + activeClusterSuffix = FSUtils.getActiveClusterSuffix(fs, rootdir); + } catch (IOException ioe) { + // reset the cluster id + status = false; + LOG.error("Failed to create active cluster id file during switchover, exception", ioe); + } + return status; + } + + private void negotiateActiveClusterIdFile(long wait) throws IOException { + LOG.info("Active cluster Suffix file"); + String suffix_file_name = HConstants.ACTIVE_CLUSTER_SUFFIX_FILE_NAME; + boolean activeClusterSuffixFileExists = + FSUtils.checkFileExistsInHbaseRootDir(fs, rootdir, suffix_file_name, wait); + + if (!isReadOnlyModeEnabled(conf)) { + // this is the active cluster, create active cluster suffix file if it does not exist + if (!activeClusterSuffixFileExists) { + FSUtils.setActiveClusterSuffix(fs, rootdir, + new ActiveClusterSuffix(getActiveClusterSuffixFromConfig(conf)), wait); + } else { + // verify the contents against the config set + ActiveClusterSuffix acs = FSUtils.getActiveClusterSuffix(fs, rootdir); + if ( + acs.equals(getActiveClusterSuffix()) + || (acs.toString().equals("null") && getActiveClusterSuffix().toString().isEmpty()) + ) { + LOG.info("Active cluster is being restarted, Suffix {}", acs); + } else { + // throw error + LOG.info("rootdir {} : Active Cluster File Suffix {} ", rootdir, acs); + throw new IOException("Can not start active cluster, please check the value of " + + "configuration hbase.global.readonly.enabled in hbase-site.xml, " + + "Active Cluster Suffix : " + getActiveClusterSuffix() + + ": Active Cluster File Suffix " + acs); + } + } + } else { + // this is a replica cluster + if (!activeClusterSuffixFileExists) { + // suffix file must exist, if not throw error + LOG.info("rootdir {}", rootdir); + throw new IOException("Read Replica Cluster must have associated Active Cluster Suffix " + + "file. File 'active_cluster_suffix.id' does not exist in $HBASE_ROOT."); + } Review Comment: This case doesn't necesserily have to be an error: users should be able to start the read replica clusters without and active cluster running. The new file is actually only important for non-RO mode clusters, RO instances don't need to check it at all. ########## hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestClusterId.java: ########## @@ -114,5 +114,4 @@ public void testRewritingClusterIdToPB() throws Exception { HMaster master = TEST_UTIL.getHBaseCluster().getMaster(); assertEquals(1, master.getServerManager().getOnlineServersList().size()); } - Review Comment: Please remove this change. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
