[ https://issues.apache.org/jira/browse/HBASE-20338?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Duo Zhang updated HBASE-20338:
------------------------------
    Fix Version/s: 2.0.0
                       (was: 3.0.0-alpha-1)
                       (was: 2.1.0)
                       (was: 2.0.1)

> WALProcedureStore#recoverLease() should have fixed sleeps for retrying rollWriter()
> -----------------------------------------------------------------------------------
>
>                 Key: HBASE-20338
>                 URL: https://issues.apache.org/jira/browse/HBASE-20338
>             Project: HBase
>          Issue Type: Bug
>    Affects Versions: 2.0.0-beta-2
>            Reporter: Umesh Agashe
>            Assignee: Wei-Chiu Chuang
>            Priority: Major
>             Fix For: 2.0.0
>
>         Attachments: HBASE-20338.master.001.patch, HBASE-20338.master.002.patch, HBASE-20338.master.003.patch, HBASE-20338.master.004.patch, HBASE-20338.master.005.patch
>
>
> In our internal testing we observed that logs are getting flooded due to a continuous loop in WALProcedureStore#recoverLease():
> {code}
>     while (isRunning()) {
>       // Get Log-MaxID and recover lease on old logs
>       try {
>         flushLogId = initOldLogs(oldLogs);
>       } catch (FileNotFoundException e) {
>         LOG.warn("Someone else is active and deleted logs. retrying.", e);
>         oldLogs = getLogFiles();
>         continue;
>       }
>
>       // Create new state-log
>       if (!rollWriter(flushLogId + 1)) {
>         // someone else has already created this log
>         LOG.debug("Someone else has already created log " + flushLogId);
>         continue;
>       }
> {code}
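> Neither branch waits before retrying, so a persistent failure spins the loop. For illustration, a minimal sketch of what a fixed sleep between retries could look like; the class, the Attempt stand-in, the attempt bound, and the one-second interval are hypothetical, not the attached patch or the WALProcedureStore API:
> {code}
> import java.io.IOException;
>
> /**
>  * Illustrative sketch only: retry with a fixed sleep between attempts
>  * instead of spinning. Names and interval are hypothetical stand-ins.
>  */
> public class FixedSleepRetry {
>   /** Hypothetical fixed wait between retries. */
>   private static final long SLEEP_BETWEEN_RETRIES_MS = 1000L;
>
>   /** Stand-in for one attempt, e.g. a rollWriter() call. */
>   interface Attempt {
>     boolean run() throws IOException;
>   }
>
>   /**
>    * Runs the attempt until it succeeds or maxAttempts is reached,
>    * sleeping a fixed interval after each failure so a persistent
>    * error (e.g. an unreachable KMS) cannot flood the logs.
>    */
>   static boolean retryWithFixedSleep(Attempt attempt, int maxAttempts)
>       throws IOException, InterruptedException {
>     for (int i = 1; i <= maxAttempts; i++) {
>       if (attempt.run()) {
>         return true;
>       }
>       Thread.sleep(SLEEP_BETWEEN_RETRIES_MS);
>     }
>     return false;
>   }
> }
> {code}
> With a fixed sleep and an attempt bound, a persistent failure such as the KMS outage below would produce a handful of warnings instead of a flood.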
> In this instance, rollWriter() fails to create a new file. The HDFS NameNode log shows these errors around the same time:
> {code}
> INFO org.apache.hadoop.ipc.Server: IPC Server handler 3 on 8020, call org.apache.hadoop.hdfs.protocol.ClientProtocol.create from 172.31.121.196:38508 Call#3141 Retry#0
> java.io.IOException: Exeption while contacting value generator
>         at org.apache.hadoop.crypto.key.kms.ValueQueue.getAtMost(ValueQueue.java:389)
>         at org.apache.hadoop.crypto.key.kms.ValueQueue.getNext(ValueQueue.java:291)
>         at org.apache.hadoop.crypto.key.kms.KMSClientProvider.generateEncryptedKey(KMSClientProvider.java:724)
>         at org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.generateEncryptedKey(KeyProviderCryptoExtension.java:511)
>         at org.apache.hadoop.hdfs.server.namenode.FSNamesystem$2.run(FSNamesystem.java:2680)
>         at org.apache.hadoop.hdfs.server.namenode.FSNamesystem$2.run(FSNamesystem.java:2676)
>         at java.security.AccessController.doPrivileged(Native Method)
>         at javax.security.auth.Subject.doAs(Subject.java:422)
>         at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1920)
>         at org.apache.hadoop.security.SecurityUtil.doAsUser(SecurityUtil.java:477)
>         at org.apache.hadoop.security.SecurityUtil.doAsLoginUser(SecurityUtil.java:458)
>         at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.generateEncryptedDataEncryptionKey(FSNamesystem.java:2675)
>         at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2815)
>         at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2712)
>         at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:604)
>         at org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.create(AuthorizationProviderProxyClientProtocol.java:115)
>         at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:412)
>         at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
>         at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617)
>         at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073)
>         at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2226)
>         at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2222)
>         at java.security.AccessController.doPrivileged(Native Method)
>         at javax.security.auth.Subject.doAs(Subject.java:422)
>         at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1920)
>         at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2220)
> Caused by: java.net.ConnectException: Connection refused (Connection refused)
>         at java.net.PlainSocketImpl.socketConnect(Native Method)
>         at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350)
>         at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206)
>         at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188)
>         at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)
>         at java.net.Socket.connect(Socket.java:589)
>         at sun.security.ssl.SSLSocketImpl.connect(SSLSocketImpl.java:673)
>         at sun.net.NetworkClient.doConnect(NetworkClient.java:175)
>         at sun.net.www.http.HttpClient.openServer(HttpClient.java:463)
>         at sun.net.www.http.HttpClient.openServer(HttpClient.java:558)
>         at sun.net.www.protocol.https.HttpsClient.<init>(HttpsClient.java:264)
>         at sun.net.www.protocol.https.HttpsClient.New(HttpsClient.java:367)
>         at sun.net.www.protocol.https.AbstractDelegateHttpsURLConnection.getNewHttpClient(AbstractDelegateHttpsURLConnection.java:191)
>         at sun.net.www.protocol.http.HttpURLConnection.plainConnect0(HttpURLConnection.java:1138)
>         at sun.net.www.protocol.http.HttpURLConnection.plainConnect(HttpURLConnection.java:1032)
>         at sun.net.www.protocol.https.AbstractDelegateHttpsURLConnection.connect(AbstractDelegateHttpsURLConnection.java:177)
>         at sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1546)
>         at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1474)
>         at java.net.HttpURLConnection.getResponseCode(HttpURLConnection.java:480)
>         at sun.net.www.protocol.https.HttpsURLConnectionImpl.getResponseCode(HttpsURLConnectionImpl.java:338)
>         at org.apache.hadoop.crypto.key.kms.KMSClientProvider.call(KMSClientProvider.java:503)
>         at org.apache.hadoop.crypto.key.kms.KMSClientProvider.call(KMSClientProvider.java:488)
>         at org.apache.hadoop.crypto.key.kms.KMSClientProvider.access$200(KMSClientProvider.java:94)
>         at org.apache.hadoop.crypto.key.kms.KMSClientProvider$EncryptedQueueRefiller.fillQueueForKey(KMSClientProvider.java:149)
>         at org.apache.hadoop.crypto.key.kms.ValueQueue.getAtMost(ValueQueue.java:378)
>         ... 25 more
> {code}
> Both the HDFS NameNode and HBase Master logs are filling up.

--
This message was sent by Atlassian Jira
(v8.20.10#820010)