This is an automated email from the ASF dual-hosted git repository. liyang pushed a commit to branch kylin5 in repository https://gitbox.apache.org/repos/asf/kylin.git
commit 091e4709f3d405ee764b2da176a483f63a7b4e97
Author: Hang Jia <754332...@qq.com>
AuthorDate: Mon Nov 6 15:07:46 2023 +0800

    KYLIN-5866 Desensitization of IP information in diagnostic package
---
 .../org/apache/kylin/common/KylinConfigBase.java   |   4 +
 .../kylin/tool/AbstractInfoExtractorTool.java      |  16 ++
 .../org/apache/kylin/tool/obf/IpObfuscator.java    | 259 +++++++++++++++++++++
 .../java/org/apache/kylin/tool/util/ToolUtil.java  |  18 ++
 .../org/apache/kylin/tool/DiagClientToolTest.java  | 119 ++++++++++
 .../org/apache/kylin/tool/util/ToolUtilTest.java   |   9 +
 6 files changed, 425 insertions(+)

diff --git a/src/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/src/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index ecd74eb9d5..58da97650a 100644
--- a/src/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/src/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -3662,6 +3662,10 @@ public abstract class KylinConfigBase implements Serializable {
         return getOptional("kylin.diag.obf.level", "OBF").toUpperCase(Locale.ROOT);
     }
 
+    /**
+     * Reads the switch "kylin.diag.ip-obf-enabled" (default FALSE) that turns on IP masking
+     * for diagnostic packages.
+     */
+    public boolean isDiagIpObfEnabled() {
+        String configured = getOptional("kylin.diag.ip-obf-enabled", FALSE);
+        return Boolean.parseBoolean(configured);
+    }
+
     public boolean isDimensionRangeFilterEnabled() {
         return Boolean.parseBoolean(getOptional("kylin.storage.columnar.dimension-range-filter-enabled", TRUE));
     }
diff --git a/src/tool/src/main/java/org/apache/kylin/tool/AbstractInfoExtractorTool.java b/src/tool/src/main/java/org/apache/kylin/tool/AbstractInfoExtractorTool.java
index 1d5a567c36..b1733ca6f9 100644
--- a/src/tool/src/main/java/org/apache/kylin/tool/AbstractInfoExtractorTool.java
+++ b/src/tool/src/main/java/org/apache/kylin/tool/AbstractInfoExtractorTool.java
@@ -99,6 +99,7 @@ import org.apache.kylin.rest.util.SpringContext;
 import org.apache.kylin.tool.constant.DiagSubTaskEnum;
 import org.apache.kylin.tool.constant.SensitiveConfigKeysConstant;
 import org.apache.kylin.tool.constant.StageEnum;
+import org.apache.kylin.tool.obf.IpObfuscator;
 import org.apache.kylin.tool.obf.KylinConfObfuscator;
 import org.apache.kylin.tool.obf.MappingRecorder;
 import org.apache.kylin.tool.obf.ObfLevel;
@@ -312,6 +313,21 @@ public abstract class AbstractInfoExtractorTool extends ExecutableApplication {
             kylinConfObfuscator.obfuscate(new File(rootDir, SensitiveConfigKeysConstant.CONF_DIR),
                     file -> (file.isFile() && file.getName().startsWith(SensitiveConfigKeysConstant.KYLIN_PROPERTIES)));
         }
+
+        obfIpDiag(rootDir, obfLevel);
+    }
+
+    /**
+     * Runs the IP obfuscator over {@code rootDir}; does nothing unless the level is OBF and
+     * kylin.diag.ip-obf-enabled is switched on.
+     */
+    private void obfIpDiag(File rootDir, ObfLevel obfLevel) throws IOException {
+        boolean ipObfRequested = obfLevel == ObfLevel.OBF && kylinConfig.isDiagIpObfEnabled();
+        if (ipObfRequested) {
+            logger.info("diag start obf ip");
+            try (MappingRecorder mappingRecorder = new MappingRecorder(null)) {
+                // a null file filter lets IpObfuscator choose its sed-based path when available
+                new IpObfuscator(obfLevel, mappingRecorder, new ResultRecorder()).obfuscate(rootDir, null);
+            }
+            logger.info("diag end obf ip");
+        }
     }
 
     private boolean isDiag() {
diff --git a/src/tool/src/main/java/org/apache/kylin/tool/obf/IpObfuscator.java b/src/tool/src/main/java/org/apache/kylin/tool/obf/IpObfuscator.java
new file mode 100644
index 0000000000..b7d63a53e8
--- /dev/null
+++ b/src/tool/src/main/java/org/apache/kylin/tool/obf/IpObfuscator.java
@@ -0,0 +1,259 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kylin.tool.obf;
+
+import static org.apache.kylin.tool.constant.SensitiveConfigKeysConstant.HIDDEN;
+import static org.apache.kylin.tool.util.ToolUtil.existsLinuxCommon;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileFilter;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStreamReader;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.nio.charset.Charset;
+import java.nio.file.Files;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.function.Function;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.curator.shaded.com.google.common.collect.Maps;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.util.Array;
+import org.apache.kylin.common.util.BufferedLogger;
+import org.apache.kylin.common.util.CliCommandExecutor;
+import org.apache.kylin.common.util.ShellException;
+import org.apache.kylin.common.util.ZipFileUtils;
+
+import lombok.val;
+
+/**
+ * File obfuscator that replaces IPv4-looking strings with HIDDEN, both in file contents and in
+ * the paths of files below the directory being processed. Zip archives below that directory
+ * are unpacked first and re-packed afterwards.
+ */
+public class IpObfuscator extends FileObfuscator {
+
+    /** Absolute paths of the zip archives to rebuild once obfuscation is done. */
+    private List<String> zipFiles = new ArrayList<>();
+
+    /** Absolute path of a file whose path contains an IP -> the same path with the IP hidden. */
+    private Map<String, String> ipDirHiddens = Maps.newHashMap();
+
+    private static final String IP_NETWORK_NUMBER = "((25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]?\\d)\\.){3}";
+    private static final String IP_HOST_NUMBER = "(25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]\\d|[1-9])";
+
+    /** Dotted-quad expression: three "octet." groups followed by the host octet. */
+    private static final String IP_PATTERN = IP_NETWORK_NUMBER + IP_HOST_NUMBER;
+
+    /** Buffer size, in chars, handed to the reader and the writer of doObfuscateFile. */
+    private static final int BUFFER_SIZE = 10 * 1024 * 1024;
+
+    /** Whole-string form of IP_PATTERN, meant to be used with Matcher.matches(). */
+    private static final Pattern IP_PATTERN_PATTERN = Pattern.compile("(.*)" + IP_PATTERN + "(.*)");
+
+    /** IP_PATTERN compiled once, so the per-line replacement does not recompile the regex. */
+    private static final Pattern IP_REPLACE_PATTERN = Pattern.compile(IP_PATTERN);
+
+    public IpObfuscator(ObfLevel level, MappingRecorder recorder, ResultRecorder resultRecorder) {
+        super(level, recorder, resultRecorder);
+    }
+
+    /**
+     * Rewrites {@code orig} in place with every match of IP_PATTERN replaced by HIDDEN.
+     *
+     * The content is written line by line to a sibling "<path>.tmp" file which is then renamed
+     * over the original. Each file ends up in exactly one of the result recorder's lists:
+     * success when the rename worked, failed when reading, writing or renaming did not. On
+     * failure the temporary file is removed so it is not left next to the original.
+     *
+     * @param orig the file to obfuscate
+     */
+    @Override
+    public void doObfuscateFile(File orig) {
+        String path = orig.getAbsolutePath();
+        File tmpFile = new File(path + ".tmp");
+
+        try {
+            try (BufferedReader reader = new BufferedReader(
+                    new InputStreamReader(new FileInputStream(orig), Charset.defaultCharset()), BUFFER_SIZE);
+                    BufferedWriter bw = new BufferedWriter(
+                            new OutputStreamWriter(new FileOutputStream(tmpFile), Charset.defaultCharset()),
+                            BUFFER_SIZE)) {
+                String readLine;
+                while ((readLine = reader.readLine()) != null) {
+                    bw.write(IP_REPLACE_PATTERN.matcher(readLine).replaceAll(HIDDEN));
+                    bw.newLine();
+                }
+            }
+            // Both streams are closed (and the writer flushed) before the swap: renaming over a
+            // file that is still open fails on some platforms.
+            if (tmpFile.renameTo(orig)) {
+                logger.info("{} processed successfully", path);
+                resultRecorder.addSuccessFile(path);
+            } else {
+                logger.error("{} processed failed", path);
+                resultRecorder.addFailedFile(path);
+                FileUtils.deleteQuietly(tmpFile);
+            }
+        } catch (IOException e) {
+            logger.error("{} processed failed", path, e);
+            resultRecorder.addFailedFile(path);
+            FileUtils.deleteQuietly(tmpFile);
+        }
+    }
+
+    /**
+     * Obfuscates everything below {@code orig}: prepares the tree (unzip, rename paths that
+     * contain an IP), masks file contents, then re-packs the zip archives.
+     *
+     * Content masking goes through sed when no filter is given and sed is available, otherwise
+     * through the Java implementation of the parent class.
+     *
+     * @param orig       root directory to process
+     * @param fileFilter optional filter for the Java implementation; {@code null} allows sed
+     */
+    @Override
+    public void obfuscate(File orig, FileFilter fileFilter) {
+        ipObfuscatorPrepare(orig);
+        try {
+            if (fileFilter == null && existsLinuxCommon("sed")) {
+                obfuscateByLinux(orig);
+            } else {
+                super.obfuscate(orig, fileFilter);
+            }
+        } finally {
+            // always re-pack, otherwise the unpacked directories stay in the package and the
+            // archives keep their original, unmasked content
+            ipObfuscatorClean();
+        }
+    }
+
+    public void obfuscateByLinux(File rootDir) {
+        logger.info("diag start obf ip by linux");
+        String rex = "([0-9]{1,3}\\.){3}(([0-9]{2,3})|[1-9])";
+        String finalCommand = String.format(Locale.ROOT,
+                "cd %s/ && sed -ri 's/%s/<hidden>/g' $(grep -E '%s' -rl %s)", KylinConfig.getKylinHome(), rex, rex,
rootDir.getAbsolutePath());
+        CliCommandExecutor commandExecutor = new CliCommandExecutor();
+        val patternedLogger = new BufferedLogger(logger);
+        String uuid = rootDir.getAbsolutePath();
+
+        try {
+            logger.info("command = {}", finalCommand);
+            commandExecutor.execute(finalCommand, patternedLogger, uuid);
+            logger.info("diag end obf ip by linux");
+        } catch (ShellException e) {
+            logger.error("Failed to execute obf ip diag by linux", e);
+            // the shell route failed: fall back to the Java implementation
+            super.obfuscate(rootDir, null);
+        }
+    }
+
+    /**
+     * Tells whether {@code text} contains something shaped like an IPv4 address.
+     *
+     * @param text the text to inspect, typically an absolute file path; may be {@code null}
+     * @return {@code true} when the whole text matches "(.*)" + IP + "(.*)", {@code false}
+     *         otherwise and for {@code null}
+     */
+    public static boolean existsIp(String text) {
+        if (text == null) {
+            return false;
+        }
+        Matcher ipMatcher = IP_PATTERN_PATTERN.matcher(text);
+        return ipMatcher.matches();
+    }
+
+    /**
+     * Gets the tree below {@code root} ready for content obfuscation: unpacks zip archives,
+     * collects the files whose path contains an IP, renames the remembered archives to match
+     * and moves those files to their masked location.
+     */
+    public void ipObfuscatorPrepare(File root) {
+        logger.info("ip obfuscator do prepare");
+        this.zipFiles = findZipFileAndUnzip(root);
+        this.ipDirHiddens = findIpDir(root);
+        changeZipFileByIpDirs(root);
+        moveFileByIpDirs();
+    }
+
+    /**
+     * Moves every file recorded in ipDirHiddens from its original path to the masked path and
+     * removes the source directories left empty by the move. A failed move is logged and
+     * recorded as a failed file; the remaining entries are still processed.
+     */
+    public void moveFileByIpDirs() {
+        logger.info("ip obfuscator do move file by ip dirs");
+        for (Map.Entry<String, String> entry : this.ipDirHiddens.entrySet()) {
+            File srcFile = new File(entry.getKey());
+            File destFile = new File(entry.getValue());
+
+            destFile.getParentFile().mkdirs();
+            try {
+                FileUtils.moveFile(srcFile, destFile);
+                removeEmptyParentsDirectory(srcFile.getParentFile());
+            } catch (IOException e) {
+                logger.error("{} processed move file By ipDirs failed", entry.getKey(), e);
+                resultRecorder.addFailedFile(entry.getKey());
+            }
+        }
+    }
+
+    /**
+     * Deletes {@code parentFile} if it is an empty directory, then keeps climbing and deleting
+     * while the next parent is empty too. Stops at the first directory that still has entries,
+     * at a path that cannot be listed (not a directory, or an I/O error), or at the file system
+     * root.
+     *
+     * NOTE(review): the climb is not bounded by the diag root, so an empty ancestor above it
+     * would be removed as well - confirm callers only pass directories inside a non-empty root.
+     *
+     * @param parentFile the directory to start from; {@code null} is a no-op
+     * @throws IOException if an empty directory cannot be deleted
+     */
+    public static void removeEmptyParentsDirectory(File parentFile) throws IOException {
+        File current = parentFile;
+        while (current != null) {
+            File[] children = current.listFiles();
+            // a null listing means "cannot tell"; never delete in that case
+            if (children == null || !Array.isEmpty(children)) {
+                return;
+            }
+            Files.delete(current.toPath());
+            current = current.getParentFile();
+        }
+    }
+
+    private void changeZipFileByIpDirs(File root) {
+        logger.info("ip obfuscator do change zip file name by ip dirs");
+        List<String> ipDirZips = zipFiles.stream().filter(file -> ipDirHiddens.containsKey(file))
+                .collect(Collectors.toList());
+        if
(ipDirZips.isEmpty()) {
+            return;
+        }
+        this.zipFiles.removeAll(ipDirZips);
+        List<String> hiddenIpZipFiles = ipDirZips.stream().map(ipDirZip -> getHiddenIpDir(root, ipDirZip))
+                .collect(Collectors.toList());
+        this.zipFiles.addAll(hiddenIpZipFiles);
+    }
+
+    /**
+     * Collects the files below {@code root} whose path has to change because it contains an IP.
+     *
+     * @return original absolute path -> absolute path with the IP masked; files whose masked
+     *         path equals the original one are left out
+     */
+    private Map<String, String> findIpDir(File root) {
+        List<String> ipDirs = findFiles(root, file -> existsIp(file.getAbsolutePath()));
+        Map<String, String> ipDirHidden = Maps.newHashMap();
+
+        for (String ipDir : ipDirs) {
+            String hiddenIpDir = getHiddenIpDir(root, ipDir);
+            // The match may sit only in the part of the path above root, which is never
+            // rewritten. Source and target are then the same file and there is nothing to move.
+            if (!hiddenIpDir.equals(ipDir)) {
+                ipDirHidden.put(ipDir, hiddenIpDir);
+            }
+        }
+        return ipDirHidden;
+    }
+
+    /**
+     * Masks the IPs in the part of {@code ipDir} that lies below {@code root}; the root prefix
+     * itself is kept untouched.
+     *
+     * @param root  directory the path is relative to
+     * @param ipDir absolute path, expected to start with the absolute path of {@code root}
+     * @return the absolute path of {@code root} followed by the masked remainder
+     */
+    public static String getHiddenIpDir(File root, String ipDir) {
+        String rootPath = root.getAbsolutePath();
+        // Strip the root only as a prefix: the same text may occur again further down the path
+        // and must not be cut out there.
+        String relativePath = ipDir.startsWith(rootPath) ? ipDir.substring(rootPath.length())
+                : ipDir.replace(rootPath, "");
+        return rootPath + relativePath.replaceAll(IP_PATTERN, HIDDEN);
+    }
+
+    /**
+     * Finds every "*.zip" file below {@code root} and unpacks it into the directory that holds
+     * it. An archive that cannot be unpacked is logged and recorded as failed, but still
+     * returned.
+     *
+     * @return absolute paths of all zip files found
+     */
+    public List<String> findZipFileAndUnzip(File root) {
+        List<String> zipFileList = findFiles(root, file -> file.getName().endsWith(".zip"));
+
+        for (String zipFile : zipFileList) {
+            try {
+                ZipFileUtils.decompressZipFile(zipFile, new File(zipFile).getParent());
+            } catch (IOException e) {
+                logger.error("{} processed decompress zip failed", zipFile, e);
+                resultRecorder.addFailedFile(zipFile);
+            }
+        }
+        return zipFileList;
+    }
+
+    /** Counterpart of ipObfuscatorPrepare: re-packs the archives and drops the unpacked copies. */
+    public void ipObfuscatorClean() {
+        logger.info("ip obfuscator do clean");
+        compressZipFileAndCleanZipDir();
+    }
+
+    /**
+     * Rebuilds every remembered zip archive from the directory named like the archive without
+     * its ".zip" suffix, then deletes that directory. A failure is logged and recorded per
+     * archive.
+     */
+    private void compressZipFileAndCleanZipDir() {
+        logger.info("ip obfuscator do compress zip file and clean zip dir");
+        final String zipSuffix = ".zip";
+        for (String zipFile : zipFiles) {
+            try {
+                // Only the trailing suffix is removed; a ".zip" elsewhere in the path (for
+                // example in a parent directory name) has to stay.
+                String zipDir = zipFile.endsWith(zipSuffix)
+                        ? zipFile.substring(0, zipFile.length() - zipSuffix.length())
+                        : zipFile.replace(".zip", "");
+                ZipFileUtils.compressZipFile(zipDir, zipFile);
+                FileUtils.deleteQuietly(new File(zipDir));
+            } catch (IOException e) {
+                logger.error("{} processed ip Obfuscator compress zip failed", zipFile, e);
+                resultRecorder.addFailedFile(zipFile);
+            }
+        }
+    }
+
+    public static List<String> findFiles(File dirName, Function<File, Boolean> findFunction) {
+        List<String> zipFiles = new ArrayList<>();
+        if
(dirName == null) {
+            return zipFiles;
+        }
+        File[] files = dirName.listFiles();
+        if (files == null) {
+            return zipFiles;
+        }
+        for (File subFile : files) {
+            if (subFile.isDirectory()) {
+                List<String> zipFilePath = findFiles(subFile, findFunction);
+                zipFiles.addAll(zipFilePath);
+            } else {
+                if (Boolean.TRUE.equals(findFunction.apply(subFile))) {
+                    zipFiles.add(subFile.getAbsolutePath());
+                }
+            }
+        }
+        return zipFiles;
+    }
+}
diff --git a/src/tool/src/main/java/org/apache/kylin/tool/util/ToolUtil.java b/src/tool/src/main/java/org/apache/kylin/tool/util/ToolUtil.java
index 71e1405a5a..5417e57afd 100644
--- a/src/tool/src/main/java/org/apache/kylin/tool/util/ToolUtil.java
+++ b/src/tool/src/main/java/org/apache/kylin/tool/util/ToolUtil.java
@@ -37,11 +37,13 @@ import org.apache.hadoop.fs.Path;
 import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.persistence.ResourceStore;
 import org.apache.kylin.common.util.AddressUtil;
+import org.apache.kylin.common.util.BufferedLogger;
 import org.apache.kylin.common.util.CliCommandExecutor;
 import org.apache.kylin.common.util.HadoopUtil;
 import org.apache.kylin.common.util.ProcessUtils;
 import org.apache.kylin.common.util.ShellException;
 import org.apache.kylin.guava30.shaded.common.base.Preconditions;
+import org.apache.kylin.guava30.shaded.common.base.Strings;
 import org.apache.kylin.query.util.ExtractFactory;
 import org.apache.spark.sql.SparderEnv;
 
@@ -213,4 +215,20 @@ public class ToolUtil {
         final String JOB_TMP = "job_tmp";
         return getHdfsPrefix() + File.separator + project + File.separator + JOB_TMP;
     }
+
+    /**
+     * Checks for a shell command by running "command -v <common>".
+     *
+     * @param common name of the command to look up
+     * @return {@code true} when the lookup ran and its result carries a non-empty string;
+     *         {@code false} when the string is empty or the lookup raised a ShellException
+     */
+    public static boolean existsLinuxCommon(String common) {
+        final String finalCommand = "command -v " + common;
+        final CliCommandExecutor commandExecutor = new CliCommandExecutor();
+        final BufferedLogger patternedLogger = new BufferedLogger(log);
+        boolean found = false;
+        try {
+            log.info("command = {}", finalCommand);
+            val lookupResult = commandExecutor.execute(finalCommand, patternedLogger);
+            found = !Strings.isNullOrEmpty(lookupResult.getCmd());
+        } catch (ShellException e) {
+            log.error("Failed to execute linux common: " + finalCommand, e);
+        }
+        return found;
+    }
 }
diff --git a/src/tool/src/test/java/org/apache/kylin/tool/DiagClientToolTest.java b/src/tool/src/test/java/org/apache/kylin/tool/DiagClientToolTest.java
index add2c81222..9b0ac9df43 100644
--- a/src/tool/src/test/java/org/apache/kylin/tool/DiagClientToolTest.java
+++ b/src/tool/src/test/java/org/apache/kylin/tool/DiagClientToolTest.java
@@ -21,18 +21,22 @@ import static org.apache.kylin.common.exception.code.ErrorCodeTool.PARAMETER_TIM
 import java.io.File;
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.concurrent.Executors;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.apache.commons.io.FileUtils;
+import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.exception.KylinException;
+import org.apache.kylin.common.util.CliCommandExecutor;
 import org.apache.kylin.common.util.ExecutorServiceUtil;
 import org.apache.kylin.common.util.NLocalFileMetadataTestCase;
 import org.apache.kylin.common.util.ZipFileUtils;
 import org.apache.kylin.tool.constant.SensitiveConfigKeysConstant;
 import org.apache.kylin.tool.obf.KylinConfObfuscatorTest;
 import org.apache.kylin.tool.snapshot.SnapshotSourceTableStatsTool;
+import org.apache.kylin.tool.util.ToolUtil;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
@@ -40,11 +44,20 @@ import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TemporaryFolder;
 import org.junit.rules.TestName;
+import org.junit.runner.RunWith;
 import org.mockito.Mockito;
+import org.powermock.api.mockito.PowerMockito;
+import org.powermock.core.classloader.annotations.PowerMockIgnore;
+import org.powermock.core.classloader.annotations.PrepareForTest;
+import org.powermock.modules.junit4.PowerMockRunner;
 
 import lombok.val;
 import lombok.var;
 
+@RunWith(PowerMockRunner.class)
+@PrepareForTest({
ToolUtil.class })
+@PowerMockIgnore({ "javax.management.*", "javax.script.*", "org.apache.hadoop.*", "javax.security.*",
+        "javax.crypto.*" })
 public class DiagClientToolTest extends NLocalFileMetadataTestCase {
 
     @Rule
@@ -144,6 +157,112 @@ public class DiagClientToolTest extends NLocalFileMetadataTestCase {
 
     }
 
+    /** With obf level RAW the ip-obf switch alone must not hide anything. */
+    @Test
+    public void testObfIpConfig() throws IOException {
+        File destDir = new File(temporaryFolder.getRoot(), testName.getMethodName());
+        FileUtils.forceMkdir(destDir);
+
+        ipObfDataPrepare();
+
+        getTestConfig().setProperty("kylin.diag.obf.level", "RAW");
+        getTestConfig().setProperty("kylin.diag.ip-obf-enabled", "true");
+
+        new DiagClientTool().execute(new String[] { "-destDir", destDir.getAbsolutePath() });
+
+        // locate the produced package before "output" is created next to it
+        File diagZip = destDir.listFiles()[0].listFiles()[0];
+        File unpackDir = new File(destDir, "output");
+        FileUtils.forceMkdir(unpackDir);
+        ZipFileUtils.decompressZipFile(diagZip.getAbsolutePath(), unpackDir.getAbsolutePath());
+        File diagRoot = unpackDir.listFiles()[0];
+
+        File kylinProperties = new File(diagRoot, "conf/kylin.properties");
+        val props = org.apache.kylin.common.util.FileUtils.readFromPropertiesFile(kylinProperties);
+        String hiddenEndpoint = SensitiveConfigKeysConstant.HIDDEN + ":3306";
+        Assert.assertFalse(props.get("kylin.metadata.url").contains(hiddenEndpoint));
+
+        File hiddenZip = new File(diagRoot, "conf/clickhouse01_<hidden>_8080.zip");
+        File plainZip = new File(diagRoot, "conf/clickhouse01_192.1.2.1_8080.zip");
+        Assert.assertFalse(hiddenZip.exists());
+        Assert.assertTrue(plainZip.exists());
+    }
+
+    /** Default level with the switch on, using the real command executor. */
+    @Test
+    public void testIpObfByLinux() throws Exception {
+        File destDir = new File(temporaryFolder.getRoot(), testName.getMethodName());
+        FileUtils.forceMkdir(destDir);
+
+        ipObfDataPrepare();
+
+        getTestConfig().setProperty("kylin.diag.ip-obf-enabled", "true");
+
+        new DiagClientTool().execute(new String[] { "-destDir", destDir.getAbsolutePath() });
+
+        checkIpObf(destDir);
+    }
+
+    /**
+     * Same expectations as testIpObfByLinux, but every CliCommandExecutor created inside
+     * ToolUtil is replaced by a mock that answers commands containing "sed" with an empty
+     * result.
+     */
+    @Test
+    public void testIpObfByJava() throws Exception {
+        File destDir = new File(temporaryFolder.getRoot(), testName.getMethodName());
+        FileUtils.forceMkdir(destDir);
+
+        ipObfDataPrepare();
+
+        getTestConfig().setProperty("kylin.diag.ip-obf-enabled", "true");
+
+        CliCommandExecutor mockedExecutor = PowerMockito.mock(CliCommandExecutor.class);
+        PowerMockito.whenNew(CliCommandExecutor.class).withNoArguments().thenReturn(mockedExecutor);
+
+        val emptyResult = new CliCommandExecutor.CliCmdExecResult(0, "", null);
+        PowerMockito.doReturn(emptyResult).when(mockedExecutor).execute(Mockito.contains("sed"), Mockito.any());
+
+        new DiagClientTool().execute(new String[] { "-destDir", destDir.getAbsolutePath() });
+
+        checkIpObf(destDir);
+    }
+
+    /**
+     * Drops fixtures into the "conf" directory next to the metadata location: a log file with
+     * an IP in its content, and a zip archive with an IP in its name, in the name of the file
+     * it contains and in that file's content.
+     */
+    private void ipObfDataPrepare() throws IOException {
+        val metadataUrl = KylinConfig.getInstanceFromEnv().getMetadataUrl();
+        File metadataDir = new File(metadataUrl.getIdentifier());
+        File confDir = new File(metadataDir.getParentFile(), "conf");
+
+        FileUtils.writeStringToFile(new File(confDir, "a.log"), "111111111111_192.1.2.1");
+
+        String ipDirName = "clickhouse01_192.1.2.1_8080";
+        FileUtils.forceMkdir(new File(confDir, ipDirName));
+        File ipNamedTxt = new File(confDir, ipDirName + "/c1_192.1.2.1.txt");
+        FileUtils.writeStringToFile(ipNamedTxt, "333333333333 192.1.2.1");
+
+        String ipDirPath = confDir.getAbsolutePath() + "/" + ipDirName;
+        ZipFileUtils.compressZipFile(ipDirPath, ipDirPath + ".zip");
+    }
+
+    /** Unpacks the diag package below {@code mainDir} and asserts that IPs were hidden. */
+    private void checkIpObf(File mainDir) throws IOException {
+        // locate the produced package before "output" is created next to it
+        File diagZip = mainDir.listFiles()[0].listFiles()[0];
+        File unpackDir = new File(mainDir, "output");
+        FileUtils.forceMkdir(unpackDir);
+        ZipFileUtils.decompressZipFile(diagZip.getAbsolutePath(), unpackDir.getAbsolutePath());
+        File diagRoot = unpackDir.listFiles()[0];
+
+        File kylinProperties = new File(diagRoot, "conf/kylin.properties");
+        val props = org.apache.kylin.common.util.FileUtils.readFromPropertiesFile(kylinProperties);
+        String hiddenEndpoint = SensitiveConfigKeysConstant.HIDDEN + ":3306";
+        Assert.assertTrue(props.get("kylin.metadata.url").contains(hiddenEndpoint));
+
+        String logContent = FileUtils.readFileToString(new File(diagRoot, "conf/a.log"));
+        Assert.assertTrue(logContent.contains(SensitiveConfigKeysConstant.HIDDEN));
+
+        // print what ended up in conf, to help when an assertion below fails
+        Arrays.stream(diagRoot.listFiles())
+                .filter(entry -> entry.getName().contains("conf"))
+                .forEach(confDir -> Arrays.stream(confDir.listFiles()).forEach(System.out::println));
+
+        String hiddenDirPath = diagRoot + "/conf/clickhouse01_<hidden>_8080";
+        Assert.assertTrue(new File(diagRoot, "conf/clickhouse01_<hidden>_8080.zip").exists());
+
+        ZipFileUtils.decompressZipFile(hiddenDirPath + ".zip", diagRoot + "/conf/");
+
+        Arrays.stream(new File(hiddenDirPath).listFiles()).forEach(System.out::println);
+        File hiddenTxt = new File(hiddenDirPath, "c1_<hidden>.txt");
+        Assert.assertTrue(hiddenTxt.exists());
+        String hiddenTxtContent = FileUtils.readFileToString(hiddenTxt);
+        Assert.assertTrue(hiddenTxtContent.contains(SensitiveConfigKeysConstant.HIDDEN));
+    }
+
     @Test
     public void testExportSourceTableStats() {
         DiagClientTool diagClientTool = new DiagClientTool();
diff --git a/src/tool/src/test/java/org/apache/kylin/tool/util/ToolUtilTest.java b/src/tool/src/test/java/org/apache/kylin/tool/util/ToolUtilTest.java
index c392e87932..86e9dc7641 100644
--- a/src/tool/src/test/java/org/apache/kylin/tool/util/ToolUtilTest.java
+++ b/src/tool/src/test/java/org/apache/kylin/tool/util/ToolUtilTest.java
@@ -99,4 +99,13 @@ public class ToolUtilTest extends NLocalFileMetadataTestCase {
         String path = ToolUtil.getHdfsJobTmpDir("abcd");
         Assert.assertTrue(path.endsWith("abcd/job_tmp"));
     }
+
+    /** "abcd" is expected to be unknown to the shell, "ls" to be present. */
+    @Test
+    public void testExistsLinuxCommon() {
+        Assert.assertFalse(ToolUtil.existsLinuxCommon("abcd"));
+
+        Assert.assertTrue(ToolUtil.existsLinuxCommon("ls"));
+    }
 }