This is an automated email from the ASF dual-hosted git repository. xxyu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/master by this push: new 1b4716a KYLIN-4879 Fix the function of remove sql comments 1b4716a is described below commit 1b4716aa940779999f83832cf52c84de4a0edbbc Author: yaqian.zhang <598593...@qq.com> AuthorDate: Wed Mar 10 16:58:10 2021 +0800 KYLIN-4879 Fix the function of remove sql comments --- pom.xml | 6 + query/pom.xml | 27 +++- .../javacc/org/apache/kylin/query/util/.gitignore | 10 ++ .../org/apache/kylin/query/util/CommentParser.jj | 161 +++++++++++++++++++++ .../org/apache/kylin/query/util/QueryUtil.java | 17 +-- .../org/apache/kylin/query/util/QueryUtilTest.java | 73 ++++++++-- 6 files changed, 269 insertions(+), 25 deletions(-) diff --git a/pom.xml b/pom.xml index 1b05daf..ed14a73 100644 --- a/pom.xml +++ b/pom.xml @@ -1493,6 +1493,12 @@ <bundledSignature>jdk-deprecated</bundledSignature> <!--<bundledSignature>jdk-non-portable</bundledSignature>--> </bundledSignatures> + <excludes> + <exclude>**/ParseException.class</exclude> + <exclude>**/SimpleCharStream.class</exclude> + <exclude>**/*TokenManager.class</exclude> + <exclude>**/TokenMgrError.class</exclude> + </excludes> <signaturesFiles> <signaturesFile> ${user.dir}/dev-support/signatures.txt diff --git a/query/pom.xml b/query/pom.xml index ce338f2..a9f9358 100644 --- a/query/pom.xml +++ b/query/pom.xml @@ -17,7 +17,8 @@ limitations under the License. --> -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <artifactId>kylin-query</artifactId> @@ -92,4 +93,28 @@ <version>${mockito.version}</version> </dependency> </dependencies> + <build> + <plugins> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>javacc-maven-plugin</artifactId> + <version>2.6</version> + <executions> + <execution> + <id>javacc</id> + <goals> + <goal>javacc</goal> + </goals> + <configuration> + <sourceDirectory>src/main/codegen/javacc/org/apache/kylin/query/util</sourceDirectory> + <includes> + <include>*.jj</include> + </includes> + <isStatic>false</isStatic> + </configuration> + </execution> + </executions> + </plugin> + </plugins> + </build> </project> diff --git a/query/src/main/codegen/javacc/org/apache/kylin/query/util/.gitignore b/query/src/main/codegen/javacc/org/apache/kylin/query/util/.gitignore new file mode 100644 index 0000000..b978ba1 --- /dev/null +++ b/query/src/main/codegen/javacc/org/apache/kylin/query/util/.gitignore @@ -0,0 +1,10 @@ +/EscapeTransformer.java +/EscapeTransformerConstants.java +/EscapeTransformerTokenManager.java +/ParseException.java +/SimpleCharStream.java +/Token.java +/TokenMgrError.java +/EscapeParser.java +/EscapeParserConstants.java +/EscapeParserTokenManager.java diff --git a/query/src/main/codegen/javacc/org/apache/kylin/query/util/CommentParser.jj b/query/src/main/codegen/javacc/org/apache/kylin/query/util/CommentParser.jj new file mode 100644 index 0000000..53dcab1 --- /dev/null +++ b/query/src/main/codegen/javacc/org/apache/kylin/query/util/CommentParser.jj @@ -0,0 +1,161 @@ +options { + IGNORE_CASE = true; + STATIC = false; + UNICODE_INPUT=true; +} + +PARSER_BEGIN(CommentParser) +package org.apache.kylin.query.util; +import java.io.StringReader; +import java.util.List; +import java.util.ArrayList; +import java.util.Scanner; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class CommentParser { + + private final static Logger logger = LoggerFactory.getLogger(CommentParser.class); + + + public static void main(String args[]) throws ParseException { + System.out.println("Input SQL:"); + Scanner reader = new Scanner(System.in, "UTF-8"); + String sql = reader.nextLine(); + reader.close(); + CommentParser parser = new CommentParser(new StringReader(sql)); + String parseResult = parser.Input(); + System.out.println("Translated SQL:"); + System.out.println(parseResult); + } + + public CommentParser(String sql) { + this(new StringReader(sql)); + } +} + +PARSER_END(CommentParser) + +//< DEFAULT, ESCAPE, FUNCTION > +//SKIP : +//{ +// "\t" +//| "\n" +//| "\r" +//} + +<DEFAULT> +SKIP : { + "/*" : WITHINCOMMENT + | <"--" (~["\n","\r"])*> +} + +< DEFAULT > +TOKEN : +{ + < REMAIN_TOKEN : [" ", "\t","\n", "\r", ",", "/", "-" ] > +| < QUOTE: "'" > +| < QUOTED_STRING: <QUOTE> ( (~["'"]) | ("''"))* <QUOTE> > +| < DOUBLE_QUOTE: "\"" > +| < DOUBLE_QUOTE_STRING: <DOUBLE_QUOTE> ( (~["\""]) | ("\"\""))* <DOUBLE_QUOTE> > +| < ANY : (~[" ", ",", "\t", "\n", "\r", "/", "-" ])+ > +} + +<WITHINCOMMENT> +SKIP : +{ + "*/" : DEFAULT +} + +<WITHINCOMMENT> +MORE : +{ + < ~[] > +} + +/** Root production. */ +String Input() : +{ + String innerString; + StringBuilder transformedStr = new StringBuilder(); +} +{ + ( + LOOKAHEAD(2) + innerString = Expression() + { + transformedStr.append(innerString); + } + )+ + <EOF> + { + return transformedStr.toString(); + } +} + +/** Brace counting production. */ +String Expression() : +{ + String innerString = ""; + String nextString = ""; +} +{ + { + if (Thread.currentThread().isInterrupted()) { + throw new ParseException("CommentParser is interrupted"); + } + } + ( + innerString = QuotedString() + | innerString = DoubleQuoteString() + | innerString = RemainToken() + | innerString = Any() + ) + { + return innerString + nextString; + } +} + +String RemainToken() : +{} +{ + < REMAIN_TOKEN > + { + logger.trace("meet token <REMAIN_TOKEN>"); + return getToken(0).image; + } +} + +String QuotedString() : +{ + String s; +} +{ + <QUOTED_STRING> + { + logger.trace("meet token in <QUOTED_STRING>: " + getToken(0).image); + return getToken(0).image; + } +} + +String DoubleQuoteString() : +{ + String s; +} +{ + <DOUBLE_QUOTE_STRING> + { + logger.trace("meet token in <QUOTED2_STRING>: " + getToken(0).image); + return getToken(0).image; + } +} + +String Any() : +{} +{ + < ANY > + { + logger.trace("meet token in <ANY>: " + getToken(0).image); + return getToken(0).image; + } +} \ No newline at end of file diff --git a/query/src/main/java/org/apache/kylin/query/util/QueryUtil.java b/query/src/main/java/org/apache/kylin/query/util/QueryUtil.java index 551a907..8841427 100644 --- a/query/src/main/java/org/apache/kylin/query/util/QueryUtil.java +++ b/query/src/main/java/org/apache/kylin/query/util/QueryUtil.java @@ -30,7 +30,6 @@ import org.apache.kylin.metadata.project.ProjectInstance; import org.apache.kylin.metadata.project.ProjectManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - import com.google.common.collect.Lists; /** @@ -219,17 +218,15 @@ public class QueryUtil { || (sql1.startsWith(KEYWORD_EXPLAIN) && sql1.contains(KEYWORD_SELECT)); } - public static String removeCommentInSql(String sql1) { - // match two patterns, one is "-- comment", the other is "/* comment */" - final String[] commentPatterns = new String[] { "--(?!.*\\*/).*?[\r\n]", "/\\*(.|\r|\n)*?\\*/" }; - for (int i = 0; i < commentPatterns.length; i++) { - sql1 = sql1.replaceAll(commentPatterns[i], ""); + public static String removeCommentInSql(String sql) { + // match two patterns, one is "-- comment", the other is "/* comment */" + try { + return new CommentParser(sql).Input().trim(); + } catch (ParseException e) { + logger.error("Failed to parse sql: {}", sql, e); + return sql; } - - sql1 = sql1.trim(); - - return sql1; } public interface IQueryTransformer { diff --git a/query/src/test/java/org/apache/kylin/query/util/QueryUtilTest.java b/query/src/test/java/org/apache/kylin/query/util/QueryUtilTest.java index 9b769b8..fb9a431 100644 --- a/query/src/test/java/org/apache/kylin/query/util/QueryUtilTest.java +++ b/query/src/test/java/org/apache/kylin/query/util/QueryUtilTest.java @@ -200,8 +200,8 @@ public class QueryUtilTest extends LocalFileMetadataTestCase { @Test public void testRemoveCommentInSql() { - String originSql = "select count(*) from test_kylin_fact where price > 10.0"; - String originSql2 = "select count(*) from test_kylin_fact where TEST_COLUMN != 'not--a comment'"; + final String originSql = "select count(*) from test_kylin_fact where price > 10.0"; + final String originSql2 = "select count(*) from test_kylin_fact where TEST_COLUMN != 'not--a comment'"; { String sqlWithComment = "-- comment \n" + originSql; @@ -294,18 +294,63 @@ public class QueryUtilTest extends LocalFileMetadataTestCase { Assert.assertEquals(originSql2, QueryUtil.removeCommentInSql(sqlWithComment2)); } - String content = " -- One-line comment and /**range\n" + - "/*\n" + - "Multi-line comment\r\n" + - "-- Multi-line comment*/\n" + - "select price as " + - "/*\n" + - "Multi-line comment\r\n" + - "-- Multi-line comment*/\n" + - "revenue from /*One-line comment-- One-line comment*/ v_lineitem;"; - String expectedContent = "select price as revenue from v_lineitem;"; - String trimmedContent = QueryUtil.removeCommentInSql(content).replaceAll("\n", "").trim(); - Assert.assertEquals(trimmedContent, expectedContent); + { + String content = " -- One-line comment and /**range\n" + + "/*\n" + + "Multi-line comment\r\n" + + "-- Multi-line comment*/\n" + + "select price as " + + "/*\n" + + "Multi-line comment\r\n" + + "-- Multi-line comment*/\n" + + "revenue from /*One-line comment-- One-line comment*/ v_lineitem;"; + String expectedContent = "select price as revenue from v_lineitem;"; + String trimmedContent = QueryUtil.removeCommentInSql(content).replaceAll("\n", "").trim(); + Assert.assertEquals(trimmedContent, expectedContent); + } + + { + String sqlWithComment = "select count(*) from test_kylin_fact WHERE column_name = '--this is not comment'\n " + + "LIMIT 100 offset 0"; + Assert.assertEquals(sqlWithComment, QueryUtil.removeCommentInSql(sqlWithComment)); + } + + { + String sqlWithComment = "select count(*) from test_kylin_fact WHERE column_name = '--this is not comment'"; + Assert.assertEquals(sqlWithComment, QueryUtil.removeCommentInSql(sqlWithComment)); + } + + { + String sqlWithComment = "select count(*) from test_kylin_fact WHERE column_name = '/*--this is not comment*/'"; + Assert.assertEquals(sqlWithComment, QueryUtil.removeCommentInSql(sqlWithComment)); + } + + { + String sqlWithComment = "-- comment \n" + originSql + "/* comment */;" + "-- comment \n" + originSql + "/* comment */"; + Assert.assertEquals("select count(*) from test_kylin_fact where price > 10.0;\n" + + "select count(*) from test_kylin_fact where price > 10.0", QueryUtil.removeCommentInSql(sqlWithComment)); + } + + { + String sqlWithComment = "select count(*) from test_kylin_fact /* this is test*/ where price > 10.0 -- comment \n" + + ";" + "insert into test_kylin_fact(id) values(?); -- comment \n"; + Assert.assertEquals("select count(*) from test_kylin_fact where price > 10.0 \n" + + ";insert into test_kylin_fact(id) values(?);", QueryUtil.removeCommentInSql(sqlWithComment)); + } + + { + String sqlWithoutComment = "select * from test_kylin_fact where price=\"/* this is not comment */\""; + Assert.assertEquals(sqlWithoutComment, QueryUtil.removeCommentInSql(sqlWithoutComment)); + } + + { + String sqlWithoutComment = "select count(*) from test_kylin_fact -- 'comment'\n"; + Assert.assertEquals("select count(*) from test_kylin_fact", QueryUtil.removeCommentInSql(sqlWithoutComment)); + } + { + Assert.assertEquals("select count(*)", + QueryUtil.removeCommentInSql("select count(*) -- , --\t --/ --")); + } } @Test