This is an automated email from the ASF dual-hosted git repository. xiangfu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new cb8bcc4d14 Add customizable parser module (#8484) cb8bcc4d14 is described below commit cb8bcc4d14f141e8834feb2b3e08ccba02920a3b Author: Rong Rong <ro...@apache.org> AuthorDate: Wed Apr 13 14:49:05 2022 -0700 Add customizable parser module (#8484) * adding custome parser * adding stuff to use custom parser * adding SqlInsertFromFile SqlCall node. make E2E compilation work * fix license * fix compilation and test * adding in presto driver pom change as well * also add customizable parser test * remove the non-used FILE token * adding TODO for next steps * move fmpp maven plugin to contrib * fix format * add javadoc Co-authored-by: Rong Rong <ro...@startree.ai> --- contrib/pinot-fmpp-maven-plugin/pom.xml | 111 +++++ .../main/java/org/apache/pinot/fmpp/FMPPMojo.java | 270 +++++++++++ .../org/apache/pinot/fmpp/MavenDataLoader.java | 55 +++ headerdefinition.xml | 10 + pinot-common/pom.xml | 87 ++++ pinot-common/src/main/codegen/config.fmpp | 540 +++++++++++++++++++++ .../src/main/codegen/includes/parserImpls.ftl | 103 ++++ .../apache/pinot/sql/parsers/CalciteSqlParser.java | 49 +- .../sql/parsers/parser/SqlInsertFromFile.java | 74 +++ .../pinot/sql/parsers/parser/UnparseUtils.java | 66 +++ .../pinot/sql/parsers/CalciteSqlCompilerTest.java | 61 +-- .../pinot-common-jdk8/pom.xml | 87 ++++ pom.xml | 25 + 13 files changed, 1485 insertions(+), 53 deletions(-) diff --git a/contrib/pinot-fmpp-maven-plugin/pom.xml b/contrib/pinot-fmpp-maven-plugin/pom.xml new file mode 100644 index 0000000000..d09c5dab67 --- /dev/null +++ b/contrib/pinot-fmpp-maven-plugin/pom.xml @@ -0,0 +1,111 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +--> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <artifactId>pinot</artifactId> + <groupId>org.apache.pinot</groupId> + <version>0.11.0-SNAPSHOT</version> + <relativePath>../..</relativePath> + </parent> + + <artifactId>pinot-fmpp-maven-plugin</artifactId> + <name>Pinot FMPP plugin</name> + <url>https://pinot.apache.org/</url> + <packaging>maven-plugin</packaging> + <properties> + <pinot.root>${basedir}/../..</pinot.root> + <maven.version>3.3.3</maven.version> + <fmpp.version>0.9.16</fmpp.version> + <freemarker.version>2.3.28</freemarker.version> + </properties> + + <dependencies> + <dependency> + <groupId>commons-io</groupId> + <artifactId>commons-io</artifactId> + </dependency> + <dependency> + <groupId>org.apache.maven</groupId> + <artifactId>maven-core</artifactId> + <version>${maven.version}</version> + <exclusions> + <exclusion> + <groupId>org.codehaus.plexus</groupId> + <artifactId>plexus-utils</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.maven</groupId> + <artifactId>maven-plugin-api</artifactId> + <version>${maven.version}</version> + </dependency> + <dependency> + <groupId>net.sourceforge.fmpp</groupId> + <artifactId>fmpp</artifactId> 
+ <version>${fmpp.version}</version> + </dependency> + <dependency> + <groupId>org.freemarker</groupId> + <artifactId>freemarker</artifactId> + <version>${freemarker.version}</version> + </dependency> + </dependencies> + <build> + <plugins> + <plugin> + <!-- Checkstyle shouldn't apply to Mojo classes --> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-checkstyle-plugin</artifactId> + <configuration> + <skip>true</skip> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-plugin-plugin</artifactId> + <configuration> + <goalPrefix>pinot-fmpp</goalPrefix> + </configuration> + <executions> + <execution> + <id>default-descriptor</id> + <goals> + <goal>descriptor</goal> + </goals> + <phase>process-classes</phase> + </execution> + <execution> + <id>help-descriptor</id> + <goals> + <goal>helpmojo</goal> + </goals> + <phase>process-classes</phase> + </execution> + </executions> + </plugin> + </plugins> + </build> +</project> diff --git a/contrib/pinot-fmpp-maven-plugin/src/main/java/org/apache/pinot/fmpp/FMPPMojo.java b/contrib/pinot-fmpp-maven-plugin/src/main/java/org/apache/pinot/fmpp/FMPPMojo.java new file mode 100644 index 0000000000..787ac7606c --- /dev/null +++ b/contrib/pinot-fmpp-maven-plugin/src/main/java/org/apache/pinot/fmpp/FMPPMojo.java @@ -0,0 +1,270 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.fmpp; + +import com.google.common.base.Joiner; +import com.google.common.base.Stopwatch; +import fmpp.Engine; +import fmpp.ProgressListener; +import fmpp.progresslisteners.TerseConsoleProgressListener; +import fmpp.setting.Settings; +import fmpp.util.MiscUtil; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; +import org.apache.commons.io.FileUtils; +import org.apache.maven.plugin.AbstractMojo; +import org.apache.maven.plugin.MojoExecutionException; +import org.apache.maven.plugin.MojoFailureException; +import org.apache.maven.project.MavenProject; + +import static java.lang.String.format; + + +/** + * a maven plugin to run the freemarker generation incrementally + * (if output has not changed, the files are not touched) + * + * @goal generate + * @phase generate-sources + */ +public class FMPPMojo extends AbstractMojo { + + /** + * Used to add new source directories to the build. + * + * @parameter default-value="${project}" + * @required + * @readonly + **/ + private MavenProject project; + + /** + * Where to find the FreeMarker template files. + * + * @parameter default-value="src/main/resources/fmpp/templates/" + * @required + */ + private File templates; + + /** + * Where to write the generated files of the output files. 
+ * + * @parameter default-value="${project.build.directory}/generated-sources/fmpp/" + * @required + */ + private File output; + + /** + * Location of the FreeMarker config file. + * + * @parameter default-value="src/main/resources/fmpp/config.fmpp" + * @required + */ + private File config; + + /** + * compilation scope to be added to ("compile" or "test") + * + * @parameter default-value="compile" + * @required + */ + private String scope; + + /** + * FMPP data model build parameter. + * + * @see <a href="http://fmpp.sourceforge.net/settings.html#key_data">FMPP Data Model Building</a> + * @parameter default-value="" + */ + private String data; + + /** + * if maven properties are added as data + * + * @parameter default-value="true" + * @required + */ + private boolean addMavenDataLoader; + + @Override + public void execute() + throws MojoExecutionException, MojoFailureException { + if (project == null) { + throw new MojoExecutionException("This plugin can only be used inside a project."); + } + String outputPath = output.getAbsolutePath(); + if ((!output.exists() && !output.mkdirs()) || !output.isDirectory()) { + throw new MojoFailureException("can not write to output dir: " + outputPath); + } + String templatesPath = templates.getAbsolutePath(); + if (!templates.exists() || !templates.isDirectory()) { + throw new MojoFailureException("templates not found in dir: " + outputPath); + } + + // add the output directory path to the project source directories + switch (scope) { + case "compile": + project.addCompileSourceRoot(outputPath); + break; + case "test": + project.addTestCompileSourceRoot(outputPath); + break; + default: + throw new MojoFailureException("scope must be compile or test"); + } + + final Stopwatch sw = Stopwatch.createStarted(); + try { + getLog().info( + format("Freemarker generation:\n scope: %s,\n config: %s,\n templates: %s", scope, config.getAbsolutePath(), + templatesPath)); + final File tmp = 
Files.createTempDirectory("freemarker-tmp").toFile(); + String tmpPath = tmp.getAbsolutePath(); + final String tmpPathNormalized = tmpPath.endsWith(File.separator) ? tmpPath : tmpPath + File.separator; + Settings settings = new Settings(new File(".")); + settings.set(Settings.NAME_SOURCE_ROOT, templatesPath); + settings.set(Settings.NAME_OUTPUT_ROOT, tmp.getAbsolutePath()); + settings.load(config); + settings.addProgressListener(new TerseConsoleProgressListener()); + settings.addProgressListener(new ProgressListener() { + @Override + public void notifyProgressEvent(Engine engine, int event, File src, int pMode, Throwable error, Object param) + throws Exception { + if (event == EVENT_END_PROCESSING_SESSION) { + getLog().info(format("Freemarker generation took %dms", sw.elapsed(TimeUnit.MILLISECONDS))); + sw.reset(); + Report report = moveIfChanged(tmp, tmpPathNormalized); + if (!tmp.delete()) { + throw new MojoFailureException(format("can not delete %s", tmp)); + } + getLog().info(format("Incremental output update took %dms", sw.elapsed(TimeUnit.MILLISECONDS))); + getLog().info(format("new: %d", report.newFiles)); + getLog().info(format("changed: %d", report.changedFiles)); + getLog().info(format("unchanged: %d", report.unchangedFiles)); + } + } + }); + List<String> dataValues = new ArrayList<>(); + if (addMavenDataLoader) { + getLog().info("Adding maven data loader"); + settings.setEngineAttribute(MavenDataLoader.MAVEN_DATA_ATTRIBUTE, new MavenDataLoader.MavenData(project)); + dataValues.add(format("maven: %s()", MavenDataLoader.class.getName())); + } + if (data != null) { + dataValues.add(data); + } + if (!dataValues.isEmpty()) { + String dataString = Joiner.on(",").join(dataValues); + getLog().info("Setting data loader " + dataString); + + settings.add(Settings.NAME_DATA, dataString); + } + settings.execute(); + } catch (Exception e) { + throw new MojoFailureException(MiscUtil.causeMessages(e), e); + } + } + + private static final class Report { + int 
changedFiles; + int unchangedFiles; + int newFiles; + + Report(int changedFiles, int unchangedFiles, int newFiles) { + super(); + this.changedFiles = changedFiles; + this.unchangedFiles = unchangedFiles; + this.newFiles = newFiles; + } + + public Report() { + this(0, 0, 0); + } + + void add(Report other) { + changedFiles += other.changedFiles; + unchangedFiles += other.unchangedFiles; + newFiles += other.newFiles; + } + + public void addChanged() { + ++changedFiles; + } + + public void addNew() { + ++newFiles; + } + + public void addUnchanged() { + ++unchangedFiles; + } + } + + private Report moveIfChanged(File root, String tmpPath) + throws MojoFailureException, IOException { + Report report = new Report(); + for (File file : root.listFiles()) { + if (file.isDirectory()) { + report.add(moveIfChanged(file, tmpPath)); + if (!file.delete()) { + throw new MojoFailureException(format("can not delete %s", file)); + } + } else { + String absPath = file.getAbsolutePath(); + if (!absPath.startsWith(tmpPath)) { + throw new MojoFailureException(format("%s should start with %s", absPath, tmpPath)); + } + String relPath = absPath.substring(tmpPath.length()); + File outputFile = new File(output, relPath); + if (!outputFile.exists()) { + report.addNew(); + } else if (!FileUtils.contentEquals(file, outputFile)) { + getLog().info(format("%s has changed", relPath)); + if (!outputFile.delete()) { + throw new MojoFailureException(format("can not delete %s", outputFile)); + } + report.addChanged(); + } else { + report.addUnchanged(); + } + if (!outputFile.exists()) { + File parentDir = outputFile.getParentFile(); + if (parentDir.exists() && !parentDir.isDirectory()) { + throw new MojoFailureException( + format("can not move %s to %s as %s is not a dir", file, outputFile, parentDir)); + } + if (!parentDir.exists() && !parentDir.mkdirs()) { + throw new MojoFailureException( + format("can not move %s to %s as dir %s can not be created", file, outputFile, parentDir)); + } + 
FileUtils.moveFile(file, outputFile); + } else { + if (!file.delete()) { + throw new MojoFailureException(format("can not delete %s", file)); + } + } + } + } + return report; + } +} diff --git a/contrib/pinot-fmpp-maven-plugin/src/main/java/org/apache/pinot/fmpp/MavenDataLoader.java b/contrib/pinot-fmpp-maven-plugin/src/main/java/org/apache/pinot/fmpp/MavenDataLoader.java new file mode 100644 index 0000000000..df85ad891b --- /dev/null +++ b/contrib/pinot-fmpp-maven-plugin/src/main/java/org/apache/pinot/fmpp/MavenDataLoader.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+package org.apache.pinot.fmpp;
+
+import fmpp.Engine;
+import fmpp.tdd.DataLoader;
+import java.util.List;
+import org.apache.maven.project.MavenProject;
+
+
+/**
+ * A data loader for Maven
+ *
+ * <p>Returns the {@link MavenData} object stored as an engine attribute under
+ * {@link #MAVEN_DATA_ATTRIBUTE}.
+ */
+public class MavenDataLoader implements DataLoader {
+  /** Holder that exposes the Maven project through {@link #getProject()}. */
+  public static final class MavenData {
+    private final MavenProject project;
+
+    public MavenData(MavenProject project) {
+      this.project = project;
+    }
+
+    public MavenProject getProject() {
+      return project;
+    }
+  }
+
+  /** Name of the engine attribute that carries the {@link MavenData} instance. */
+  public static final String MAVEN_DATA_ATTRIBUTE = "maven.data";
+
+  /**
+   * Returns the {@link MavenData} engine attribute.
+   *
+   * @param e engine whose attribute is read
+   * @param args loader arguments; must be empty
+   * @return the value stored under {@link #MAVEN_DATA_ATTRIBUTE}, cast to {@link MavenData}
+   * @throws IllegalArgumentException if any argument is supplied
+   */
+  @Override
+  public Object load(Engine e, List args)
+      throws Exception {
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("maven model data loader has no parameters");
+    }
+
+    MavenData data = (MavenData) e.getAttribute(MAVEN_DATA_ATTRIBUTE);
+    return data;
+  }
+}
diff --git a/headerdefinition.xml b/headerdefinition.xml
index f3fd4287ed..34d178e654 100644
--- a/headerdefinition.xml
+++ b/headerdefinition.xml
@@ -40,4 +40,14 @@
     <isMultiline>true</isMultiline>
     <padLines>false</padLines>
   </javadoc_style>
+  <ftl_style>
+    <firstLine>&lt;#--</firstLine>
+    <beforeEachLine>// </beforeEachLine>
+    <endLine>--&gt;</endLine>
+    <firstLineDetectionPattern>(\s|\t)*&lt;#--.*$</firstLineDetectionPattern>
+    <lastLineDetectionPattern>.*--&gt;(\s|\t)*$</lastLineDetectionPattern>
+    <allowBlankLines>false</allowBlankLines>
+    <isMultiline>true</isMultiline>
+    <padLines>false</padLines>
+  </ftl_style>
 </additionalHeaders>
diff --git a/pinot-common/pom.xml b/pinot-common/pom.xml
index 76bd905535..2bb5b8127b 100644
--- a/pinot-common/pom.xml
+++ b/pinot-common/pom.xml
@@ -98,6 +98,93 @@
           </java>
         </configuration>
       </plugin>
+      <plugin>
+        <!-- Extract parser grammar template from calcite-core.jar and put
+             it under ${project.build.directory} where all freemarker templates are.
--> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-dependency-plugin</artifactId> + <executions> + <execution> + <id>unpack-parser-template</id> + <phase>initialize</phase> + <goals> + <goal>unpack</goal> + </goals> + <configuration> + <artifactItems> + <artifactItem> + <groupId>org.apache.calcite</groupId> + <artifactId>calcite-core</artifactId> + <type>jar</type> + <overWrite>true</overWrite> + <outputDirectory>${project.build.directory}/</outputDirectory> + <includes>**/Parser.jj,**/default_config.fmpp</includes> + </artifactItem> + </artifactItems> + </configuration> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.apache.pinot</groupId> + <artifactId>pinot-fmpp-maven-plugin</artifactId> + <version>${project.version}</version> + <executions> + <execution> + <id>generate-fmpp-sources</id> + <phase>generate-sources</phase> + <goals> + <goal>generate</goal> + </goals> + <configuration> + <config>${project.basedir}/src/main/codegen/config.fmpp</config> + <output>${project.build.directory}/generated-sources/fmpp</output> + <templates>${project.build.directory}/codegen/templates</templates> + <data>tdd(${project.basedir}/src/main/codegen/config.fmpp), default:tdd(${project.build.directory}/codegen/default_config.fmpp)</data> + </configuration> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>build-helper-maven-plugin</artifactId> + <executions> + <execution> + <id>add-generated-sources</id> + <phase>process-sources</phase> + <goals> + <goal>add-source</goal> + </goals> + <configuration> + <sources> + <source>${project.build.directory}/generated-sources/javacc</source> + </sources> + </configuration> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>javacc-maven-plugin</artifactId> + <executions> + <execution> + <phase>generate-sources</phase> + <id>javacc</id> + <goals> + <goal>javacc</goal> + </goals> + 
<configuration> + <sourceDirectory>${project.build.directory}/generated-sources/fmpp</sourceDirectory> + <includes> + <include>**/Parser.jj</include> + </includes> + <lookAhead>2</lookAhead> + <isStatic>false</isStatic> + <outputDirectory>${project.build.directory}/generated-sources/javacc</outputDirectory> + </configuration> + </execution> + </executions> + </plugin> </plugins> </build> <dependencies> diff --git a/pinot-common/src/main/codegen/config.fmpp b/pinot-common/src/main/codegen/config.fmpp new file mode 100644 index 0000000000..c83241a6a1 --- /dev/null +++ b/pinot-common/src/main/codegen/config.fmpp @@ -0,0 +1,540 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +data: { + # Data declarations for this parser. + # + # Default declarations are in default_config.fmpp; if you do not include a + # declaration ('imports' or 'nonReservedKeywords', for example) in this file, + # FMPP will use the declaration from default_config.fmpp. + parser: { + # Generated parser implementation class package and name + package: "org.apache.pinot.sql.parsers.parser", + class: "SqlParserImpl", + + # List of import statements. 
+ imports: [ + "com.google.common.collect.*" + "org.apache.pinot.sql.parsers.parser.*" + "java.util.*" + ] + + # List of new keywords to add + keywords: [ + "FILE" + "ARCHIVE" + ] + + # List of non-reserved keywords to add + nonReservedKeywordsToAdd: [ + # customized for Pinot + "FILE" + "ARCHIVE" + + # The following keywords are reserved in core Calcite, + # are reserved in some version of SQL, + # but are not reserved in Babel. + # + # Words that are commented out (e.g. "AND") are still reserved. + # These are the most important reserved words, and SQL cannot be + # unambiguously parsed if they are not reserved. For example, if + # "INNER" is not reserved then in the query + # + # select * from emp inner join dept using (deptno)" + # + # "inner" could be a table alias for "emp". + # + # TODO: remove unused/untested BABEL non-reserved keywords since + # we are only testing a small subset. + # @see: CalciteSqlCompilerTest:testReservedKeywords + "ABS" + "ABSOLUTE" + "ACTION" + "ADD" + "AFTER" + "ALL" + "ALLOCATE" + "ALLOW" + "ALTER" + "AND" +# "ANY" + "ARE" + "ARRAY" +# "ARRAY_AGG" # not a keyword in Calcite + "ARRAY_MAX_CARDINALITY" + "AS" + "ASC" + "ASENSITIVE" + "ASSERTION" + "ASYMMETRIC" + "AT" + "ATOMIC" + "AUTHORIZATION" + "AVG" + "BEFORE" + "BEGIN" + "BEGIN_FRAME" + "BEGIN_PARTITION" + "BETWEEN" + "BIGINT" + "BINARY" + "BIT" +# "BIT_LENGTH" # not a keyword in Calcite + "BLOB" + "BOOLEAN" + "BOTH" + "BREADTH" + "BY" +# "CALL" + "CALLED" + "CARDINALITY" + "CASCADE" + "CASCADED" +# "CASE" + "CAST" + "CATALOG" + "CEIL" + "CEILING" + "CHAR" + "CHARACTER" + "CHARACTER_LENGTH" + "CHAR_LENGTH" + "CHECK" + "CLASSIFIER" + "CLOB" + "CLOSE" + "COALESCE" + "COLLATE" + "COLLATION" + "COLLECT" + "COLUMN" + "COMMIT" + "CONDITION" + "CONNECT" + "CONNECTION" + "CONSTRAINT" + "CONSTRAINTS" + "CONSTRUCTOR" + "CONTAINS" + "CONTINUE" + "CONVERT" + "CORR" + "CORRESPONDING" + "COUNT" + "COVAR_POP" + "COVAR_SAMP" +# "CREATE" +# "CROSS" + "CUBE" + "CUME_DIST" +# "CURRENT" + 
"CURRENT_CATALOG" + "CURRENT_DATE" + "CURRENT_DEFAULT_TRANSFORM_GROUP" + "CURRENT_PATH" + "CURRENT_ROLE" + "CURRENT_ROW" + "CURRENT_SCHEMA" + "CURRENT_TIME" + "CURRENT_TIMESTAMP" + "CURRENT_TRANSFORM_GROUP_FOR_TYPE" + "CURRENT_USER" +# "CURSOR" + "CYCLE" + "DATA" +# "DATE" + "DAY" + "DEALLOCATE" + "DEC" + "DECIMAL" + "DECLARE" +# "DEFAULT" + "DEFERRABLE" + "DEFERRED" +# "DEFINE" +# "DELETE" + "DENSE_RANK" + "DEPTH" + "DEREF" + "DESC" +# "DESCRIBE" # must be reserved + "DESCRIPTOR" + "DETERMINISTIC" + "DIAGNOSTICS" + "DISALLOW" + "DISCONNECT" +# "DISTINCT" +# "DO" # not a keyword in Calcite + "DOMAIN" + "DOUBLE" +# "DROP" # probably must be reserved + "DYNAMIC" + "EACH" + "ELEMENT" + "ELSE" +# "ELSEIF" # not a keyword in Calcite + "EMPTY" + "END" +# "END-EXEC" # not a keyword in Calcite, and contains '-' + "END_FRAME" + "END_PARTITION" + "EQUALS" + "ESCAPE" + "EVERY" +# "EXCEPT" # must be reserved + "EXCEPTION" + "EXEC" + "EXECUTE" + "EXISTS" +# "EXIT" # not a keyword in Calcite + "EXP" +# "EXPLAIN" # must be reserved + "EXTEND" + "EXTERNAL" + "EXTRACT" + "FALSE" +# "FETCH" + "FILTER" + "FIRST" + "FIRST_VALUE" + "FLOAT" + "FLOOR" + "FOR" + "FOREIGN" +# "FOREVER" # not a keyword in Calcite + "FOUND" + "FRAME_ROW" + "FREE" +# "FROM" # must be reserved +# "FULL" # must be reserved + "FUNCTION" + "FUSION" + "GENERAL" + "GET" + "GLOBAL" + "GO" + "GOTO" +# "GRANT" +# "GROUP" +# "GROUPING" + "GROUPS" +# "HANDLER" # not a keyword in Calcite +# "HAVING" + "HOLD" + "HOUR" + "IDENTITY" +# "IF" # not a keyword in Calcite + "ILIKE" + "IMMEDIATE" + "IMMEDIATELY" + "IMPORT" +# "IN" + "INDICATOR" + "INITIAL" + "INITIALLY" +# "INNER" + "INOUT" + "INPUT" + "INSENSITIVE" +# "INSERT" + "INT" + "INTEGER" +# "INTERSECT" + "INTERSECTION" +# "INTERVAL" +# "INTO" + "IS" + "ISOLATION" +# "ITERATE" # not a keyword in Calcite +# "JOIN" + "JSON_ARRAY" + "JSON_ARRAYAGG" + "JSON_EXISTS" + "JSON_OBJECT" + "JSON_OBJECTAGG" + "JSON_QUERY" + "JSON_VALUE" +# "KEEP" # not a keyword in Calcite + "KEY" + 
"LAG" + "LANGUAGE" + "LARGE" + "LAST" + "LAST_VALUE" +# "LATERAL" + "LEAD" + "LEADING" +# "LEAVE" # not a keyword in Calcite +# "LEFT" + "LEVEL" + "LIKE" + "LIKE_REGEX" +# "LIMIT" + "LN" + "LOCAL" + "LOCALTIME" + "LOCALTIMESTAMP" + "LOCATOR" +# "LOOP" # not a keyword in Calcite + "LOWER" + "MAP" + "MATCH" + "MATCHES" + "MATCH_NUMBER" +# "MATCH_RECOGNIZE" + "MAX" +# "MAX_CARDINALITY" # not a keyword in Calcite + "MEASURES" + "MEMBER" +# "MERGE" + "METHOD" + "MIN" +# "MINUS" + "MINUTE" + "MOD" + "MODIFIES" + "MODULE" + "MONTH" + "MULTISET" + "NAMES" + "NATIONAL" +# "NATURAL" + "NCHAR" + "NCLOB" +# "NEW" +# "NEXT" + "NO" + "NONE" + "NORMALIZE" + "NOT" + "NTH_VALUE" + "NTILE" +# "NULL" + "NULLIF" + "NUMERIC" + "OBJECT" + "OCCURRENCES_REGEX" + "OCTET_LENGTH" + "OF" +# "OFFSET" + "OLD" + "OMIT" +# "ON" + "ONE" + "ONLY" + "OPEN" + "OPTION" + "OR" +# "ORDER" + "ORDINALITY" + "OUT" +# "OUTER" + "OUTPUT" +# "OVER" + "OVERLAPS" + "OVERLAY" + "PAD" + "PARAMETER" + "PARTIAL" +# "PARTITION" + "PATH" +# "PATTERN" + "PER" + "PERCENT" + "PERCENTILE_CONT" + "PERCENTILE_DISC" + "PERCENT_RANK" + "PERIOD" + "PERMUTE" + "PORTION" + "POSITION" + "POSITION_REGEX" + "POWER" + "PRECEDES" + "PRECISION" + "PREPARE" + "PRESERVE" + "PREV" + "PRIMARY" + "PRIOR" + "PRIVILEGES" + "PROCEDURE" + "PUBLIC" +# "RANGE" + "RANK" + "READ" + "READS" + "REAL" + "RECURSIVE" + "REF" + "REFERENCES" + "REFERENCING" + "REGR_AVGX" + "REGR_AVGY" + "REGR_COUNT" + "REGR_INTERCEPT" + "REGR_R2" + "REGR_SLOPE" + "REGR_SXX" + "REGR_SXY" + "REGR_SYY" + "RELATIVE" + "RELEASE" +# "REPEAT" # not a keyword in Calcite + "RESET" +# "RESIGNAL" # not a keyword in Calcite + "RESTRICT" + "RESULT" + "RETURN" + "RETURNS" + "REVOKE" +# "RIGHT" + "RLIKE" + "ROLE" + "ROLLBACK" +# "ROLLUP" + "ROUTINE" +# "ROW" +# "ROWS" + "ROW_NUMBER" + "RUNNING" + "SAVEPOINT" + "SCHEMA" + "SCOPE" + "SCROLL" + "SEARCH" + "SECOND" + "SECTION" + "SEEK" +# "SELECT" + "SENSITIVE" + "SESSION" + "SESSION_USER" +# "SET" +# "SETS" + "SHOW" +# "SIGNAL" # not a 
keyword in Calcite + "SIMILAR" + "SIZE" +# "SKIP" # messes with JavaCC's <SKIP> token + "SMALLINT" +# "SOME" + "SPACE" + "SPECIFIC" + "SPECIFICTYPE" + "SQL" +# "SQLCODE" # not a keyword in Calcite +# "SQLERROR" # not a keyword in Calcite + "SQLEXCEPTION" + "SQLSTATE" + "SQLWARNING" + "SQRT" + "START" + "STATE" + "STATIC" + "STDDEV_POP" + "STDDEV_SAMP" +# "STREAM" + "SUBMULTISET" + "SUBSET" + "SUBSTRING" + "SUBSTRING_REGEX" + "SUCCEEDS" + "SUM" + "SYMMETRIC" + "SYSTEM" + "SYSTEM_TIME" + "SYSTEM_USER" +# "TABLE" +# "TABLESAMPLE" + "TEMPORARY" +# "THEN" +# "TIME" +# "TIMESTAMP" + "TIMEZONE_HOUR" + "TIMEZONE_MINUTE" + "TINYINT" + "TO" + "TRAILING" + "TRANSACTION" + "TRANSLATE" + "TRANSLATE_REGEX" + "TRANSLATION" + "TREAT" + "TRIGGER" + "TRIM" + "TRIM_ARRAY" + "TRUE" + "TRUNCATE" + "UESCAPE" + "UNDER" +# "UNDO" # not a keyword in Calcite +# "UNION" + "UNIQUE" + "UNKNOWN" +# "UNNEST" +# "UNTIL" # not a keyword in Calcite +# "UPDATE" + "UPPER" + "UPSERT" + "USAGE" + "USER" +# "USING" + "VALUE" +# "VALUES" + "VALUE_OF" + "VARBINARY" + "VARCHAR" + "VARYING" + "VAR_POP" + "VAR_SAMP" + "VERSION" + "VERSIONING" +# "VERSIONS" # not a keyword in Calcite + "VIEW" +# "WHEN" + "WHENEVER" +# "WHERE" +# "WHILE" # not a keyword in Calcite + "WIDTH_BUCKET" +# "WINDOW" +# "WITH" + "WITHIN" + "WITHOUT" + "WORK" + "WRITE" + "YEAR" + "ZONE" + ] + + # List of extended statement syntax to add + statementParserMethods: [ + "SqlInsertFromFile()" + ] + + # List of files in @includes directory that have parser method + implementationFiles: [ + "parserImpls.ftl" + ], + } +} + +freemarkerLinks: { + includes: includes/ +} diff --git a/pinot-common/src/main/codegen/includes/parserImpls.ftl b/pinot-common/src/main/codegen/includes/parserImpls.ftl new file mode 100644 index 0000000000..57d3e2b8af --- /dev/null +++ b/pinot-common/src/main/codegen/includes/parserImpls.ftl @@ -0,0 +1,103 @@ +<#-- +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +--> + +private void DataFileDef(List<SqlNode> list) : +{ + SqlParserPos pos; + SqlNode uri; +} +{ + ( <FILE> | <ARCHIVE> ) + { + pos = getPos(); + list.add(StringLiteral()); + } +} + +SqlNodeList DataFileDefList() : +{ + SqlParserPos pos; + List<SqlNode> list = Lists.newArrayList(); +} +{ + <FROM> { pos = getPos(); } + { pos = getPos(); } + DataFileDef(list) + ( <COMMA> DataFileDef(list) )* + { + return new SqlNodeList(list, pos.plus(getPos())); + } +} + +/** + * INSERT INTO [db_name.]table_name + * FROM [ FILE | ARCHIVE ] 'file_uri' [, [ FILE | ARCHIVE ] 'file_uri' ] + */ +SqlInsertFromFile SqlInsertFromFile() : +{ + SqlParserPos pos; + SqlIdentifier dbName = null; + SqlIdentifier tableName; + SqlNodeList fileList = null; +} +{ + <INSERT> { pos = getPos(); } + <INTO> + [ + dbName = SimpleIdentifier() + <DOT> + ] + + tableName = SimpleIdentifier() + [ + fileList = DataFileDefList() + ] + { + return new SqlInsertFromFile(pos, dbName, tableName, fileList); + } +} + +/* define the rest of the sql into SqlStmtList + */ +private void SqlStatementList(SqlNodeList list) : +{ +} +{ + { + list.add(SqlStmt()); + } +} + +SqlNodeList SqlStmtsEof() : +{ + SqlParserPos pos; + SqlNodeList stmts; +} +{ + { + pos = getPos(); + stmts = new SqlNodeList(pos); + stmts.add(SqlStmt()); + } + ( 
LOOKAHEAD(2, <SEMICOLON> SqlStmt()) <SEMICOLON> SqlStatementList(stmts) )* + [ <SEMICOLON> ] <EOF> + { + return stmts; + } +} diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java index 98c1e5fefa..1a1a1ac46e 100644 --- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java +++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java @@ -19,6 +19,7 @@ package org.apache.pinot.sql.parsers; import com.google.common.annotations.VisibleForTesting; +import java.io.StringReader; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -29,7 +30,7 @@ import java.util.Map; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.calcite.config.Lex; +import org.apache.calcite.avatica.util.Casing; import org.apache.calcite.sql.SqlBasicCall; import org.apache.calcite.sql.SqlDataTypeSpec; import org.apache.calcite.sql.SqlExplain; @@ -45,9 +46,8 @@ import org.apache.calcite.sql.SqlSelectKeyword; import org.apache.calcite.sql.fun.SqlBetweenOperator; import org.apache.calcite.sql.fun.SqlCase; import org.apache.calcite.sql.fun.SqlLikeOperator; -import org.apache.calcite.sql.parser.SqlParseException; +import org.apache.calcite.sql.parser.SqlAbstractParserImpl; import org.apache.calcite.sql.parser.SqlParser; -import org.apache.calcite.sql.parser.babel.SqlBabelParserImpl; import org.apache.calcite.sql.validate.SqlConformanceEnum; import org.apache.commons.collections.CollectionUtils; import org.apache.pinot.common.request.DataSource; @@ -60,6 +60,7 @@ import org.apache.pinot.common.utils.request.RequestUtils; import org.apache.pinot.pql.parsers.pql2.ast.FilterKind; import org.apache.pinot.segment.spi.AggregationFunctionType; import org.apache.pinot.spi.utils.Pairs; +import org.apache.pinot.sql.parsers.parser.SqlParserImpl; import 
org.apache.pinot.sql.parsers.rewriter.QueryRewriter; import org.apache.pinot.sql.parsers.rewriter.QueryRewriterFactory; import org.slf4j.Logger; @@ -73,18 +74,6 @@ public class CalciteSqlParser { public static final List<QueryRewriter> QUERY_REWRITERS = new ArrayList<>(QueryRewriterFactory.getQueryRewriters()); private static final Logger LOGGER = LoggerFactory.getLogger(CalciteSqlParser.class); - /** Lexical policy similar to MySQL with ANSI_QUOTES option enabled. (To be - * precise: MySQL on Windows; MySQL on Linux uses case-sensitive matching, - * like the Linux file system.) The case of identifiers is preserved whether - * or not they quoted; after which, identifiers are matched - * case-insensitively. Double quotes allow identifiers to contain - * non-alphanumeric characters. */ - private static final Lex PINOT_LEX = Lex.MYSQL_ANSI; - - // BABEL is a very liberal conformance value that allows anything supported by any dialect - private static final SqlParser.Config PARSER_CONFIG = - SqlParser.configBuilder().setLex(PINOT_LEX).setConformance(SqlConformanceEnum.BABEL) - .setParserFactory(SqlBabelParserImpl.FACTORY).build(); // To Keep the backward compatibility with 'OPTION' Functionality in PQL, which is used to // provide more hints for query processing. 
// @@ -95,6 +84,7 @@ public class CalciteSqlParser { // `OPTION (<k1> = <v1>, <k2> = <v2>, <k3> = <v3>)` // or // `OPTION (<k1> = <v1>) OPTION (<k2> = <v2>) OPTION (<k3> = <v3>)` + // TODO: move to use parser syntax extension: `OPTION` `(` `<key>` = `<value>` [, `<key>` = `<value>`]* `)` private static final Pattern OPTIONS_REGEX_PATTEN = Pattern.compile("option\\s*\\(([^\\)]+)\\)", Pattern.CASE_INSENSITIVE); @@ -131,11 +121,11 @@ public class CalciteSqlParser { sql = removeOptionsFromSql(sql); } - SqlParser sqlParser = SqlParser.create(sql, PARSER_CONFIG); SqlNode sqlNode; - try { - sqlNode = sqlParser.parseQuery(); - } catch (SqlParseException e) { + try (StringReader inStream = new StringReader(sql)) { + SqlParserImpl sqlParser = newSqlParser(inStream); + sqlNode = sqlParser.parseSqlStmtEof(); + } catch (Throwable e) { throw new SqlCompilationException("Caught exception while parsing query: " + sql, e); } @@ -311,16 +301,29 @@ public class CalciteSqlParser { * @throws SqlCompilationException if String is not a valid expression. 
*/ public static Expression compileToExpression(String expression) { - SqlParser sqlParser = SqlParser.create(expression, PARSER_CONFIG); SqlNode sqlNode; - try { - sqlNode = sqlParser.parseExpression(); - } catch (SqlParseException e) { + try (StringReader inStream = new StringReader(expression)) { + SqlParserImpl sqlParser = newSqlParser(inStream); + sqlNode = sqlParser.parseSqlExpressionEof(); + } catch (Throwable e) { throw new SqlCompilationException("Caught exception while parsing expression: " + expression, e); } return toExpression(sqlNode); } + @VisibleForTesting + static SqlParserImpl newSqlParser(StringReader inStream) { + SqlParserImpl sqlParser = new SqlParserImpl(inStream); + sqlParser.switchTo(SqlAbstractParserImpl.LexicalState.DQID); + // TODO: convert to MySQL conformance once we retired most of the un-tested BABEL tokens + sqlParser.setConformance(SqlConformanceEnum.BABEL); + sqlParser.setTabSize(1); + sqlParser.setQuotedCasing(Casing.UNCHANGED); + sqlParser.setUnquotedCasing(Casing.UNCHANGED); + sqlParser.setIdentifierMaxLength(SqlParser.DEFAULT_IDENTIFIER_MAX_LENGTH); + return sqlParser; + } + private static void setOptions(PinotQuery pinotQuery, List<String> optionsStatements) { if (optionsStatements.isEmpty()) { return; diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/parser/SqlInsertFromFile.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/parser/SqlInsertFromFile.java new file mode 100644 index 0000000000..3b2df6f778 --- /dev/null +++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/parser/SqlInsertFromFile.java @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.sql.parsers.parser; + +import java.util.Arrays; +import java.util.List; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlNodeList; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.SqlSpecialOperator; +import org.apache.calcite.sql.SqlWriter; +import org.apache.calcite.sql.parser.SqlParserPos; + + +/** + * Calcite extension for creating an INSERT sql node from a File object. 
+ * + * <p>Syntax: INSERT INTO [db_name.]table_name FROM [ FILE | ARCHIVE ] 'file_uri' [, [ FILE | ARCHIVE ] 'file_uri' ] + */ +public class SqlInsertFromFile extends SqlCall { + private static final SqlSpecialOperator OPERATOR = new SqlSpecialOperator("UDF", SqlKind.OTHER_DDL); + private SqlIdentifier _dbName; + private SqlIdentifier _tableName; + private SqlNodeList _fileList; + + public SqlInsertFromFile(SqlParserPos pos, SqlIdentifier dbName, SqlIdentifier tableName, SqlNodeList fileList) { + super(pos); + _dbName = dbName; + _tableName = tableName; + _fileList = fileList; + } + + @Override + public void unparse(SqlWriter writer, int leftPrec, int rightPrec) { + UnparseUtils u = new UnparseUtils(writer, leftPrec, rightPrec); + u.keyword("INSERT", "INTO"); + if (_dbName != null) { + u.node(_dbName).keyword("."); + } + u.node(_tableName); + if (_fileList != null) { + u.keyword("FROM").nodeList(_fileList); + } + } + + @Override + public SqlOperator getOperator() { + return OPERATOR; + } + + @Override + public List<SqlNode> getOperandList() { + return Arrays.asList(_dbName, _tableName, _fileList); + } +} diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/parser/UnparseUtils.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/parser/UnparseUtils.java new file mode 100644 index 0000000000..4d85337c73 --- /dev/null +++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/parser/UnparseUtils.java @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.sql.parsers.parser; + +import java.util.Arrays; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlNodeList; +import org.apache.calcite.sql.SqlWriter; + + +/** + * {@code UnparseUtils} provides utility for unparsing keywords, {@link SqlNode} or {@link SqlNodeList} using provided + * {@link SqlWriter}. + * + * @see SqlNode#unparse(SqlWriter, int, int) + */ +class UnparseUtils { + private final SqlWriter _writer; + private final int _leftPrec; + private final int _rightPrec; + + UnparseUtils(SqlWriter writer, int leftPrec, int rightPrec) { + _writer = writer; + _leftPrec = leftPrec; + _rightPrec = rightPrec; + } + + UnparseUtils keyword(String... 
keywords) { + Arrays.stream(keywords).forEach(_writer::keyword); + return this; + } + + UnparseUtils node(SqlNode n) { + n.unparse(_writer, _leftPrec, _rightPrec); + return this; + } + + UnparseUtils nodeList(SqlNodeList l) { + _writer.keyword("("); + if (l.size() > 0) { + l.get(0).unparse(_writer, _leftPrec, _rightPrec); + for (int i = 1; i < l.size(); i++) { + _writer.keyword(","); + l.get(i).unparse(_writer, _leftPrec, _rightPrec); + } + } + _writer.keyword(")"); + return this; + } +} diff --git a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java index ae0eae0e19..83cde06edc 100644 --- a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java @@ -18,14 +18,15 @@ */ package org.apache.pinot.sql.parsers; +import java.io.StringReader; import java.time.Instant; import java.time.ZoneId; import java.time.format.DateTimeFormatter; import java.util.Arrays; import java.util.List; import java.util.concurrent.TimeUnit; +import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.SqlNumericLiteral; -import org.apache.calcite.sql.parser.SqlParseException; import org.apache.pinot.common.request.AggregationInfo; import org.apache.pinot.common.request.BrokerRequest; import org.apache.pinot.common.request.Expression; @@ -40,6 +41,8 @@ import org.apache.pinot.common.utils.request.RequestUtils; import org.apache.pinot.pql.parsers.PinotQuery2BrokerRequestConverter; import org.apache.pinot.pql.parsers.pql2.ast.FilterKind; import org.apache.pinot.segment.spi.AggregationFunctionType; +import org.apache.pinot.sql.parsers.parser.SqlInsertFromFile; +import org.apache.pinot.sql.parsers.parser.SqlParserImpl; import org.apache.pinot.sql.parsers.rewriter.CompileTimeFunctionsInvoker; import org.testng.Assert; import org.testng.annotations.Test; @@ 
-445,9 +448,8 @@ public class CalciteSqlCompilerTest { literal = pinotQuery.getSelectList().get(0).getLiteral(); Assert.assertNull(literal); - pinotQuery = CalciteSqlParser - .compileToPinotQuery("select encodeUrl('key1=value 1&key2=value@!$2&key3=value%3'), " - + "decodeUrl('key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253') from mytable"); + pinotQuery = CalciteSqlParser.compileToPinotQuery("select encodeUrl('key1=value 1&key2=value@!$2&key3=value%3'), " + + "decodeUrl('key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253') from mytable"); Literal literal1 = pinotQuery.getSelectList().get(0).getLiteral(); Literal literal2 = pinotQuery.getSelectList().get(1).getLiteral(); Assert.assertNotNull(literal1); @@ -460,8 +462,8 @@ public class CalciteSqlCompilerTest { Assert.assertEquals(tempBrokerRequest.getSelections().getSelectionColumns().get(1), String.format("'%s'", literal2.getFieldValue().toString())); - pinotQuery = CalciteSqlParser.compileToPinotQuery("SELECT count(*) from mytable " - + "where bar = encodeUrl('key1=value 1&key2=value@!$2&key3=value%3')"); + pinotQuery = CalciteSqlParser.compileToPinotQuery( + "SELECT count(*) from mytable " + "where bar = encodeUrl('key1=value 1&key2=value@!$2&key3=value%3')"); literal = pinotQuery.getSelectList().get(0).getLiteral(); Assert.assertNull(literal); @@ -1003,8 +1005,6 @@ public class CalciteSqlCompilerTest { Assert.fail("Query should have failed compilation"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); - Assert.assertTrue(e.getCause() instanceof SqlParseException); - Assert.assertTrue(e.getCause().getMessage().contains("Encountered \", DISTINCT\" at line 1, column 15.")); } // not supported by Calcite SQL (this is in compliance with SQL standard) @@ -1014,8 +1014,6 @@ public class CalciteSqlCompilerTest { Assert.fail("Query should have failed compilation"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); - 
Assert.assertTrue(e.getCause() instanceof SqlParseException); - Assert.assertTrue(e.getCause().getMessage().contains("Encountered \", DISTINCT\" at line 1, column 10.")); } // not supported by Calcite SQL (this is in compliance with SQL standard) @@ -1025,8 +1023,6 @@ public class CalciteSqlCompilerTest { Assert.fail("Query should have failed compilation"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); - Assert.assertTrue(e.getCause() instanceof SqlParseException); - Assert.assertTrue(e.getCause().getMessage().contains("Encountered \", DISTINCT\" at line 1, column 18.")); } // The following query although a valid SQL syntax is not @@ -1559,9 +1555,7 @@ public class CalciteSqlCompilerTest { Assert.fail("Query should have failed to compile"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); - Assert.assertTrue(e.getCause() instanceof SqlParseException); String message = e.getCause().getMessage(); - Assert.assertTrue(message.startsWith("Encountered") && message.contains("table")); } // date - need to escape try { @@ -1569,9 +1563,6 @@ public class CalciteSqlCompilerTest { Assert.fail("Query should have failed to compile"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); - Assert.assertTrue(e.getCause() instanceof SqlParseException); - String message = e.getCause().getMessage(); - Assert.assertTrue(message.startsWith("Encountered") && message.contains("Date")); } // timestamp - need to escape @@ -1580,9 +1571,6 @@ public class CalciteSqlCompilerTest { Assert.fail("Query should have failed to compile"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); - Assert.assertTrue(e.getCause() instanceof SqlParseException); - String message = e.getCause().getMessage(); - Assert.assertTrue(message.startsWith("Encountered") && message.contains("timestamp")); } // time - need to escape @@ -1591,9 +1579,6 @@ public class CalciteSqlCompilerTest { 
Assert.fail("Query should have failed to compile"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); - Assert.assertTrue(e.getCause() instanceof SqlParseException); - String message = e.getCause().getMessage(); - Assert.assertTrue(message.startsWith("Encountered") && message.contains("time")); } // group - need to escape @@ -1602,9 +1587,6 @@ public class CalciteSqlCompilerTest { Assert.fail("Query should have failed to compile"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); - Assert.assertTrue(e.getCause() instanceof SqlParseException); - String message = e.getCause().getMessage(); - Assert.assertTrue(message.startsWith("Encountered") && message.contains("group")); } // escaping the above works @@ -1998,15 +1980,14 @@ public class CalciteSqlCompilerTest { Assert.assertEquals(expression.getLiteral().getFieldValue(), "key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253"); - expression = CalciteSqlParser - .compileToExpression("decodeUrl('key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253')"); + expression = + CalciteSqlParser.compileToExpression("decodeUrl('key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253')"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); expression = pinotQuery.getFilterExpression(); Assert.assertNotNull(expression.getLiteral()); - Assert.assertEquals(expression.getLiteral().getFieldValue(), - "key1=value 1&key2=value@!$2&key3=value%3"); + Assert.assertEquals(expression.getLiteral().getFieldValue(), "key1=value 1&key2=value@!$2&key3=value%3"); expression = CalciteSqlParser.compileToExpression("reverse(playerName)"); Assert.assertNotNull(expression.getFunctionCall()); @@ -2656,4 +2637,24 @@ public class CalciteSqlCompilerTest { Assert.expectThrows(SqlCompilationException.class, () -> CalciteSqlParser.compileToPinotQuery("SELECT UPPER(col1), 
avg(col2) from foo")); } + + /** + * Test for customized components in src/main/codegen/parserImpls.ftl file. + */ + @Test + public void testParserExtensionImpl() { + String customSql = "INSERT INTO db.tbl FROM FILE 'file:///tmp/file1', FILE 'file:///tmp/file2'"; + SqlNode sqlNode = testSqlWithCustomSqlParser(customSql); + Assert.assertTrue(sqlNode instanceof SqlInsertFromFile); + } + + private static SqlNode testSqlWithCustomSqlParser(String sqlString) { + try (StringReader inStream = new StringReader(sqlString)) { + SqlParserImpl sqlParser = CalciteSqlParser.newSqlParser(inStream); + return sqlParser.parseSqlStmtEof(); + } catch (Exception e) { + Assert.fail("test custom sql parser failed", e); + } + return null; + } } diff --git a/pinot-connectors/prestodb-pinot-dependencies/pinot-common-jdk8/pom.xml b/pinot-connectors/prestodb-pinot-dependencies/pinot-common-jdk8/pom.xml index abba9e2dc2..095cfca22c 100644 --- a/pinot-connectors/prestodb-pinot-dependencies/pinot-common-jdk8/pom.xml +++ b/pinot-connectors/prestodb-pinot-dependencies/pinot-common-jdk8/pom.xml @@ -86,6 +86,93 @@ </java> </configuration> </plugin> + <plugin> + <!-- Extract parser grammar template from calcite-core.jar and put + it under ${project.build.directory} where all freemarker templates are. 
--> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-dependency-plugin</artifactId> + <executions> + <execution> + <id>unpack-parser-template</id> + <phase>initialize</phase> + <goals> + <goal>unpack</goal> + </goals> + <configuration> + <artifactItems> + <artifactItem> + <groupId>org.apache.calcite</groupId> + <artifactId>calcite-core</artifactId> + <type>jar</type> + <overWrite>true</overWrite> + <outputDirectory>${project.build.directory}/</outputDirectory> + <includes>**/Parser.jj,**/default_config.fmpp</includes> + </artifactItem> + </artifactItems> + </configuration> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.apache.pinot</groupId> + <artifactId>pinot-fmpp-maven-plugin</artifactId> + <version>${project.version}</version> + <executions> + <execution> + <id>generate-fmpp-sources</id> + <phase>generate-sources</phase> + <goals> + <goal>generate</goal> + </goals> + <configuration> + <config>${project.basedir}/src/main/codegen/config.fmpp</config> + <output>${project.build.directory}/generated-sources/fmpp</output> + <templates>${project.build.directory}/codegen/templates</templates> + <data>tdd(${project.basedir}/src/main/codegen/config.fmpp), default:tdd(${project.build.directory}/codegen/default_config.fmpp)</data> + </configuration> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>build-helper-maven-plugin</artifactId> + <executions> + <execution> + <id>add-generated-sources</id> + <phase>process-sources</phase> + <goals> + <goal>add-source</goal> + </goals> + <configuration> + <sources> + <source>${project.build.directory}/generated-sources/javacc</source> + </sources> + </configuration> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>javacc-maven-plugin</artifactId> + <executions> + <execution> + <phase>generate-sources</phase> + <id>javacc</id> + <goals> + <goal>javacc</goal> + </goals> + 
<configuration> + <sourceDirectory>${project.build.directory}/generated-sources/fmpp</sourceDirectory> + <includes> + <include>**/Parser.jj</include> + </includes> + <lookAhead>2</lookAhead> + <isStatic>false</isStatic> + <outputDirectory>${project.build.directory}/generated-sources/javacc</outputDirectory> + </configuration> + </execution> + </executions> + </plugin> </plugins> </build> <dependencies> diff --git a/pom.xml b/pom.xml index 7ff2080020..6065d06353 100644 --- a/pom.xml +++ b/pom.xml @@ -56,6 +56,7 @@ <module>pinot-connectors</module> <module>pinot-segment-local</module> <module>pinot-compatibility-verifier</module> + <module>contrib/pinot-fmpp-maven-plugin</module> </modules> <licenses> @@ -1662,6 +1663,8 @@ <config>SCRIPT_STYLE</config> <queries>SCRIPT_STYLE</queries> <results>SCRIPT_STYLE</results> + <fmpp>SCRIPT_STYLE</fmpp> + <ftl>FTL_STYLE</ftl> <MockMaker>SCRIPT_STYLE</MockMaker> <appAssemblerScriptTemplate>SCRIPT_STYLE</appAssemblerScriptTemplate> <pql>SCRIPT_STYLE</pql> @@ -1824,6 +1827,28 @@ </execution> </executions> </plugin> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>build-helper-maven-plugin</artifactId> + <version>3.3.0</version> + </plugin> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>javacc-maven-plugin</artifactId> + <version>2.6</version> + <dependencies> + <dependency> + <groupId>net.java.dev.javacc</groupId> + <artifactId>javacc</artifactId> + <version>7.0.10</version> + </dependency> + </dependencies> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-plugin-plugin</artifactId> + <version>3.6.0</version> + </plugin> </plugins> </build> <reporting> --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org