Re: [antlr-dev] code gen for v4 antlr

Terence Parr Mon, 20 Jun 2011 12:36:03 -0700

> Hi Ter,
> 
> This is fixed with the antlr 3.4 snapshot just released:
> 
> http://antlr.org/download/antlr-3.4-complete.jar


Yeah, sorry. I forgot to mention that. I added the inContext() function back 
into ANTLR.

> PS: In case anyone else is trying to figure out how to build the antlr 4 dev
> branch, I built it with netbeans adding this to the bottom of the build.xml
> file.  Not sure if it's correct or not, but did get a successful compile:

I think you just created our ANT build ;) I'll be using that instead of mvn.

Ter

> 
>    <property environment="envList"/>
>    <condition property="antlrTest">
>        <isset property="${envList.ANTLR_HOME}"/>
>    </condition>
>    <!-- where to place the antlr generated files -->
>    <property name="antlr3-codegen-src" 
> location="antlr4/tool/src/org/antlr/v4/codegen"/>
>    <property name="antlr3-parse-src" 
> location="antlr4/tool/src/org/antlr/v4/parse"/>
>    <property name="antlr3-semantics-src" 
> location="antlr4/tool/src/org/antlr/v4/semantics"/>
>    <!-- name of the package - empty in this example -->
>    <property name="package" value=""/>
>    <!-- where to find the grammar files - grammar files are placed in the 
> source directory -->
>    <property name="antr3-codegen-grammar" location="${antlr3-codegen-src}"/>
>    <property name="antr3-parse-grammar" location="${antlr3-parse-src}"/>
>    <property name="antr3-semantics-grammar" 
> location="${antlr3-semantics-src}"/>
>    <!-- where to write/find token files -->
>    <property name="token.lib" location="${src}/${package}" />
> 
>    <!-- antlr options -->
>    <property name="report" value="true" />
>    <property name="multithreaded" value="false" />
>    <property name="debug" value="false" />
>    <property name="conversiontimeout" value="1200000" />
> 
>    <!-- To be added to the classpath for compilation with javac -->
>    <patternset id="antlr.libs">
>        <include name="antlr-3.4-complete.jar" />
>    </patternset>
>    <path id="antlr.path">
>        <!-- <fileset dir="${envList.ANTLR_HOME}/lib" casesensitive="yes"> -->
>        <!--     <patternset refid="antlr.libs" /> -->
>        <!-- </fileset> -->
>        <fileset dir="../../lib" casesensitive="yes">
>            <patternset refid="antlr.libs" />
>        </fileset>
>    </path>
> 
>    <!-- A convenience macro which invokes antlr
>         See target -pre-compile where the macro is being used
>    -->
>    <macrodef name="antlr3-codegen">
>        <attribute name="grammar.name"/>
>        <attribute name="package" default="${package}"/>
>        <sequential>
>            <echo message="antlr ${antr3-codegen-grammar}/@{grammar.name}" />
>            <antlr:ant-antlr3 xmlns:antlr="antlib:org/apache/tools/ant/antlr"
>                  target="${antr3-codegen-grammar}/@{grammar.name}"
>                  outputdirectory="${antlr3-codegen-src}/@{package}"
>                  libdirectory="${antlr3-codegen-src}/@{package}"
>                  multithreaded="${multithreaded}"
>                  conversiontimeout="${conversiontimeout}"
>                  report="${report}"
>                  debug="${debug}">
>                <classpath>
>                    <path refid="antlr.path"/>
>                </classpath>
>            </antlr:ant-antlr3>
>        </sequential>
>    </macrodef>
>    <macrodef name="antlr3-parse">
>        <attribute name="grammar.name"/>
>        <attribute name="package" default="${package}"/>
>        <sequential>
>            <echo message="antlr ${antr3-parse-grammar}/@{grammar.name}" />
>            <antlr:ant-antlr3 xmlns:antlr="antlib:org/apache/tools/ant/antlr"
>                  target="${antr3-parse-grammar}/@{grammar.name}"
>                  outputdirectory="${antlr3-parse-src}/@{package}"
>                  libdirectory="${antlr3-parse-src}/@{package}"
>                  multithreaded="${multithreaded}"
>                  conversiontimeout="${conversiontimeout}"
>                  report="${report}"
>                  debug="${debug}">
>                <classpath>
>                    <path refid="antlr.path"/>
>                </classpath>
>            </antlr:ant-antlr3>
>        </sequential>
>    </macrodef>
>    <macrodef name="antlr3-semantics">
>        <attribute name="grammar.name"/>
>        <attribute name="package" default="${package}"/>
>        <sequential>
>            <echo message="antlr ${antr3-semantics-grammar}/@{grammar.name}" />
>            <antlr:ant-antlr3 xmlns:antlr="antlib:org/apache/tools/ant/antlr"
>                  target="${antr3-semantics-grammar}/@{grammar.name}"
>                  outputdirectory="${antlr3-semantics-src}/@{package}"
>                  libdirectory="${antlr3-semantics-src}/@{package}"
>                  multithreaded="${multithreaded}"
>                  conversiontimeout="${conversiontimeout}"
>                  report="${report}"
>                  debug="${debug}">
>                <classpath>
>                    <path refid="antlr.path"/>
>                </classpath>
>            </antlr:ant-antlr3>
>        </sequential>
>    </macrodef>
> 
>    <!-- Call Antlr before compilation with javac -->
>    <target name="-pre-compile">
>        <!-- <antlr3-codegen grammar.name="SourceGenTriggers.g"/> -->
>        <antlr3-parse grammar.name="ANTLRLexer.g"/>
>        <antlr3-parse grammar.name="ANTLRParser.g"/>
>        <antlr3-parse grammar.name="ASTVerifier.g"/>
>        <antlr3-parse grammar.name="ATNBuilder.g"/>
>        <antlr3-parse grammar.name="ActionSplitter.g"/>
>        <!-- <antlr3-semantics grammar.name="BasicSemanticTriggers.g"/> -->
>        <!-- <antlr3-semantics grammar.name="CollectSymbols.g"/> -->
>    </target>
>    <!-- Clean up and touch grammar files -->
>    <target name="-post-clean">
>        <echo message="touching grammar files in ${antr3-parse-grammar}"/>
>        <touch>
>            <fileset dir="${antr3-parse-grammar}" includes="*.g"/>
>        </touch>
>    </target>
> 
>>> Basic idea is that I parse grammar into an AST and then create a graph (
>>> augmented transition network) representation that is a lot like a syntax
>>> diagram version of the grammar. This ATN gets serialized and generated in
>>> parsers and lexers, just FYI. I do the necessary semantic analysis and so
>>> on to figure out what grammar means. Then, to generate code, I create a
>>> model of the output using the objects in the model package. Then, an
>>> automatic walker traverses this model and instantiates a template with the
>>> same name as the model object. I have some cleanup work to do and will add
>>> an annotation that says which of the object fields should be traversed by
>>> the object model walker.
>>> 
>>> The model is not necessarily inherently imperative, but there is probably a
>>> lot of subtle imperative stuff in there. At the highest level, I create a
>>> ParserFile which contains a Parser model object and a set of named actions
>>> consisting of Action objects. A parser has lots of things including a set of
>>> RuleFunction objects, which in turn, have a series of actions, including
>>> InvokeRule and MatchToken.
>>> 
>>> The templates are much simpler because my code generator is creating a very
>>> explicit output model. Templates should only say how to spit out that object
>>> in text. The previous v3 code generator required a huge amount of thinking
>>> inside the template (that's what I get for allowing nested IFs in
>>> StringTemplate! ;)).
>>> 
>>> Anyway, this is a start and you could look at the templates in Java.stg to
>>> see how much simpler they are.  We should be able to generate Haskell no
>>> problem.
>>> 
>>> You can also look at the source code at:
>>> 
>>> http://antlr.org/depot/antlr4/main
>>> http://antlr.org/depot/antlr4/main/tool/src/org/antlr/v4/codegen
>>> 
>>> Ter
>> 
>> Thanks, that's really neat.  The Java.stg is a lot smaller and simpler 
>> looking
>> than before.
>> 
>> I need to generate a trivial Java lexer, then try to hand code that
>> into Haskell and/or Scheme, then try hacking the string templates some more.
>> Scheme seems similar to Haskell for this, they are both weird and both
>> use continuations :-).
>> 
>> I'm trying to learn Scheme for this purpose, with the idea of trying to
>> write both Haskell and Scheme targets for v4.
>> 
>> Below is the same sketch of the Haskell dfa I sent earlier in Scheme.
>> I'm not sure if v4 generates dfa's yet though, as it's commented in
>> Java.stg:
>> 
>> DFADecl(dfa) ::= <<
>> // define <dfa.name>
>>>> 
>> 
>> Thanks, Mark
>> 
>> #!r6rs
>> (import (rnrs lists (6))
>>        (rnrs base (6))
>>        (rnrs io simple (6)))
>> 
>> ; Copyright (c) 2011, Mark Wright.  All rights reserved.
>> 
>> ; Given: 
>> ; is the Array of tokens from the input file 
>> ; p the current parsing position
>> ; o the ANTLR offset from the current parsing position, which is different 
>> to 
>> ;   a normal offset, as 0 is undefined and returns Nothing, 1 is the current
>> ;   character, 2 is the next character, -1 is the previous character.
>> ; return the character at ANTLR offset, or Nothing if the ANTLR offset is 
>> beyond
>> ; the end of the file, or before the beginning of the file, or 0.
>> ; vector -> int -> int -> token 
>> (define lt
>>  (lambda (is p o)
>>    (cond
>>      ((or (= o 0) (>= (- (+ p o) 1) (vector-length is)) (< (+ p o) 0)) 
>> tid/nothing)
>>      ((> o 0) (vector-ref is (- (+ p o) 1)))
>>      (else (vector-ref is (+ p o))))))
>> 
>> ; The token IDs
>> (define tid/nothing 1000000)
>> (define tid/void 4)
>> (define tid/int 5)
>> (define tid/left-parenthesis 6)
>> (define tid/right-parenthesis 7)
>> (define tid/left-curly-brace 8)
>> (define tid/right-curly-brace 9)
>> (define tid/comma 10)
>> (define tid/semicolon 11)
>> (define tid/id 12)
>> (define tid/ws 13)
>> 
>> ; The DFA states as labelled in the DFA diagram on p. 261 of the ANTLR book.
>> (define ds1/s0 0)
>> (define ds1/s1 1)
>> (define ds1/s2 2)
>> (define ds1/s3 3)
>> (define ds1/s4 4)
>> (define ds1/s5 5)
>> (define ds1/s6 6)
>> (define ds1/s7 7)
>> (define ds1/s8 8)
>> (define ds1/s9 9)
>> (define ds1/s10 10)
>> (define ds1/s11 11)
>> 
>> ; Scanning indicates the DFA is still running.  NoMatch means the DFA does
>> ; not match this input.  Alt1 predicts a method forward declaration 
>> signature.
>> ; Alt2 predicts a concrete method definition.
>> (define da1/scanning 0)
>> (define da1/no-match 1)
>> (define da1/alt-1 2)
>> (define da1/alt-2 3)
>> 
>> ; The DFA state transition function.
>> ; First parameter is the current state.
>> ; Second parameter is the token ID.
>> ; Result is the (DfaAlt1, DfaState1) pair, where the
>> ; DfaAlt1 is Scanning while the DFA is still scanning ahead,
>> ; in which case DfaState1 is the next state.  Or if DfaAlt1 is
>> ; NoMatch, then DfaState1 is the last state where the no match
>> ; was detected.  Or DfaAlt1 is the predicted alternative, and
>> ; DfaState1 is the final state.
>> ; sigmaDfa1 :: DfaState1 -> MethodTokenId -> (DfaAlt1, DfaState1)
>> (define sigma-dfa-1
>>  (lambda (ds tid)
>>    (cond
>>     ((and (= ds ds1/s0) (or (= tid tid/void) (= tid tid/int))) (values 
>> da1/scanning ds1/s1))
>>     ((and (= ds ds1/s1) (= tid tid/id)) (values da1/scanning ds1/s2))
>>     ((and (= ds ds1/s2) (= tid tid/left-parenthesis)) (values da1/scanning 
>> ds1/s3))
>>     ((and (= ds ds1/s3) (= tid tid/int)) (values da1/scanning ds1/s4))
>>     ((and (= ds ds1/s4) (= tid tid/id)) (values da1/scanning ds1/s5))
>>     ((and (= ds ds1/s5) (= tid tid/comma)) (values da1/scanning ds1/s6))
>>     ((and (= ds ds1/s5) (= tid tid/right-parenthesis)) (values da1/scanning 
>> ds1/s9))
>>     ((and (= ds ds1/s6) (= tid tid/int)) (values da1/scanning ds1/s7))
>>     ((and (= ds ds1/s7) (= tid tid/id)) (values da1/scanning ds1/s8))
>>     ((and (= ds ds1/s8) (= tid tid/comma)) (values da1/scanning ds1/s6))
>>     ((and (= ds ds1/s8) (= tid tid/right-parenthesis)) (values da1/scanning 
>> ds1/s9))
>>     ((and (= ds ds1/s9) (= tid tid/left-curly-brace)) (values da1/alt-2 
>> ds1/s11))
>>     ((and (= ds ds1/s9) (= tid tid/semicolon)) (values da1/alt-1 ds1/s10))
>>     (else (values da1/no-match ds)))))
>> 
>> ; Loop to run the DFA.
>> ; alt indicates if we are still Scanning, or finished.
>> ; s is the current state.
>> ; is is the input stream of tokens.
>> ; p is the current zero based offset from the start of the token stream.
>> ; o is the lookahead one based token offset, as described in the lt function.
>> ; DfaAlt1 is the predicted alternative, or NoMatch if no alternative is 
>> matched.
>> ; scanDfa1 :: DfaAlt1 -> DfaState1 -> Array Int MethodTokenId -> Int -> Int 
>> -> (DfaAlt1, DfaState1)
>> (define scan-dfa-1
>>  (lambda (alt s is p o)
>>    (if (= alt da1/scanning)
>>        (let ([t (lt is p o)])
>>          (if (= t tid/nothing)
>>              (cons da1/no-match s)
>>              (let-values ([(alt-2 s-2) (sigma-dfa-1 s t)])
>>                (scan-dfa-1 alt-2 s-2 is p (+ o 1)))))
>>        (cons alt s))))
>> 
>> ; Run the DFA to find the predicted alternative if the rule matches.
>> ; is is the input stream of tokens.
>> ; p is the current zero based offset from the start of the token stream.
>> ; o is the lookahead one based token offset, as described in the lt function.
>> ; DfaAlt1 is the predicted alternative, or NoMatch if no alternative is 
>> matched.
>> ; predictDfa1 :: Array Int MethodTokenId -> Int -> Int -> DfaAlt1
>> (define predict-dfa-1
>>  (lambda (is p o)
>>    (car (scan-dfa-1 da1/scanning ds1/s0 is p o))))
>> 
>> 
>> ; An example token sequence which should predict alt1 for the DFA on page 261
>> ; of The Definitive ANTLR Reference.
>> (define la1 (vector tid/int tid/id tid/left-parenthesis tid/int tid/id 
>> tid/comma tid/int tid/id
>>                    tid/comma tid/int tid/id tid/right-parenthesis 
>> tid/semicolon))
>> 
>> (define la2 (vector tid/int tid/id tid/left-parenthesis tid/int tid/id 
>> tid/comma tid/int tid/id
>> tid/comma tid/int tid/id tid/right-parenthesis tid/left-curly-brace))
>> 
>> (display (predict-dfa-1 la1 0 1))
>> (display "\n")
>> (display (predict-dfa-1 la2 0 1))
>> 
>> Non-text part: text/html

_______________________________________________
antlr-dev mailing list
[email protected]
http://www.antlr.org/mailman/listinfo/antlr-dev

Re: [antlr-dev] code gen for v4 antlr

Reply via email to