On 16.08.23 at 20:56, Matthias Urlichs wrote:
> If you do not want to move to format 3.0, please at least specify the 1.0 format explicitly so that dpkg-source can move to a 3.0 default. Feel free to send off a zero-delay NMU for either.
I have picked 3.0 (quilt). The debdiff is attached. Thanks.
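For the record, a source-only rebuild is a quick way to sanity-check such a conversion locally (a rough sketch, not part of the debdiff; the .dsc name is simply what the 2.2.1-3.2 upload would produce):

  # Unpacking applies debian/patches/debian.patch for 3.0 (quilt)
  dpkg-source -x yapps2_2.2.1-3.2.dsc
  cd yapps2-2.2.1
  # Source-only build; the clean target only has Build-Depends available,
  # which is why dh-python and python3-setuptools were moved there.
  dpkg-buildpackage -S -us -uc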
diff -Nru yapps2-2.2.1/debian/changelog yapps2-2.2.1/debian/changelog --- yapps2-2.2.1/debian/changelog 2023-08-17 02:01:56.000000000 +0200 +++ yapps2-2.2.1/debian/changelog 2023-08-16 21:52:30.000000000 +0200 @@ -1,3 +1,11 @@ +yapps2 (2.2.1-3.2) unstable; urgency=medium + + * Non-maintainer upload. + * Convert to source format 3.0 (closes: #1007279). + * Move some Build-Depends in order to build the source package only. + + -- Bastian Germann <b...@debian.org> Wed, 16 Aug 2023 19:52:30 +0000 + yapps2 (2.2.1-3.1) unstable; urgency=medium * Non-maintainer upload. diff -Nru yapps2-2.2.1/debian/control yapps2-2.2.1/debian/control --- yapps2-2.2.1/debian/control 2023-08-17 02:01:56.000000000 +0200 +++ yapps2-2.2.1/debian/control 2023-08-16 21:52:30.000000000 +0200 @@ -2,11 +2,11 @@ Section: python Priority: optional Maintainer: Matthias Urlichs <sm...@debian.org> -Build-Depends: debhelper (>= 9~) +Build-Depends: debhelper (>= 9~), + dh-python, + python3-setuptools, Build-Depends-Indep: python3-dev, - dh-python, hevea, - python3-setuptools, Standards-Version: 3.9.8 Package: yapps2 diff -Nru yapps2-2.2.1/debian/patches/debian.patch yapps2-2.2.1/debian/patches/debian.patch --- yapps2-2.2.1/debian/patches/debian.patch 1970-01-01 01:00:00.000000000 +0100 +++ yapps2-2.2.1/debian/patches/debian.patch 2023-08-16 21:52:30.000000000 +0200 @@ -0,0 +1,210 @@ +--- yapps2-2.2.1.orig/doc/yapps2.tex ++++ yapps2-2.2.1/doc/yapps2.tex +@@ -795,7 +795,7 @@ import Xparser + + class MyX(Xparser.X): + def printmsg(self): +- print "Hello!" ++ print("Hello!") + \end{verbatim} + + \mysubsection{Customizing Scanners} +@@ -924,7 +924,7 @@ portion of the input matched by the curr + {{ start = self._scanner.pos }} + a b c + {{ end = self._scanner.pos }} +- {{ print 'Text is', self._scanner.input[start:end] }} ++ {{ print('Text is', self._scanner.input[start:end]) }} + \end{verbatim} + + \mysubsection{Pre- and Post-Parser Code} +--- yapps2-2.2.1.orig/examples/calc.g ++++ yapps2-2.2.1/examples/calc.g +@@ -3,12 +3,12 @@ globalvars = {} # We will store th + def lookup(map, name): + for x,v in map: + if x == name: return v +- if not globalvars.has_key(name): print 'Undefined (defaulting to 0):', name ++ if name not in globalvars: print('Undefined (defaulting to 0):', name) + return globalvars.get(name, 0) + + def stack_input(scanner,ign): + """Grab more input""" +- scanner.stack_input(raw_input(">?> ")) ++ scanner.stack_input(input(">?> ")) + + %% + parser Calculator: +@@ -20,10 +20,10 @@ parser Calculator: + token VAR: "[a-zA-Z_]+" + + # Each line can either be an expression or an assignment statement +- rule goal: expr<<[]>> END {{ print '=', expr }} ++ rule goal: expr<<[]>> END {{ print('=', expr) }} + {{ return expr }} + | "set" VAR expr<<[]>> END {{ globalvars[VAR] = expr }} +- {{ print VAR, '=', expr }} ++ {{ print(VAR, '=', expr) }} + {{ return expr }} + + # An expression is the sum and difference of factors +@@ -47,18 +47,18 @@ parser Calculator: + "in" expr<<V>> {{ return expr }} + %% + if __name__=='__main__': +- print 'Welcome to the calculator sample for Yapps 2.' +- print ' Enter either "<expression>" or "set <var> <expression>",' +- print ' or just press return to exit. An expression can have' +- print ' local variables: let x = expr in expr' ++ print('Welcome to the calculator sample for Yapps 2.') ++ print(' Enter either "<expression>" or "set <var> <expression>",') ++ print(' or just press return to exit. 
An expression can have') ++ print(' local variables: let x = expr in expr') + # We could have put this loop into the parser, by making the + # `goal' rule use (expr | set var expr)*, but by putting the + # loop into Python code, we can make it interactive (i.e., enter + # one expression, get the result, enter another expression, etc.) + while 1: +- try: s = raw_input('>>> ') ++ try: s = input('>>> ') + except EOFError: break + if not s.strip(): break + parse('goal', s) +- print 'Bye.' ++ print('Bye.') + +--- yapps2-2.2.1.orig/examples/xml.g ++++ yapps2-2.2.1/examples/xml.g +@@ -54,13 +54,13 @@ if __name__ == '__main__': + '<begin> middle </end>', + '<begin> <nested attr=\'baz\' another="hey"> foo </nested> <nested> bar </nested> </begin>', + ] +- print +- print '____Running tests_______________________________________' ++ print() ++ print('____Running tests_______________________________________') + for test in tests: +- print ++ print() + try: + parser = xml(xmlScanner(test)) + output = '%s ==> %s' % (repr(test), repr(parser.node())) +- except (yappsrt.SyntaxError, AssertionError) as e: ++ except (runtime.SyntaxError, AssertionError) as e: + output = '%s ==> FAILED ==> %s' % (repr(test), e) +- print output ++ print(output) +--- yapps2-2.2.1.orig/setup.py ++++ yapps2-2.2.1/setup.py +@@ -1,13 +1,13 @@ + #!/usr/bin/env python + +-from setuptools import setup, find_packages ++from setuptools import setup + import os + from yapps import __version__ as version + + pkg_root = os.path.dirname(__file__) + + # Error-handling here is to allow package to be built w/o README included +-try: readme = open(os.path.join(pkg_root, 'README.txt')).read() ++try: readme = open(os.path.join(pkg_root, 'README.md')).read() + except IOError: readme = '' + + setup( +@@ -15,15 +15,15 @@ setup( + version = version, + author = 'Amit J. 
Patel, Matthias Urlichs', + author_email = 'am...@cs.stanford.edu, sm...@debian.org', +- maintainer = 'Mike Kazantsev', +- maintainer_email = 'mk.frag...@gmail.com', ++ maintainer = 'Matthias Urlichs', ++ maintainer_email = 'sm...@debian.org', + license = 'MIT', +- url = 'https://github.com/mk-fg/yapps', ++ url = 'https://github.com/smurfix/yapps', + + description = 'Yet Another Python Parser System', + long_description = readme, + +- packages = find_packages(), ++ packages = ['yapps'], + include_package_data = True, + package_data = {'': ['README.txt']}, + exclude_package_data = {'': ['README.*']}, +--- yapps2-2.2.1.orig/test.sh ++++ yapps2-2.2.1/test.sh +@@ -4,10 +4,10 @@ set -e + trap 'echo ERROR' 0 + + export PYTHONPATH=$(pwd) +-for PY_G in python python3 ; do ++for PY_G in python3 ; do + $PY_G ./yapps2 examples/expr.g examples/expr.py + +-for PY_X in python python3 ; do ++for PY_X in python3 ; do + test "$(echo "1+2*3+4" | $PY_X examples/expr.py goal)" = 11 + done + +--- yapps2-2.2.1.orig/yapps/cli_tool.py ++++ yapps2-2.2.1/yapps/cli_tool.py +@@ -1,4 +1,4 @@ +-#!/usr/bin/env python ++#!/usr/bin/python3 + + # + # Yapps 2 - yet another python parser system +--- yapps2-2.2.1.orig/yapps/parsetree.py ++++ yapps2-2.2.1/yapps/parsetree.py +@@ -270,9 +270,9 @@ class Generator: + + def generate_output(self): + self.calculate() ++ self.write("from __future__ import print_function\n") + self.write(self.preparser) + self.write("# Begin -- grammar generated by Yapps\n") +- self.write("from __future__ import print_function\n") + self.write("import sys, re\n") + self.write("from yapps import runtime\n") + self.write("\n") +--- yapps2-2.2.1.orig/yapps/runtime.py ++++ yapps2-2.2.1/yapps/runtime.py +@@ -175,7 +175,7 @@ class Scanner(object): + file,line,p = pos + if file != self.filename: + if self.stack: return self.stack.print_line_with_pointer(pos,length=length,out=out) +- print >>out, "(%s: not in input buffer)" % file ++ print("(%s: not in input buffer)" % file, file=out) + return + + text = self.input +@@ -198,7 +198,7 @@ class Scanner(object): + break + spos = cr+1 + else: +- print >>out, "(%s:%d not in input buffer)" % (file,origline) ++ print("(%s:%d not in input buffer)" % (file,origline), file=out) + return + + # Now try printing part of the line +@@ -227,8 +227,8 @@ class Scanner(object): + p = p - 7 + + # Now print the string, along with an indicator +- print >>out, '> ',text +- print >>out, '> ',' '*p + '^' ++ print('> ',text, file=out) ++ print('> ',' '*p + '^', file=out) + + def grab_input(self): + """Get more input if possible.""" +@@ -316,7 +316,7 @@ class Scanner(object): + del self.tokens[0] + self.tokens.append(tok) + self.last_read_token = tok +- # print repr(tok) ++ # print(repr(tok)) + return tok + else: + ignore = self.ignore[best_pat] diff -Nru yapps2-2.2.1/debian/patches/series yapps2-2.2.1/debian/patches/series --- yapps2-2.2.1/debian/patches/series 1970-01-01 01:00:00.000000000 +0100 +++ yapps2-2.2.1/debian/patches/series 2023-08-16 21:52:30.000000000 +0200 @@ -0,0 +1 @@ +debian.patch diff -Nru yapps2-2.2.1/debian/source/format yapps2-2.2.1/debian/source/format --- yapps2-2.2.1/debian/source/format 1970-01-01 01:00:00.000000000 +0100 +++ yapps2-2.2.1/debian/source/format 2023-08-16 21:52:30.000000000 +0200 @@ -0,0 +1 @@ +3.0 (quilt) diff -Nru yapps2-2.2.1/doc/yapps2.haux yapps2-2.2.1/doc/yapps2.haux --- yapps2-2.2.1/doc/yapps2.haux 2023-08-17 02:01:56.000000000 +0200 +++ yapps2-2.2.1/doc/yapps2.haux 1970-01-01 01:00:00.000000000 +0100 @@ -1,31 +0,0 @@ 
-\@@addtocsec{htoc}{sec1}{0}{\@print{1}\quad{}Introduction{}} -\@@addtocsec{htoc}{sec2}{0}{\@print{2}\quad{}Examples{}} -\@@addtocsec{htoc}{sec3}{1}{\@print{2.1}\quad{}Introduction to Grammars{}} -\@@addtocsec{htoc}{sec4}{1}{\@print{2.2}\quad{}Lisp Expressions{}} -\@@addtocsec{htoc}{sec7}{1}{\@print{2.3}\quad{}Calculator{}} -\@@addtocsec{htoc}{sec8}{1}{\@print{2.4}\quad{}Calculator with Memory{}} -\@@addtocsec{htoc}{sec9}{0}{\@print{3}\quad{}Grammars{}} -\@@addtocsec{htoc}{sec10}{1}{\@print{3.1}\quad{}Left Factoring{}} -\newlabel{sec:Left-Factoring}{{3.1}{X}} -\@@addtocsec{htoc}{sec11}{1}{\@print{3.2}\quad{}Left Recursion{}} -\@@addtocsec{htoc}{sec12}{1}{\@print{3.3}\quad{}Ambiguous Grammars{}} -\newlabel{sec:Ambiguous-Grammars}{{3.3}{X}} -\@@addtocsec{htoc}{sec13}{0}{\@print{4}\quad{}Customization{}} -\@@addtocsec{htoc}{sec14}{1}{\@print{4.1}\quad{}Customizing Parsers{}} -\@@addtocsec{htoc}{sec15}{1}{\@print{4.2}\quad{}Customizing Scanners{}} -\@@addtocsec{htoc}{sec16}{0}{\@print{5}\quad{}Parser Mechanics{}} -\@@addtocsec{htoc}{sec17}{1}{\@print{5.1}\quad{}Parser Objects{}} -\newlabel{sec:Parser-Objects}{{5.1}{X}} -\@@addtocsec{htoc}{sec18}{1}{\@print{5.2}\quad{}Context Sensitive Scanner{}} -\@@addtocsec{htoc}{sec19}{1}{\@print{5.3}\quad{}Internal Variables{}} -\@@addtocsec{htoc}{sec20}{1}{\@print{5.4}\quad{}Pre- and Post-Parser Code{}} -\@@addtocsec{htoc}{sec21}{1}{\@print{5.5}\quad{}Representation of Grammars{}} -\@@addtocsec{htoc}{sec22}{0}{\@print{A}\quad{}Grammar for Parsers{}} -\@@addtocsec{htoc}{sec23}{0}{\@print{B}\quad{}Upgrading{}} -\@@addtocsec{htoc}{sec24}{0}{\@print{C}\quad{}Troubleshooting{}} -\@@addtocsec{htoc}{sec25}{0}{\@print{D}\quad{}History{}} -\@@addtocsec{htoc}{sec26}{0}{\@print{E}\quad{}Debian Extensions{}} -\newlabel{sec:debian}{{E}{X}} -\@@addtocsec{htoc}{sec27}{0}{\@print{F}\quad{}Future Extensions{}} -\newlabel{sec:future}{{F}{X}} -\@@addtocsec{htoc}{sec28}{0}{\@print{G}\quad{}References{}} diff -Nru yapps2-2.2.1/doc/yapps2.html yapps2-2.2.1/doc/yapps2.html --- yapps2-2.2.1/doc/yapps2.html 2023-08-17 02:01:56.000000000 +0200 +++ yapps2-2.2.1/doc/yapps2.html 1970-01-01 01:00:00.000000000 +0100 @@ -1,871 +0,0 @@ -<!DOCTYPE html> -<html > -<head> -<meta http-equiv="Content-Type" content="text/html; charset=US-ASCII"> -<meta name="generator" content="hevea 2.29"> -<style type="text/css"> -.li-itemize{margin:1ex 0ex;} -.li-enumerate{margin:1ex 0ex;} -.dd-description{margin:0ex 0ex 1ex 4ex;} -.dt-description{margin:0ex;} -.toc{list-style:none;} -.footnotetext{margin:0ex; padding:0ex;} -div.footnotetext P{margin:0px; text-indent:1em;} -.thefootnotes{text-align:left;margin:0ex;} -.dt-thefootnotes{margin:0em;} -.dd-thefootnotes{margin:0em 0em 0em 2em;} -.footnoterule{margin:1em auto 1em 0px;width:50%;} -.caption{padding-left:2ex; padding-right:2ex; margin-left:auto; margin-right:auto} -.title{margin:2ex auto;text-align:center} -.titlemain{margin:1ex 2ex 2ex 1ex;} -.titlerest{margin:0ex 2ex;} -.center{text-align:center;margin-left:auto;margin-right:auto;} -.flushleft{text-align:left;margin-left:0ex;margin-right:auto;} -.flushright{text-align:right;margin-left:auto;margin-right:0ex;} -div table{margin-left:inherit;margin-right:inherit;margin-bottom:2px;margin-top:2px} -td table{margin:auto;} -table{border-collapse:collapse;} -td{padding:0;} -.cellpadding0 tr td{padding:0;} -.cellpadding1 tr td{padding:1px;} -pre{text-align:left;margin-left:0ex;margin-right:auto;} -blockquote{margin-left:4ex;margin-right:4ex;text-align:left;} -td p{margin:0px;} -.boxed{border:1px solid 
black} -.textboxed{border:1px solid black} -.vbar{border:none;width:2px;background-color:black;} -.hbar{border:none;height:2px;width:100%;background-color:black;} -.hfill{border:none;height:1px;width:200%;background-color:black;} -.vdisplay{border-collapse:separate;border-spacing:2px;width:auto; empty-cells:show; border:2px solid red;} -.vdcell{white-space:nowrap;padding:0px; border:2px solid green;} -.display{border-collapse:separate;border-spacing:2px;width:auto; border:none;} -.dcell{white-space:nowrap;padding:0px; border:none;} -.dcenter{margin:0ex auto;} -.vdcenter{border:solid #FF8000 2px; margin:0ex auto;} -.minipage{text-align:left; margin-left:0em; margin-right:auto;} -.marginpar{border:solid thin black; width:20%; text-align:left;} -.marginparleft{float:left; margin-left:0ex; margin-right:1ex;} -.marginparright{float:right; margin-left:1ex; margin-right:0ex;} -.theorem{text-align:left;margin:1ex auto 1ex 0ex;} -.part{margin:2ex auto;text-align:center} -</style> -<title>yapps2</title> -</head> -<body > -<!--HEVEA command line is: /usr/bin/hevea yapps2.tex --> -<!--CUT STYLE article--><!--CUT DEF section 1 --><div class="center"> -<table style="border-spacing:6px;border-collapse:separate;" class="cellpading0"><tr><td style="text-align:center;white-space:nowrap" ><span style="font-size:x-large">The </span><span style="font-size:x-large"><em>Yapps</em></span><span style="font-size:x-large"> Parser Generator System</span></td></tr> -<tr><td style="text-align:center;white-space:nowrap" ><code>http://theory.stanford.edu/~amitp/Yapps/</code></td></tr> -<tr><td style="text-align:center;white-space:nowrap" > Version 2</td></tr> -<tr><td style="text-align:center;white-space:nowrap" > </td></tr> -<tr><td style="text-align:center;white-space:nowrap" >Amit J. Patel</td></tr> -<tr><td style="text-align:center;white-space:nowrap" >http://www-cs-students.stanford.edu/ amitp/ -http://www-cs-students.stanford.edu/ amitp/ </td></tr> -</table> <hr style="height:2"> -</div> -<!--TOC section id="sec1" Introduction--> -<h2 id="sec1" class="section">1  Introduction</h2><!--SEC END --><p><em>Yapps</em> (<U>Y</U>et <U>A</U>nother <U>P</U>ython -<U>P</U>arser <U>S</U>ystem) is an easy to use parser -generator that is written in Python and generates Python code. There -are several parser generator systems already available for Python, -including <span style="font-family:monospace">PyLR, kjParsing, PyBison,</span> and <span style="font-family:monospace">mcf.pars,</span> -but I had different goals for my parser. Yapps is simple, is easy to -use, and produces human-readable parsers. It is not the fastest or -most powerful parser. Yapps is designed to be used when regular -expressions are not enough and other parser systems are too much: -situations where you may write your own recursive descent parser.</p><p>Some unusual features of Yapps that may be of interest are:</p><ol class="enumerate" type=1><li class="li-enumerate">Yapps produces recursive descent parsers that are readable by -humans, as opposed to table-driven parsers that are difficult to -read. A Yapps parser for a simple calculator looks similar to the -one that Mark Lutz wrote by hand for <em>Programming Python.</em></li><li class="li-enumerate">Yapps also allows for rules that accept parameters and pass -arguments to be used while parsing subexpressions. 
Grammars that -allow for arguments to be passed to subrules and for values to be -passed back are often called <em>attribute grammars.</em> In many -cases parameterized rules can be used to perform actions at “parse -time” that are usually delayed until later. For example, -information about variable declarations can be passed into the -rules that parse a procedure body, so that undefined variables can -be detected at parse time. The types of defined variables can be -used in parsing as well—for example, if the type of <span style="font-family:monospace">X</span> is -known, we can determine whether <span style="font-family:monospace">X(1)</span> is an array reference or -a function call.</li><li class="li-enumerate">Yapps grammars are fairly easy to write, although there are -some inconveniences having to do with ELL(1) parsing that have to be -worked around. For example, rules have to be left factored and -rules may not be left recursive. However, neither limitation seems -to be a problem in practice. <p>Yapps grammars look similar to the notation used in the Python -reference manual, with operators like <code>*</code>, <code>+</code>, <code>|</code>, -<code>[]</code>, and <code>()</code> for patterns, names (<span style="font-family:monospace">tim</span>) for rules, -regular expressions (<code>"[a-z]+"</code>) for tokens, and <code>#</code> for -comments.</p></li><li class="li-enumerate">The Yapps parser generator is written as a single Python module -with no C extensions. Yapps produces parsers that are written -entirely in Python, and require only the Yapps run-time module (5k) -for support.</li><li class="li-enumerate">Yapps’s scanner is context-sensitive, picking tokens based on -the types of the tokens accepted by the parser. This can be -helpful when implementing certain kinds of parsers, such as for a -preprocessor.</li></ol><p>There are several disadvantages of using Yapps over another parser system:</p><ol class="enumerate" type=1><li class="li-enumerate">Yapps parsers are <span style="font-family:monospace">ELL(1)</span> (Extended LL(1)), which is -less powerful than <span style="font-family:monospace">LALR</span> (used by <span style="font-family:monospace">PyLR</span>) or -<span style="font-family:monospace">SLR</span> (used by <span style="font-family:monospace">kjParsing</span>), so Yapps would not be a -good choice for parsing complex languages. For example, allowing -both <span style="font-family:monospace">x := 5;</span> and <span style="font-family:monospace">x;</span> as statements is difficult -because we must distinguish based on only one token of lookahead. -Seeing only <span style="font-family:monospace">x</span>, we cannot decide whether we have an -assignment statement or an expression statement. (Note however -that this kind of grammar can be matched with backtracking; see -section <a href="#sec%3Afuture">F</a>.)</li><li class="li-enumerate">The scanner that Yapps provides can only read from strings, not -files, so an entire file has to be read in before scanning can -begin. It is possible to build a custom scanner, though, so in -cases where stream input is needed (from the console, a network, or -a large file are examples), the Yapps parser can be given a custom -scanner that reads from a stream instead of a string.</li><li class="li-enumerate">Yapps is not designed with efficiency in mind.</li></ol><p>Yapps provides an easy to use parser generator that produces parsers -similar to what you might write by hand. 
It is not meant to be a -solution for all parsing problems, but instead an aid for those times -you would write a parser by hand rather than using one of the more -powerful parsing packages available.</p><p>Yapps 2.0 is easier to use than Yapps 1.0. New features include a -less restrictive input syntax, which allows mixing of sequences, -choices, terminals, and nonterminals; optional matching; the ability -to insert single-line statements into the generated parser; and -looping constructs <code>*</code> and <code>+</code> similar to the repetitive -matching constructs in regular expressions. Unfortunately, the -addition of these constructs has made Yapps 2.0 incompatible with -Yapps 1.0, so grammars will have to be rewritten. See section -<a href="#sec%3AUpgrading">??</a> for tips on changing Yapps 1.0 grammars for use -with Yapps 2.0.</p> -<!--TOC section id="sec2" Examples--> -<h2 id="sec2" class="section">2  Examples</h2><!--SEC END --><p>In this section are several examples that show the use of Yapps. -First, an introduction shows how to construct grammars and write them -in Yapps form. This example can be skipped by someone familiar with -grammars and parsing. Next is a Lisp expression grammar that produces -a parse tree as output. This example demonstrates the use of tokens -and rules, as well as returning values from rules. The third example -is a expression evaluation grammar that evaluates during parsing -(instead of producing a parse tree).</p> -<!--TOC subsection id="sec3" Introduction to Grammars--> -<h3 id="sec3" class="subsection">2.1  Introduction to Grammars</h3><!--SEC END --><p>A <em>grammar</em> for a natural language specifies how words can be put -together to form large structures, such as phrases and sentences. A -grammar for a computer language is similar in that it specifies how -small components (called <em>tokens</em>) can be put together to form -larger structures. In this section we will write a grammar for a tiny -subset of English.</p><p>Simple English sentences can be described as being a noun phrase -followed by a verb followed by a noun phrase. For example, in the -sentence, “Jack sank the blue ship,” the word “Jack” is the first -noun phrase, “sank” is the verb, and “the blue ship” is the second -noun phrase. In addition we should say what a noun phrase is; for -this example we shall say that a noun phrase is an optional article -(a, an, the) followed by any number of adjectives followed by a noun. -The tokens in our language are the articles, nouns, verbs, and -adjectives. The <em>rules</em> in our language will tell us how to -combine the tokens together to form lists of adjectives, noun phrases, -and sentences:</p><ul class="itemize"><li class="li-itemize"> -<span style="font-family:monospace">sentence: noun_phrase verb noun_phrase</span> -</li><li class="li-itemize"><span style="font-family:monospace">noun_phrase: [article] adjective* noun</span> -</li></ul><p>Notice that some things that we said easily in English, such as -“optional article,” are expressed using special syntax, such as -brackets. When we said, “any number of adjectives,” we wrote -<span style="font-family:monospace">adjective*</span>, where the <span style="font-family:monospace">*</span> means “zero or more of the -preceding pattern”.</p><p>The grammar given above is close to a Yapps grammar. We also have to -specify what the tokens are, and what to do when a pattern is matched. 
-For this example, we will do nothing when patterns are matched; the -next example will explain how to perform match actions.</p><pre class="verbatim">parser TinyEnglish: - ignore: "\\W+" - token noun: "(Jack|spam|ship)" - token verb: "(sank|threw)" - token article: "(an|a|the)" - token adjective: "(blue|red|green)" - - rule sentence: noun_phrase verb noun_phrase - rule noun_phrase: [article] adjective* noun -</pre><p>The tokens are specified as Python <em>regular expressions</em>. Since -Yapps produces Python code, you can write any regular expression that -would be accepted by Python. (<em>Note:</em> These are Python 1.5 -regular expressions from the <span style="font-family:monospace">re</span> module, not Python 1.4 -regular expressions from the <span style="font-family:monospace">regex</span> module.) In addition to -tokens that you want to see (which are given names), you can also -specify tokens to ignore, marked by the <span style="font-family:monospace">ignore</span> keyword. In -this parser we want to ignore whitespace.</p><p>The TinyEnglish grammar shows how you define tokens and rules, but it -does not specify what should happen once we’ve matched the rules. In -the next example, we will take a grammar and produce a <em>parse -tree</em> from it.</p> -<!--TOC subsection id="sec4" Lisp Expressions--> -<h3 id="sec4" class="subsection">2.2  Lisp Expressions</h3><!--SEC END --><p>Lisp syntax, although hated by many, has a redeeming quality: it is -simple to parse. In this section we will construct a Yapps grammar to -parse Lisp expressions and produce a parse tree as output.</p><!--TOC subsubsection id="sec5" Defining the Grammar--> -<h4 id="sec5" class="subsubsection">Defining the Grammar</h4><!--SEC END --><p>The syntax of Lisp is simple. It has expressions, which are -identifiers, strings, numbers, and lists. A list is a left -parenthesis followed by some number of expressions (separated by -spaces) followed by a right parenthesis. For example, <code>5</code>, -<code>"ni"</code>, and <code>(print "1+2 = " (+ 1 2))</code> are Lisp expressions. -Written as a grammar,</p><pre class="verbatim"> expr: ID | STR | NUM | list - list: ( expr* ) -</pre><p>In addition to having a grammar, we need to specify what to do every -time something is matched. For the tokens, which are strings, we just -want to get the “value” of the token, attach its type (identifier, -string, or number) in some way, and return it. For the lists, we want -to construct and return a Python list.</p><p>Once some pattern is matched, we enclose a return statement enclosed -in <code>{{...}}</code>. The braces allow us to insert any one-line -statement into the parser. Within this statement, we can refer to the -values returned by matching each part of the rule. After matching a -token such as <span style="font-family:monospace">ID</span>, “ID” will be bound to the text of the -matched token. Let’s take a look at the rule:</p><pre class="verbatim"> rule expr: ID {{ return ('id', ID) }} - ... -</pre><p>In a rule, tokens return the text that was matched. For identifiers, -we just return the identifier, along with a “tag” telling us that -this is an identifier and not a string or some other value. Sometimes -we may need to convert this text to a different form. For example, if -a string is matched, we want to remove quotes and handle special forms -like <code>\n</code>. If a number is matched, we want to convert it into a -number. Let’s look at the return values for the other tokens:</p><pre class="verbatim"> ... 
- | STR {{ return ('str', eval(STR)) }} - | NUM {{ return ('num', atoi(NUM)) }} - ... -</pre><p>If we get a string, we want to remove the quotes and process any -special backslash codes, so we run <span style="font-family:monospace">eval</span> on the quoted string. -If we get a number, we convert it to an integer with <span style="font-family:monospace">atoi</span> and -then return the number along with its type tag.</p><p>For matching a list, we need to do something slightly more -complicated. If we match a Lisp list of expressions, we want to -create a Python list with those values.</p><pre class="verbatim"> rule list: "\\(" # Match the opening parenthesis - {{ result = [] }} # Create a Python list - ( - expr # When we match an expression, - {{ result.append(expr) }} # add it to the list - )* # * means repeat this if needed - "\\)" # Match the closing parenthesis - {{ return result }} # Return the Python list -</pre><p>In this rule we first match the opening parenthesis, then go into a -loop. In this loop we match expressions and add them to the list. -When there are no more expressions to match, we match the closing -parenthesis and return the resulting. Note that <code>#</code> is used for -comments, just as in Python.</p><p>The complete grammar is specified as follows: -</p><pre class="verbatim">parser Lisp: - ignore: '\\s+' - token NUM: '[0-9]+' - token ID: '[-+*/!@%^&=.a-zA-Z0-9_]+' - token STR: '"([^\\"]+|\\\\.)*"' - - rule expr: ID {{ return ('id', ID) }} - | STR {{ return ('str', eval(STR)) }} - | NUM {{ return ('num', atoi(NUM)) }} - | list {{ return list }} - rule list: "\\(" {{ result = [] }} - ( expr {{ result.append(expr) }} - )* - "\\)" {{ return result }} -</pre><p>One thing you may have noticed is that <code>"\\("</code> and <code>"\\)"</code> -appear in the <span style="font-family:monospace">list</span> rule. These are <em>inline tokens</em>: -they appear in the rules without being given a name with the -<span style="font-family:monospace">token</span> keyword. Inline tokens are more convenient to use, but -since they do not have a name, the text that is matched cannot be used -in the return value. They are best used for short simple patterns -(usually punctuation or keywords).</p><p>Another thing to notice is that the number and identifier tokens -overlap. For example, “487” matches both NUM and ID. In Yapps, the -scanner only tries to match tokens that are acceptable to the parser. -This rule doesn’t help here, since both NUM and ID can appear in the -same place in the grammar. There are two rules used to pick tokens if -more than one matches. One is that the <em>longest</em> match is -preferred. For example, “487x” will match as an ID (487x) rather -than as a NUM (487) followed by an ID (x). The second rule is that if -the two matches are the same length, the <em>first</em> one listed in -the grammar is preferred. For example, “487” will match as an NUM -rather than an ID because NUM is listed first in the grammar. Inline -tokens have preference over any tokens you have listed.</p><p>Now that our grammar is defined, we can run Yapps to produce a parser, -and then run the parser to produce a parse tree.</p><!--TOC subsubsection id="sec6" Running Yapps--> -<h4 id="sec6" class="subsubsection">Running Yapps</h4><!--SEC END --><p>In the Yapps module is a function <span style="font-family:monospace">generate</span> that takes an -input filename and writes a parser to another file. 
We can use this -function to generate the Lisp parser, which is assumed to be in -<span style="font-family:monospace">lisp.g</span>.</p><pre class="verbatim">% python -Python 1.5.1 (#1, Sep 3 1998, 22:51:17) [GCC 2.7.2.3] on linux-i386 -Copyright 1991-1995 Stichting Mathematisch Centrum, Amsterdam ->>> import yapps ->>> yapps.generate('lisp.g') -</pre><p>At this point, Yapps has written a file <span style="font-family:monospace">lisp.py</span> that contains -the parser. In that file are two classes (one scanner and one parser) -and a function (called <span style="font-family:monospace">parse</span>) that puts things together for -you.</p><p>Alternatively, we can run Yapps from the command line to generate the -parser file:</p><pre class="verbatim">% python yapps.py lisp.g -</pre><p>After running Yapps either from within Python or from the command -line, we can use the Lisp parser by calling the <span style="font-family:monospace">parse</span> -function. The first parameter should be the rule we want to match, -and the second parameter should be the string to parse.</p><pre class="verbatim">>>> import lisp ->>> lisp.parse('expr', '(+ 3 4)') -[('id', '+'), ('num', 3), ('num', 4)] ->>> lisp.parse('expr', '(print "3 = " (+ 1 2))') -[('id', 'print'), ('str', '3 = '), [('id', '+'), ('num', 1), ('num', 2)]] -</pre><p>The <span style="font-family:monospace">parse</span> function is not the only way to use the parser; -section <a href="#sec%3AParser-Objects">5.1</a> describes how to access parser objects -directly.</p><p>We’ve now gone through the steps in creating a grammar, writing a -grammar file for Yapps, producing a parser, and using the parser. In -the next example we’ll see how rules can take parameters and also how -to do computations instead of just returning a parse tree.</p> -<!--TOC subsection id="sec7" Calculator--> -<h3 id="sec7" class="subsection">2.3  Calculator</h3><!--SEC END --><p>A common example parser given in many textbooks is that for simple -expressions, with numbers, addition, subtraction, multiplication, -division, and parenthesization of subexpressions. We’ll write this -example in Yapps, evaluating the expression as we parse.</p><p>Unlike <span style="font-family:monospace">yacc</span>, Yapps does not have any way to specify -precedence rules, so we have to do it ourselves. We say that an -expression is the sum of terms, and that a term is the product of -factors, and that a factor is a number or a parenthesized expression:</p><pre class="verbatim"> expr: factor ( ("+"|"-") factor )* - factor: term ( ("*"|"/") term )* - term: NUM | "(" expr ")" -</pre><p>In order to evaluate the expression as we go, we should keep along an -accumulator while evaluating the lists of terms or factors. Just as -we kept a “result” variable to build a parse tree for Lisp -expressions, we will use a variable to evaluate numerical -expressions. 
The full grammar is given below:</p><pre class="verbatim">parser Calculator: - token END: "$" # $ means end of string - token NUM: "[0-9]+" - - rule goal: expr END {{ return expr }} - - # An expression is the sum and difference of factors - rule expr: factor {{ v = factor }} - ( "[+]" factor {{ v = v+factor }} - | "-" factor {{ v = v-factor }} - )* {{ return v }} - - # A factor is the product and division of terms - rule factor: term {{ v = term }} - ( "[*]" term {{ v = v*term }} - | "/" term {{ v = v/term }} - )* {{ return v }} - - # A term is either a number or an expression surrounded by parentheses - rule term: NUM {{ return atoi(NUM) }} - | "\\(" expr "\\)" {{ return expr }} -</pre><p>The top-level rule is <em>goal</em>, which says that we are looking for -an expression followed by the end of the string. The <span style="font-family:monospace">END</span> -token is needed because without it, it isn’t clear when to stop -parsing. For example, the string “1+3” could be parsed either as -the expression “1” followed by the string “+3” or it could be -parsed as the expression “1+3”. By requiring expressions to end -with <span style="font-family:monospace">END</span>, the parser is forced to take “1+3”.</p><p>In the two rules with repetition, the accumulator is named <span style="font-family:monospace">v</span>. -After reading in one expression, we initialize the accumulator. Each -time through the loop, we modify the accumulator by adding, -subtracting, multiplying by, or dividing the previous accumulator by -the expression that has been parsed. At the end of the rule, we -return the accumulator.</p><p>The calculator example shows how to process lists of elements using -loops, as well as how to handle precedence of operators.</p><p><em>Note:</em> It’s often important to put the <span style="font-family:monospace">END</span> token in, so -put it in unless you are sure that your grammar has some other -non-ambiguous token marking the end of the program.</p> -<!--TOC subsection id="sec8" Calculator with Memory--> -<h3 id="sec8" class="subsection">2.4  Calculator with Memory</h3><!--SEC END --><p>In the previous example we learned how to write a calculator that -evaluates simple numerical expressions. In this section we will -extend the example to support both local and global variables.</p><p>To support global variables, we will add assignment statements to the -“goal” rule.</p><pre class="verbatim"> rule goal: expr END {{ return expr }} - | 'set' ID expr END {{ global_vars[ID] = expr }} - {{ return expr }} -</pre><p>To use these variables, we need a new kind of terminal:</p><pre class="verbatim"> rule term: ... | ID {{ return global_vars[ID] }} -</pre><p>So far, these changes are straightforward. We simply have a global -dictionary <span style="font-family:monospace">global_vars</span> that stores the variables and values, -we modify it when there is an assignment statement, and we look up -variables in it when we see a variable name.</p><p>To support local variables, we will add variable declarations to the -set of allowed expressions.</p><pre class="verbatim"> rule term: ... | 'let' VAR '=' expr 'in' expr ... -</pre><p>This is where it becomes tricky. Local variables should be stored in -a local dictionary, not in the global one. One trick would be to save -a copy of the global dictionary, modify it, and then restore it -later. In this example we will instead use <em>attributes</em> to -create local information and pass it to subrules.</p><p>A rule can optionally take parameters. 
When we invoke the rule, we -must pass in arguments. For local variables, let’s use a single -parameter, <span style="font-family:monospace">local_vars</span>:</p><pre class="verbatim"> rule expr<<local_vars>>: ... - rule factor<<local_vars>>: ... - rule term<<local_vars>>: ... -</pre><p>Each time we want to match <span style="font-family:monospace">expr</span>, <span style="font-family:monospace">factor</span>, or -<span style="font-family:monospace">term</span>, we will pass the local variables in the current rule to -the subrule. One interesting case is when we pass as an argument -something <em>other</em> than <span style="font-family:monospace">local_vars</span>:</p><pre class="verbatim"> rule term<<local_vars>>: ... - | 'let' VAR '=' expr<<local_vars>> - {{ local_vars = [(VAR, expr)] + local_vars }} - 'in' expr<<local_vars>> - {{ return expr }} -</pre><p>Note that the assignment to the local variables list does not modify -the original list. This is important to keep local variables from -being seen outside the “let”.</p><p>The other interesting case is when we find a variable:</p><pre class="verbatim">global_vars = {} - -def lookup(map, name): - for x,v in map: if x==name: return v - return global_vars[name] -%% - ... - rule term<<local_vars>: ... - | VAR {{ return lookup(local_vars, VAR) }} -</pre><p>The lookup function will search through the local variable list, and -if it cannot find the name there, it will look it up in the global -variable dictionary.</p><p>A complete grammar for this example, including a read-eval-print loop -for interacting with the calculator, can be found in the examples -subdirectory included with Yapps.</p><p>In this section we saw how to insert code before the parser. We also -saw how to use attributes to transmit local information from one rule -to its subrules.</p> -<!--TOC section id="sec9" Grammars--> -<h2 id="sec9" class="section">3  Grammars</h2><!--SEC END --><p>Each Yapps grammar has a name, a list of tokens, and a set of -production rules. A grammar named <span style="font-family:monospace">X</span> will be used to produce -a parser named <span style="font-family:monospace">X</span> and a scanner anmed <span style="font-family:monospace">XScanner</span>. As -in Python, names are case sensitive, start with a letter, and contain -letters, numbers, and underscores (_).</p><p>There are three kinds of tokens in Yapps: named, inline, and ignored. -As their name implies, named tokens are given a name, using the token -construct: <span style="font-family:monospace">token </span><span style="font-family:monospace"><em>name</em></span><span style="font-family:monospace"> : </span><span style="font-family:monospace"><em>regexp</em></span>. In a rule, the -token can be matched by using the name. Inline tokens are regular -expressions that are used in rules without being declared. Ignored -tokens are declared using the ignore construct: <span style="font-family:monospace">ignore: -</span><span style="font-family:monospace"><em>regexp</em></span>. These tokens are ignored by the scanner, and are -not seen by the parser. Often whitespace is an ignored token. The -regular expressions used to define tokens should use the syntax -defined in the <span style="font-family:monospace">re</span> module, so some symbols may have to be -backslashed.</p><p>Production rules in Yapps have a name and a pattern to match. If the -rule is parameterized, the name should be followed by a list of -parameter names in <code><<...>></code>. 
A pattern can be a simple pattern -or a compound pattern. Simple patterns are the name of a named token, -a regular expression in quotes (inline token), the name of a -production rule (followed by arguments in <code><<...>></code>, if the rule -has parameters), and single line Python statements (<code>{{...}}</code>). -Compound patterns are sequences (<code>A B C ...</code>), choices ( -<code>A | B | C | ...</code>), options (<code>[...]</code>), zero-or-more repetitions -(<code>...*</code>), and one-or-more repetitions (<code>...+</code>). Like -regular expressions, repetition operators have a higher precedence -than sequences, and sequences have a higher precedence than choices.</p><p>Whenever <code>{{...}}</code> is used, a legal one-line Python statement -should be put inside the braces. The token <code>}}</code> should not -appear within the <code>{{...}}</code> section, even within a string, since -Yapps does not attempt to parse the Python statement. A workaround -for strings is to put two strings together (<code>"}" "}"</code>), or to use -backslashes (<code>"}\}"</code>). At the end of a rule you should use a -<code>{{ return X }}</code> statement to return a value. However, you -should <em>not</em> use any control statements (<span style="font-family:monospace">return</span>, -<span style="font-family:monospace">continue</span>, <span style="font-family:monospace">break</span>) in the middle of a rule. Yapps -needs to make assumptions about the control flow to generate a parser, -and any changes to the control flow will confuse Yapps.</p><p>The <code><<...>></code> form can occur in two places: to define parameters -to a rule and to give arguments when matching a rule. Parameters use -the syntax used for Python functions, so they can include default -arguments and the special forms (<code>*args</code> and <code>**kwargs</code>). -Arguments use the syntax for Python function call arguments, so they -can include normal arguments and keyword arguments. The token -<code>>></code> should not appear within the <code><<...>></code> section.</p><p>In both the statements and rule arguments, you can use names defined -by the parser to refer to matched patterns. You can refer to the text -matched by a named token by using the token name. You can use the -value returned by a production rule by using the name of that rule. -If a name <span style="font-family:monospace">X</span> is matched more than once (such as in loops), you -will have to save the earlier value(s) in a temporary variable, and -then use that temporary variable in the return value. The next -section has an example of a name that occurs more than once.</p> -<!--TOC subsection id="sec10" Left Factoring--> -<h3 id="sec10" class="subsection">3.1  Left Factoring</h3><!--SEC END --><p> -<a id="sec:Left-Factoring"></a></p><p>Yapps produces ELL(1) parsers, which determine which clause to match -based on the first token available. Sometimes the leftmost tokens of -several clauses may be the same. The classic example is the -<em>if/then/else</em> construct in Pascal:</p><pre class="verbatim">rule stmt: "if" expr "then" stmt {{ then_part = stmt }} - "else" stmt {{ return ('If',expr,then_part,stmt) }} - | "if" expr "then" stmt {{ return ('If',expr,stmt,[]) }} -</pre><p>(Note that we have to save the first <span style="font-family:monospace">stmt</span> into a variable -because there is another <span style="font-family:monospace">stmt</span> that will be matched.) 
The -left portions of the two clauses are the same, which presents a -problem for the parser. The solution is <em>left-factoring</em>: the -common parts are put together, and <em>then</em> a choice is made about -the remaining part:</p><pre class="verbatim">rule stmt: "if" expr - "then" stmt {{ then_part = stmt }} - {{ else_part = [] }} - [ "else" stmt {{ else_part = stmt }} ] - {{ return ('If', expr, then_part, else_part) }} -</pre><p>Unfortunately, the classic <em>if/then/else</em> situation is -<em>still</em> ambiguous when you left-factor. Yapps can deal with this -situation, but will report a warning; see section -<a href="#sec%3AAmbiguous-Grammars">3.3</a> for details.</p><p>In general, replace rules of the form:</p><pre class="verbatim">rule A: a b1 {{ return E1 }} - | a b2 {{ return E2 }} - | c3 {{ return E3 }} - | c4 {{ return E4 }} -</pre><p>with rules of the form:</p><pre class="verbatim">rule A: a ( b1 {{ return E1 }} - | b2 {{ return E2 }} - ) - | c3 {{ return E3 }} - | c4 {{ return E4 }} -</pre> -<!--TOC subsection id="sec11" Left Recursion--> -<h3 id="sec11" class="subsection">3.2  Left Recursion</h3><!--SEC END --><p>A common construct in grammars is for matching a list of patterns, -sometimes separated with delimiters such as commas or semicolons. In -LR-based parser systems, we can parse a list with something like this:</p><pre class="verbatim">rule sum: NUM {{ return NUM }} - | sum "+" NUM {{ return (sum, NUM) }} -</pre><p>Parsing <span style="font-family:monospace">1+2+3+4</span> would produce the output -<span style="font-family:monospace">(((1,2),3),4)</span>, which is what we want from a left-associative -addition operator. Unfortunately, this grammar is <em>left -recursive,</em> because the <span style="font-family:monospace">sum</span> rule contains a clause that -begins with <span style="font-family:monospace">sum</span>. (The recursion occurs at the left side of -the clause.)</p><p>We must restructure this grammar to be <em>right recursive</em> instead:</p><pre class="verbatim">rule sum: NUM {{ return NUM }} - | NUM "+" sum {{ return (NUM, sum) }} -</pre><p>Unfortunately, using this grammar, <span style="font-family:monospace">1+2+3+4</span> would be parsed as -<span style="font-family:monospace">(1,(2,(3,4)))</span>, which no longer follows left associativity. -The rule also needs to be left-factored. Instead, we write the -pattern as a loop instead:</p><pre class="verbatim">rule sum: NUM {{ v = NUM }} - ( "[+]" NUM {{ v = (v,NUM) }} )* - {{ return v }} -</pre><p>In general, replace rules of the form:</p><pre class="verbatim">rule A: A a1 -> << E1 >> - | A a2 -> << E2 >> - | b3 -> << E3 >> - | b4 -> << E4 >> -</pre><p>with rules of the form:</p><pre class="verbatim">rule A: ( b3 {{ A = E3 }} - | b4 {{ A = E4 }} ) - ( a1 {{ A = E1 }} - | a2 {{ A = E2 }} )* - {{ return A }} -</pre><p>We have taken a rule that proved problematic for with recursion and -turned it into a rule that works well with looping constructs.</p> -<!--TOC subsection id="sec12" Ambiguous Grammars--> -<h3 id="sec12" class="subsection">3.3  Ambiguous Grammars</h3><!--SEC END --><p> -<a id="sec:Ambiguous-Grammars"></a></p><p>In section <a href="#sec%3ALeft-Factoring">3.1</a> we saw the classic if/then/else -ambiguity, which occurs because the “else …” portion of an “if -…then …else …” construct is optional. 
Programs with -nested if/then/else constructs can be ambiguous when one of the else -clauses is missing: -</p><pre class="verbatim">if 1 then if 1 then - if 5 then if 5 then - x := 1; x := 1; - else else - y := 9; y := 9; -</pre><p>The indentation shows that the program can be parsed in two different -ways. (Of course, if we all would adopt Python’s indentation-based -structuring, this would never happen!) Usually we want the parsing on -the left: the “else” should be associated with the closest “if” -statement. In section <a href="#sec%3ALeft-Factoring">3.1</a> we “solved” the -problem by using the following grammar:</p><pre class="verbatim">rule stmt: "if" expr - "then" stmt {{ then_part = stmt }} - {{ else_part = [] }} - [ "else" stmt {{ else_part = stmt }} ] - {{ return ('If', expr, then_part, else_part) }} -</pre><p>Here, we have an optional match of “else” followed by a statement. -The ambiguity is that if an “else” is present, it is not clear -whether you want it parsed immediately or if you want it to be parsed -by the outer “if”.</p><p>Yapps will deal with the situation by matching when the else pattern -when it can. The parser will work in this case because it prefers the -<em>first</em> matching clause, which tells Yapps to parse the “else”. -That is exactly what we want!</p><p>For ambiguity cases with choices, Yapps will choose the <em>first</em> -matching choice. However, remember that Yapps only looks at the first -token to determine its decision, so <span style="font-family:monospace">(a b | a c)</span> will result in -Yapps choosing <span style="font-family:monospace">a b</span> even when the input is <span style="font-family:monospace">a c</span>. It only -looks at the first token, <span style="font-family:monospace">a</span>, to make its decision.</p> -<!--TOC section id="sec13" Customization--> -<h2 id="sec13" class="section">4  Customization</h2><!--SEC END --><p>Both the parsers and the scanners can be customized. The parser is -usually extended by subclassing, and the scanner can either be -subclassed or completely replaced.</p> -<!--TOC subsection id="sec14" Customizing Parsers--> -<h3 id="sec14" class="subsection">4.1  Customizing Parsers</h3><!--SEC END --><p>If additional fields and methods are needed in order for a parser to -work, Python subclassing can be used. (This is unlike parser classes -written in static languages, in which these fields and methods must be -defined in the generated parser class.) We simply subclass the -generated parser, and add any fields or methods required. Expressions -in the grammar can call methods of the subclass to perform any actions -that cannot be expressed as a simple expression. For example, -consider this simple grammar:</p><pre class="verbatim">parser X: - rule goal: "something" {{ self.printmsg() }} -</pre><p>The <span style="font-family:monospace">printmsg</span> function need not be implemented in the parser -class <span style="font-family:monospace">X</span>; it can be implemented in a subclass:</p><pre class="verbatim">import Xparser - -class MyX(Xparser.X): - def printmsg(self): - print("Hello!") -</pre> -<!--TOC subsection id="sec15" Customizing Scanners--> -<h3 id="sec15" class="subsection">4.2  Customizing Scanners</h3><!--SEC END --><p>The generated parser class is not dependent on the generated scanner -class. A scanner object is passed to the parser object’s constructor -in the <span style="font-family:monospace">parse</span> function. 
To use a different scanner, write -your own function to construct parser objects, with an instance of a -different scanner. Scanner objects must have a <span style="font-family:monospace">token</span> method -that accepts an integer <span style="font-family:monospace">N</span> as well as a list of allowed token -types, and returns the Nth token, as a tuple. The default scanner -raises <span style="font-family:monospace">NoMoreTokens</span> if no tokens are available, and -<span style="font-family:monospace">SyntaxError</span> if no token could be matched. However, the -parser does not rely on these exceptions; only the <span style="font-family:monospace">parse</span> -convenience function (which calls <span style="font-family:monospace">wrap_error_reporter</span>) and -the <span style="font-family:monospace">print_error</span> error display function use those exceptions.</p><p>The tuples representing tokens have four elements. The first two are -the beginning and ending indices of the matched text in the input -string. The third element is the type tag, matching either the name -of a named token or the quoted regexp of an inline or ignored token. -The fourth element of the token tuple is the matched text. If the -input string is <span style="font-family:monospace">s</span>, and the token tuple is -<span style="font-family:monospace">(b,e,type,val)</span>, then <span style="font-family:monospace">val</span> should be equal to -<span style="font-family:monospace">s[b:e]</span>.</p><p>The generated parsers do not the beginning or ending index. They use -only the token type and value. However, the default error reporter -uses the beginning and ending index to show the user where the error -is.</p> -<!--TOC section id="sec16" Parser Mechanics--> -<h2 id="sec16" class="section">5  Parser Mechanics</h2><!--SEC END --><p>The base parser class (Parser) defines two methods, <span style="font-family:monospace">_scan</span> -and <span style="font-family:monospace">_peek</span>, and two fields, <span style="font-family:monospace">_pos</span> and -<span style="font-family:monospace">_scanner</span>. The generated parser inherits from the base -parser, and contains one method for each rule in the grammar. To -avoid name clashes, do not use names that begin with an underscore -(<span style="font-family:monospace">_</span>).</p> -<!--TOC subsection id="sec17" Parser Objects--> -<h3 id="sec17" class="subsection">5.1  Parser Objects</h3><!--SEC END --><p> -<a id="sec:Parser-Objects"></a></p><p>Yapps produces as output two exception classes, a scanner class, a -parser class, and a function <span style="font-family:monospace">parse</span> that puts everything -together. The <span style="font-family:monospace">parse</span> function does not have to be used; -instead, one can create a parser and scanner object and use them -together for parsing.</p><pre class="verbatim"> def parse(rule, text): - P = X(XScanner(text)) - return wrap_error_reporter(P, rule) -</pre><p>The <span style="font-family:monospace">parse</span> function takes a name of a rule and an input string -as input. It creates a scanner and parser object, then calls -<span style="font-family:monospace">wrap_error_reporter</span> to execute the method in the parser -object named <span style="font-family:monospace">rule</span>. 
The wrapper function will call the -appropriate parser rule and report any parsing errors to standard -output.</p><p>There are several situations in which the <span style="font-family:monospace">parse</span> function -would not be useful. If a different parser or scanner is being used, -or exceptions are to be handled differently, a new <span style="font-family:monospace">parse</span> -function would be required. The supplied <span style="font-family:monospace">parse</span> function can -be used as a template for writing a function for your own needs. An -example of a custom parse function is the <span style="font-family:monospace">generate</span> function -in <span style="font-family:monospace">Yapps.py</span>.</p> -<!--TOC subsection id="sec18" Context Sensitive Scanner--> -<h3 id="sec18" class="subsection">5.2  Context Sensitive Scanner</h3><!--SEC END --><p>Unlike most scanners, the scanner produced by Yapps can take into -account the context in which tokens are needed, and try to match only -good tokens. For example, in the grammar:</p><pre class="verbatim">parser IniFile: - token ID: "[a-zA-Z_0-9]+" - token VAL: ".*" - - rule pair: ID "[ \t]*=[ \t]*" VAL "\n" -</pre><p>we would like to scan lines of text and pick out a name/value pair. -In a conventional scanner, the input string <span style="font-family:monospace">shell=progman.exe</span> -would be turned into a single token of type <span style="font-family:monospace">VAL</span>. The Yapps -scanner, however, knows that at the beginning of the line, an -<span style="font-family:monospace">ID</span> is expected, so it will return <span style="font-family:monospace">"shell"</span> as a token -of type <span style="font-family:monospace">ID</span>. Later, it will return <span style="font-family:monospace">"progman.exe"</span> as -a token of type <span style="font-family:monospace">VAL</span>.</p><p>Context sensitivity decreases the separation between scanner and -parser, but it is useful in parsers like <span style="font-family:monospace">IniFile</span>, where the -tokens themselves are not unambiguous, but <em>are</em> unambiguous -given a particular stage in the parsing process.</p><p>Unfortunately, context sensitivity can make it more difficult to -detect errors in the input. For example, in parsing a Pascal-like -language with “begin” and “end” as keywords, a context sensitive -scanner would only match “end” as the END token if the parser is in -a place that will accept the END token. If not, then the scanner -would match “end” as an identifier. To disable the context -sensitive scanner in Yapps, add the -<span style="font-family:monospace">context-insensitive-scanner</span> option to the grammar:</p><pre class="verbatim">Parser X: - option: "context-insensitive-scanner" -</pre><p>Context-insensitive scanning makes the parser look cleaner as well.</p> -<!--TOC subsection id="sec19" Internal Variables--> -<h3 id="sec19" class="subsection">5.3  Internal Variables</h3><!--SEC END --><p>There are two internal fields that may be of use. The parser object -has two fields, <span style="font-family:monospace">_pos</span>, which is the index of the current -token being matched, and <span style="font-family:monospace">_scanner</span>, which is the scanner -object. The token itself can be retrieved by accessing the scanner -object and calling the <span style="font-family:monospace">token</span> method with the token index. 
However, if you call <span style="font-family:monospace">token</span> before the token has been requested by the parser, it may mess up a context-sensitive scanner.<sup><a id="text1" href="#note1">1</a></sup> A -potentially useful combination of these fields is to extract the -portion of the input matched by the current rule. To do this, just save the scanner state (<span style="font-family:monospace">_scanner.pos</span>) before the text is matched and then again after the text is matched:</p><pre class="verbatim"> rule R: - {{ start = self._scanner.pos }} - a b c - {{ end = self._scanner.pos }} - {{ print('Text is', self._scanner.input[start:end]) }} -</pre> -<!--TOC subsection id="sec20" Pre- and Post-Parser Code--> -<h3 id="sec20" class="subsection">5.4  Pre- and Post-Parser Code</h3><!--SEC END --><p>Sometimes the parser code needs to rely on helper variables, -functions, and classes. A Yapps grammar can optionally be surrounded -by double percent signs, to separate the grammar from Python code.</p><pre class="verbatim">... Python code ... -%% -... Yapps grammar ... -%% -... Python code ... -</pre><p>The second <code>%%</code> can be omitted if there is no Python code at the -end, and the first <code>%%</code> can be omitted if there is no extra -Python code at all. (To have code only at the end, both separators -are required.)</p><p>If the second <code>%%</code> is omitted, Yapps will insert testing code -that allows you to use the generated parser to parse a file.</p><p>The extended calculator example in the Yapps examples subdirectory -includes both pre-parser and post-parser code.</p> -<!--TOC subsection id="sec21" Representation of Grammars--> -<h3 id="sec21" class="subsection">5.5  Representation of Grammars</h3><!--SEC END --><p>For each kind of pattern there is a class derived from Pattern. Yapps -has classes for Terminal, NonTerminal, Sequence, Choice, Option, Plus, -Star, and Eval. Each of these classes has the following interface:</p><ul class="itemize"><li class="li-itemize"> -setup(<em>gen</em>) Set accepts-є, and call -<em>gen.changed()</em> if it changed. This function can change the -flag from false to true but <em>not</em> from true to false. -</li><li class="li-itemize">update(<em>gen</em>) Set <span style="font-variant:small-caps">first</span> and <span style="font-variant:small-caps">follow</span>, and call -<em>gen.changed()</em> if either changed. This function can add to -the sets but <em>not</em> remove from them. -</li><li class="li-itemize">output(<em>gen</em>, <em>indent</em>) Generate code for matching -this rule, using <em>indent</em> as the current indentation level. -Writes are performed using <em>gen.write</em>. -</li><li class="li-itemize">used(<em>vars</em>) Given a list of variables <em>vars</em>, -return two lists: one containing the variables that are used, and -one containing the variables that are assigned. This function is -used for optimizing the resulting code. -</li></ul><p>Both <em>setup</em> and <em>update</em> monotonically increase the -variables they modify. Since the variables can only increase a finite -number of times, we can repeatedly call the function until the -variables stabilize.
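<p>In outline, the generator can therefore compute these attributes with a simple fixed-point loop. The following is a sketch of that idea only, not the actual code in <span style="font-family:monospace">parsetree.py</span>; the <span style="font-family:monospace">Changes</span> helper stands in for whatever object the real generator passes as <em>gen</em>:</p><pre class="verbatim"># Sketch of the fixed-point computation implied above (not the real
# parsetree.py code): re-run setup()/update() on every pattern until a
# full pass reports no change.
class Changes:
    def __init__(self):
        self.flag = False
    def changed(self):
        self.flag = True

def compute_sets(patterns):
    gen = Changes()
    while True:
        gen.flag = False
        for p in patterns:
            p.setup(gen)    # may only flip accepts-epsilon from false to true
            p.update(gen)   # may only add tokens to the FIRST/FOLLOW sets
        if not gen.flag:    # a full pass changed nothing: the sets have stabilized
            break
</pre>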
The <em>used</em> function is not currently -implemented.</p><p>With each pattern in the grammar Yapps associates three pieces of -information: the <span style="font-variant:small-caps">first</span>set, the <span style="font-variant:small-caps">follow</span>set, and the -accepts-є flag.</p><p>The <span style="font-variant:small-caps">first</span>set contains the tokens that can appear as we start -matching the pattern. The <span style="font-variant:small-caps">follow</span>set contains the tokens that can -appear immediately after we match the pattern. The accepts-є -flag is true if the pattern can match no tokens. In this case, <span style="font-variant:small-caps">first</span>will contain all the elements in <span style="font-variant:small-caps">follow</span>. The <span style="font-variant:small-caps">follow</span>set is not -needed when accepts-є is false, and may not be accurate in -those cases.</p><p>Yapps does not compute these sets precisely. Its approximation can -miss certain cases, such as this one:</p><pre class="verbatim"> rule C: ( A* | B ) - rule B: C [A] -</pre><p>Yapps will calculate <span style="font-family:monospace">C</span>’s <span style="font-variant:small-caps">follow</span>set to include <span style="font-family:monospace">A</span>. -However, <span style="font-family:monospace">C</span> will always match all the <span style="font-family:monospace">A</span>’s, so <span style="font-family:monospace">A</span> will -never follow it. Yapps 2.0 does not properly handle this construct, -but if it seems important, I may add support for it in a future -version.</p><p>Yapps also cannot handle constructs that depend on the calling -sequence. For example:</p><pre class="verbatim"> rule R: U | 'b' - rule S: | 'c' - rule T: S 'b' - rule U: S 'a' -</pre><p>The <span style="font-variant:small-caps">follow</span>set for <span style="font-family:monospace">S</span> includes <span style="font-family:monospace">a</span> and <span style="font-family:monospace">b</span>. Since <span style="font-family:monospace">S</span> can be empty, the <span style="font-variant:small-caps">first</span>set for <span style="font-family:monospace">S</span> should include <span style="font-family:monospace">a</span>, -<span style="font-family:monospace">b</span>, and <span style="font-family:monospace">c</span>. However, when parsing <span style="font-family:monospace">R</span>, if the lookahead -is <span style="font-family:monospace">b</span> we should <em>not</em> parse <span style="font-family:monospace">U</span>. That’s because in <span style="font-family:monospace">U</span>, <span style="font-family:monospace">S</span> is followed by <span style="font-family:monospace">a</span> and not <span style="font-family:monospace">b</span>. Therefore in -<span style="font-family:monospace">R</span>, we should choose rule <span style="font-family:monospace">U</span> only if there is an <span style="font-family:monospace">a</span> or -<span style="font-family:monospace">c</span>, but not if there is a <span style="font-family:monospace">b</span>. Yapps and many other LL(1) -systems do not distinguish <span style="font-family:monospace">S b</span> and <span style="font-family:monospace">S a</span>, making <span style="font-family:monospace">S</span>’s <span style="font-variant:small-caps">follow</span>set <span style="font-family:monospace">a, b</span>, and making <span style="font-family:monospace">R</span> always try to match -<span style="font-family:monospace">U</span>. 
In this case we can solve the problem by changing <span style="font-family:monospace">R</span> to -<code>'b' | U</code> but it may not always be possible to solve all such -problems in this way.</p> -<!--TOC section id="sec22" Grammar for Parsers--> -<h2 id="sec22" class="section">A  Grammar for Parsers</h2><!--SEC END --><p>This is the grammar for parsers, without any Python code mixed in. -The complete grammar can be found in <span style="font-family:monospace">parsedesc.g</span> in the Yapps -distribution.</p><pre class="verbatim">parser ParserDescription: - ignore: "\\s+" - ignore: "#.*?\r?\n" - token END: "$" # $ means end of string - token ATTR: "<<.+?>>" - token STMT: "{{.+?}}" - token ID: '[a-zA-Z_][a-zA-Z_0-9]*' - token STR: '[rR]?\'([^\\n\'\\\\]|\\\\.)*\'|[rR]?"([^\\n"\\\\]|\\\\.)*"' - - rule Parser: "parser" ID ":" - Options - Tokens - Rules - END - - rule Options: ( "option" ":" STR )* - rule Tokens: ( "token" ID ":" STR | "ignore" ":" STR )* - rule Rules: ( "rule" ID OptParam ":" ClauseA )* - - rule ClauseA: ClauseB ( '[|]' ClauseB )* - rule ClauseB: ClauseC* - rule ClauseC: ClauseD [ '[+]' | '[*]' ] - rule ClauseD: STR | ID [ATTR] | STMT - | '\\(' ClauseA '\\) | '\\[' ClauseA '\\]' -</pre> -<!--TOC section id="sec23" Upgrading--> -<h2 id="sec23" class="section">B  Upgrading</h2><!--SEC END --><p>Yapps 2.0 is not backwards compatible with Yapps 1.0. In this section -are some tips for upgrading:</p><ol class="enumerate" type=1><li class="li-enumerate"> -Yapps 1.0 was distributed as a single file. Yapps 2.0 is -instead distributed as two Python files: a <em>parser generator</em> -(26k) and a <em>parser runtime</em> (5k). You need both files to -create parsers, but you need only the runtime (<span style="font-family:monospace">yappsrt.py</span>) -to use the parsers.</li><li class="li-enumerate">Yapps 1.0 supported Python 1.4 regular expressions from the -<span style="font-family:monospace">regex</span> module. Yapps 2.0 uses Python 1.5 regular -expressions from the <span style="font-family:monospace">re</span> module. <em>The new syntax for -regular expressions is not compatible with the old syntax.</em> -Andrew Kuchling has a guide to converting -regular -expressionshttp://www.python.org/doc/howto/regex-to-re/ on his -web page.</li><li class="li-enumerate">Yapps 1.0 wants a pattern and then a return value in <code>-></code> -<code><<...>></code>. Yapps 2.0 allows patterns and Python statements to -be mixed. To convert a rule like this:<pre class="verbatim">rule R: A B C -> << E1 >> - | X Y Z -> << E2 >> -</pre><p>to Yapps 2.0 form, replace the return value specifiers with return -statements:</p><pre class="verbatim">rule R: A B C {{ return E1 }} - | X Y Z {{ return E2 }} -</pre></li><li class="li-enumerate">Yapps 2.0 does not perform tail recursion elimination. This -means any recursive rules you write will be turned into recursive -methods in the parser. The parser will work, but may be slower. -It can be made faster by rewriting recursive rules, using instead -the looping operators <code>*</code> and <code>+</code> provided in Yapps 2.0.</li></ol> -<!--TOC section id="sec24" Troubleshooting--> -<h2 id="sec24" class="section">C  Troubleshooting</h2><!--SEC END --><ul class="itemize"><li class="li-itemize"> -A common error is to write a grammar that doesn’t have an END -token. End tokens are needed when it is not clear when to stop -parsing. 
For example, when parsing the expression <span style="font-family:monospace">3+5</span>, it is -not clear after reading <span style="font-family:monospace">3</span> whether to treat it as a complete -expression or whether the parser should continue reading. -Therefore the grammar for numeric expressions should include an end -token. Another example is the grammar for Lisp expressions. In -Lisp, it is always clear when you should stop parsing, so you do -<em>not</em> need an end token. In fact, it may be more useful not -to have an end token, so that you can read in several Lisp expressions. -</li><li class="li-itemize">If there is a chance of ambiguity, make sure to put the choices -in the order you want them checked. Usually the most specific -choice should be first. Empty sequences should usually be last. -</li><li class="li-itemize">The context sensitive scanner is not appropriate for all -grammars. You might try using the insensitive scanner with the -<span style="font-family:monospace">context-insensitive-scanner</span> option in the grammar. -</li><li class="li-itemize">If performance turns out to be a problem, try writing a custom -scanner. The Yapps scanner is rather slow (but flexible and easy -to understand). -</li></ul> -<!--TOC section id="sec25" History--> -<h2 id="sec25" class="section">D  History</h2><!--SEC END --><p>Yapps 1 had several limitations that bothered me while writing -parsers:</p><ol class="enumerate" type=1><li class="li-enumerate"> -It was not possible to insert statements into the generated -parser. A common workaround was to write an auxilliary function -that executed those statements, and to call that function as part -of the return value calculation. For example, several of my -parsers had an “append(x,y)” function that existed solely to call -“x.append(y)”. -</li><li class="li-enumerate">The way in which grammars were specified was rather -restrictive: a rule was a choice of clauses. Each clause was a -sequence of tokens and rule names, followed by a return value. -</li><li class="li-enumerate">Optional matching had to be put into a separate rule because -choices were only made at the beginning of a rule. -</li><li class="li-enumerate">Repetition had to be specified in terms of recursion. Not only -was this awkward (sometimes requiring additional rules), I had to -add a tail recursion optimization to Yapps to transform the -recursion back into a loop. -</li></ol><p>Yapps 2 addresses each of these limitations.</p><ol class="enumerate" type=1><li class="li-enumerate"> -Statements can occur anywhere within a rule. (However, only -one-line statements are allowed; multiline blocks marked by -indentation are not.) -</li><li class="li-enumerate">Grammars can be specified using any mix of sequences, choices, -tokens, and rule names. To allow for complex structures, -parentheses can be used for grouping. -</li><li class="li-enumerate">Given choices and parenthesization, optional matching can be -expressed as a choice between some pattern and nothing. In -addition, Yapps 2 has the convenience syntax <code>[A B ...]</code> for -matching <code>A B ...</code> optionally. -</li><li class="li-enumerate">Repetition operators <code>*</code> for zero or more and <code>+</code> for -one or more make it easy to specify repeating patterns. 
-</li></ol><p>It is my hope that Yapps 2 will be flexible enough to meet my needs -for another year, yet simple enough that I do not hesitate to use it.</p> -<!--TOC section id="sec26" Debian Extensions--> -<h2 id="sec26" class="section">E  Debian Extensions</h2><!--SEC END --><p> -<a id="sec:debian"></a></p><p>The Debian version adds the following enhancements to the original -Yapps code. They were written by Matthias Urlichs.</p><ol class="enumerate" type=1><li class="li-enumerate"> -Yapps can stack input sources ("include files"). A usage example -is supplied with the calc.g sample program. -</li><li class="li-enumerate">Yapps now understands augmented ignore-able patterns. -This means that Yapps can parse multi-line C comments; this wasn’t -possible before. -</li><li class="li-enumerate">Better error reporting. -</li><li class="li-enumerate">Yapps now reads its input incrementally. -</li></ol><p>The generated parser has been renamed to <span style="font-family:monospace">yapps/runtime.py</span>. -In Debian, this file is provided by the <span style="font-family:monospace">yapps2-runtime</span> package. -You need to depend on it if you Debianize Python programs which use -yapps.</p> -<!--TOC section id="sec27" Future Extensions--> -<h2 id="sec27" class="section">F  Future Extensions</h2><!--SEC END --><p> -<a id="sec:future"></a></p><p>I am still investigating the possibility of LL(2) and higher -lookahead. However, it looks like the resulting parsers will be -somewhat ugly. </p><p>It would be nice to control choices with user-defined predicates.</p><p>The most likely future extension is backtracking. A grammar pattern -like <code>(VAR ':=' expr)? {{ return Assign(VAR,expr) }} : expr {{ return expr }}</code> -would turn into code that attempted to match <code>VAR ':=' expr</code>. If -it succeeded, it would run <code>{{ return ... }}</code>. If it failed, it -would match <code>expr {{ return expr }}</code>. Backtracking may make it -less necessary to write LL(2) grammars.</p> -<!--TOC section id="sec28" References--> -<h2 id="sec28" class="section">G  References</h2><!--SEC END --><ol class="enumerate" type=1><li class="li-enumerate"> -The Python-Parser -SIGhttp://www.python.org/sigs/parser-sig/ is the first place -to look for a list of parser systems for Python.</li><li class="li-enumerate">ANTLR/PCCTS, by Terrence Parr, is available at -The ANTLR Home Pagehttp://www.antlr.org/.</li><li class="li-enumerate">PyLR, by Scott Cotton, is at his Starship -pagehttp://starship.skyport.net/crew/scott/PyLR.html.</li><li class="li-enumerate">John Aycock’s Compiling Little Languages -Frameworkhttp://www.foretec.com/python/workshops/1998-11/proceedings/papers/aycock-little/aycock-little.html.</li><li class="li-enumerate">PyBison, by Scott Hassan, can be found at -his Python Projects -pagehttp://coho.stanford.edu/~hassan/Python/.</li><li class="li-enumerate">mcf.pars, by Mike C. Fletcher, is available at -his web -pagehttp://members.rogers.com/mcfletch/programming/simpleparse/simpleparse.html.</li><li class="li-enumerate">kwParsing, by Aaron Watters, is available at -his Starship -pagehttp://starship.skyport.net/crew/aaron_watters/kwParsing/. -</li></ol><!--BEGIN NOTES document--> -<hr class="footnoterule"><dl class="thefootnotes"><dt class="dt-thefootnotes"> -<a id="note1" href="#text1">1</a></dt><dd class="dd-thefootnotes"><div class="footnotetext">When using a context-sensitive scanner, the parser tells the scanner what the valid token types are at each point. 
If you call <span style="font-family:monospace">token</span> before the parser can tell the scanner the valid token types, the scanner will attempt to match without considering the context.</div></dd></dl> -<!--END NOTES--> -<!--CUT END --> -<!--HTMLFOOT--> -<!--ENDHTML--> -<!--FOOTER--> -<hr style="height:2"><blockquote class="quote"><em>This document was translated from L<sup>A</sup>T<sub>E</sub>X by -</em><a href="http://hevea.inria.fr/index.html"><em>H</em><em><span style="font-size:small"><sup>E</sup></span></em><em>V</em><em><span style="font-size:small"><sup>E</sup></span></em><em>A</em></a><em>.</em></blockquote></body> -</html> diff -Nru yapps2-2.2.1/doc/yapps2.htoc yapps2-2.2.1/doc/yapps2.htoc --- yapps2-2.2.1/doc/yapps2.htoc 2023-08-17 02:01:56.000000000 +0200 +++ yapps2-2.2.1/doc/yapps2.htoc 1970-01-01 01:00:00.000000000 +0100 @@ -1,36 +0,0 @@ -\begin{tocenv} -\tocitem \@locref{sec1}{\begin{@norefs}\@print{1}\quad{}Introduction{}\end{@norefs}} -\tocitem \@locref{sec2}{\begin{@norefs}\@print{2}\quad{}Examples{}\end{@norefs}} -\begin{tocenv} -\tocitem \@locref{sec3}{\begin{@norefs}\@print{2.1}\quad{}Introduction to Grammars{}\end{@norefs}} -\tocitem \@locref{sec4}{\begin{@norefs}\@print{2.2}\quad{}Lisp Expressions{}\end{@norefs}} -\tocitem \@locref{sec7}{\begin{@norefs}\@print{2.3}\quad{}Calculator{}\end{@norefs}} -\tocitem \@locref{sec8}{\begin{@norefs}\@print{2.4}\quad{}Calculator with Memory{}\end{@norefs}} -\end{tocenv} -\tocitem \@locref{sec9}{\begin{@norefs}\@print{3}\quad{}Grammars{}\end{@norefs}} -\begin{tocenv} -\tocitem \@locref{sec10}{\begin{@norefs}\@print{3.1}\quad{}Left Factoring{}\end{@norefs}} -\tocitem \@locref{sec11}{\begin{@norefs}\@print{3.2}\quad{}Left Recursion{}\end{@norefs}} -\tocitem \@locref{sec12}{\begin{@norefs}\@print{3.3}\quad{}Ambiguous Grammars{}\end{@norefs}} -\end{tocenv} -\tocitem \@locref{sec13}{\begin{@norefs}\@print{4}\quad{}Customization{}\end{@norefs}} -\begin{tocenv} -\tocitem \@locref{sec14}{\begin{@norefs}\@print{4.1}\quad{}Customizing Parsers{}\end{@norefs}} -\tocitem \@locref{sec15}{\begin{@norefs}\@print{4.2}\quad{}Customizing Scanners{}\end{@norefs}} -\end{tocenv} -\tocitem \@locref{sec16}{\begin{@norefs}\@print{5}\quad{}Parser Mechanics{}\end{@norefs}} -\begin{tocenv} -\tocitem \@locref{sec17}{\begin{@norefs}\@print{5.1}\quad{}Parser Objects{}\end{@norefs}} -\tocitem \@locref{sec18}{\begin{@norefs}\@print{5.2}\quad{}Context Sensitive Scanner{}\end{@norefs}} -\tocitem \@locref{sec19}{\begin{@norefs}\@print{5.3}\quad{}Internal Variables{}\end{@norefs}} -\tocitem \@locref{sec20}{\begin{@norefs}\@print{5.4}\quad{}Pre- and Post-Parser Code{}\end{@norefs}} -\tocitem \@locref{sec21}{\begin{@norefs}\@print{5.5}\quad{}Representation of Grammars{}\end{@norefs}} -\end{tocenv} -\tocitem \@locref{sec22}{\begin{@norefs}\@print{A}\quad{}Grammar for Parsers{}\end{@norefs}} -\tocitem \@locref{sec23}{\begin{@norefs}\@print{B}\quad{}Upgrading{}\end{@norefs}} -\tocitem \@locref{sec24}{\begin{@norefs}\@print{C}\quad{}Troubleshooting{}\end{@norefs}} -\tocitem \@locref{sec25}{\begin{@norefs}\@print{D}\quad{}History{}\end{@norefs}} -\tocitem \@locref{sec26}{\begin{@norefs}\@print{E}\quad{}Debian Extensions{}\end{@norefs}} -\tocitem \@locref{sec27}{\begin{@norefs}\@print{F}\quad{}Future Extensions{}\end{@norefs}} -\tocitem \@locref{sec28}{\begin{@norefs}\@print{G}\quad{}References{}\end{@norefs}} -\end{tocenv} diff -Nru yapps2-2.2.1/doc/yapps2.tex yapps2-2.2.1/doc/yapps2.tex --- yapps2-2.2.1/doc/yapps2.tex 2023-08-17 02:01:56.000000000 +0200 +++ 
yapps2-2.2.1/doc/yapps2.tex 2018-10-15 12:58:00.000000000 +0200 @@ -795,7 +795,7 @@ class MyX(Xparser.X): def printmsg(self): - print("Hello!") + print "Hello!" \end{verbatim} \mysubsection{Customizing Scanners} @@ -924,7 +924,7 @@ {{ start = self._scanner.pos }} a b c {{ end = self._scanner.pos }} - {{ print('Text is', self._scanner.input[start:end]) }} + {{ print 'Text is', self._scanner.input[start:end] }} \end{verbatim} \mysubsection{Pre- and Post-Parser Code} diff -Nru yapps2-2.2.1/examples/calc.g yapps2-2.2.1/examples/calc.g --- yapps2-2.2.1/examples/calc.g 2023-08-17 02:01:56.000000000 +0200 +++ yapps2-2.2.1/examples/calc.g 2018-10-15 12:58:00.000000000 +0200 @@ -3,12 +3,12 @@ def lookup(map, name): for x,v in map: if x == name: return v - if name not in globalvars: print('Undefined (defaulting to 0):', name) + if not globalvars.has_key(name): print 'Undefined (defaulting to 0):', name return globalvars.get(name, 0) def stack_input(scanner,ign): """Grab more input""" - scanner.stack_input(input(">?> ")) + scanner.stack_input(raw_input(">?> ")) %% parser Calculator: @@ -20,10 +20,10 @@ token VAR: "[a-zA-Z_]+" # Each line can either be an expression or an assignment statement - rule goal: expr<<[]>> END {{ print('=', expr) }} + rule goal: expr<<[]>> END {{ print '=', expr }} {{ return expr }} | "set" VAR expr<<[]>> END {{ globalvars[VAR] = expr }} - {{ print(VAR, '=', expr) }} + {{ print VAR, '=', expr }} {{ return expr }} # An expression is the sum and difference of factors @@ -47,18 +47,18 @@ "in" expr<<V>> {{ return expr }} %% if __name__=='__main__': - print('Welcome to the calculator sample for Yapps 2.') - print(' Enter either "<expression>" or "set <var> <expression>",') - print(' or just press return to exit. An expression can have') - print(' local variables: let x = expr in expr') + print 'Welcome to the calculator sample for Yapps 2.' + print ' Enter either "<expression>" or "set <var> <expression>",' + print ' or just press return to exit. An expression can have' + print ' local variables: let x = expr in expr' # We could have put this loop into the parser, by making the # `goal' rule use (expr | set var expr)*, but by putting the # loop into Python code, we can make it interactive (i.e., enter # one expression, get the result, enter another expression, etc.) while 1: - try: s = input('>>> ') + try: s = raw_input('>>> ') except EOFError: break if not s.strip(): break parse('goal', s) - print('Bye.') + print 'Bye.' 
diff -Nru yapps2-2.2.1/examples/xml.g yapps2-2.2.1/examples/xml.g --- yapps2-2.2.1/examples/xml.g 2023-08-17 02:01:56.000000000 +0200 +++ yapps2-2.2.1/examples/xml.g 2018-10-15 12:58:00.000000000 +0200 @@ -54,13 +54,13 @@ '<begin> middle </end>', '<begin> <nested attr=\'baz\' another="hey"> foo </nested> <nested> bar </nested> </begin>', ] - print() - print('____Running tests_______________________________________') + print + print '____Running tests_______________________________________' for test in tests: - print() + print try: parser = xml(xmlScanner(test)) output = '%s ==> %s' % (repr(test), repr(parser.node())) - except (runtime.SyntaxError, AssertionError) as e: + except (yappsrt.SyntaxError, AssertionError) as e: output = '%s ==> FAILED ==> %s' % (repr(test), e) - print(output) + print output diff -Nru yapps2-2.2.1/setup.py yapps2-2.2.1/setup.py --- yapps2-2.2.1/setup.py 2023-08-17 02:01:56.000000000 +0200 +++ yapps2-2.2.1/setup.py 2018-10-15 12:58:00.000000000 +0200 @@ -1,13 +1,13 @@ #!/usr/bin/env python -from setuptools import setup +from setuptools import setup, find_packages import os from yapps import __version__ as version pkg_root = os.path.dirname(__file__) # Error-handling here is to allow package to be built w/o README included -try: readme = open(os.path.join(pkg_root, 'README.md')).read() +try: readme = open(os.path.join(pkg_root, 'README.txt')).read() except IOError: readme = '' setup( @@ -15,15 +15,15 @@ version = version, author = 'Amit J. Patel, Matthias Urlichs', author_email = 'am...@cs.stanford.edu, sm...@debian.org', - maintainer = 'Matthias Urlichs', - maintainer_email = 'sm...@debian.org', + maintainer = 'Mike Kazantsev', + maintainer_email = 'mk.frag...@gmail.com', license = 'MIT', - url = 'https://github.com/smurfix/yapps', + url = 'https://github.com/mk-fg/yapps', description = 'Yet Another Python Parser System', long_description = readme, - packages = ['yapps'], + packages = find_packages(), include_package_data = True, package_data = {'': ['README.txt']}, exclude_package_data = {'': ['README.*']}, diff -Nru yapps2-2.2.1/test.sh yapps2-2.2.1/test.sh --- yapps2-2.2.1/test.sh 2023-08-17 02:01:56.000000000 +0200 +++ yapps2-2.2.1/test.sh 2018-10-15 12:58:00.000000000 +0200 @@ -4,10 +4,10 @@ trap 'echo ERROR' 0 export PYTHONPATH=$(pwd) -for PY_G in python3 ; do +for PY_G in python python3 ; do $PY_G ./yapps2 examples/expr.g examples/expr.py -for PY_X in python3 ; do +for PY_X in python python3 ; do test "$(echo "1+2*3+4" | $PY_X examples/expr.py goal)" = 11 done diff -Nru yapps2-2.2.1/yapps/cli_tool.py yapps2-2.2.1/yapps/cli_tool.py --- yapps2-2.2.1/yapps/cli_tool.py 2023-08-17 02:01:56.000000000 +0200 +++ yapps2-2.2.1/yapps/cli_tool.py 2018-10-15 12:58:00.000000000 +0200 @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python # # Yapps 2 - yet another python parser system diff -Nru yapps2-2.2.1/yapps/parsetree.py yapps2-2.2.1/yapps/parsetree.py --- yapps2-2.2.1/yapps/parsetree.py 2023-08-17 02:01:56.000000000 +0200 +++ yapps2-2.2.1/yapps/parsetree.py 2018-10-15 12:58:00.000000000 +0200 @@ -270,9 +270,9 @@ def generate_output(self): self.calculate() - self.write("from __future__ import print_function\n") self.write(self.preparser) self.write("# Begin -- grammar generated by Yapps\n") + self.write("from __future__ import print_function\n") self.write("import sys, re\n") self.write("from yapps import runtime\n") self.write("\n") diff -Nru yapps2-2.2.1/yapps/runtime.py yapps2-2.2.1/yapps/runtime.py --- yapps2-2.2.1/yapps/runtime.py 2023-08-17 
02:01:56.000000000 +0200 +++ yapps2-2.2.1/yapps/runtime.py 2018-10-15 12:58:00.000000000 +0200 @@ -175,7 +175,7 @@ file,line,p = pos if file != self.filename: if self.stack: return self.stack.print_line_with_pointer(pos,length=length,out=out) - print("(%s: not in input buffer)" % file, file=out) + print >>out, "(%s: not in input buffer)" % file return text = self.input @@ -198,7 +198,7 @@ break spos = cr+1 else: - print("(%s:%d not in input buffer)" % (file,origline), file=out) + print >>out, "(%s:%d not in input buffer)" % (file,origline) return # Now try printing part of the line @@ -227,8 +227,8 @@ p = p - 7 # Now print the string, along with an indicator - print('> ',text, file=out) - print('> ',' '*p + '^', file=out) + print >>out, '> ',text + print >>out, '> ',' '*p + '^' def grab_input(self): """Get more input if possible.""" @@ -316,7 +316,7 @@ del self.tokens[0] self.tokens.append(tok) self.last_read_token = tok - # print(repr(tok)) + # print repr(tok) return tok else: ignore = self.ignore[best_pat] diff -Nru yapps2-2.2.1/yapps2 yapps2-2.2.1/yapps2 --- yapps2-2.2.1/yapps2 2023-08-17 02:01:56.000000000 +0200 +++ yapps2-2.2.1/yapps2 2018-10-15 12:58:00.000000000 +0200 @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python # # Yapps 2 - yet another python parser system diff -Nru yapps2-2.2.1/Yapps2.egg-info/dependency_links.txt yapps2-2.2.1/Yapps2.egg-info/dependency_links.txt --- yapps2-2.2.1/Yapps2.egg-info/dependency_links.txt 2023-08-17 02:01:56.000000000 +0200 +++ yapps2-2.2.1/Yapps2.egg-info/dependency_links.txt 1970-01-01 01:00:00.000000000 +0100 @@ -1 +0,0 @@ - diff -Nru yapps2-2.2.1/Yapps2.egg-info/entry_points.txt yapps2-2.2.1/Yapps2.egg-info/entry_points.txt --- yapps2-2.2.1/Yapps2.egg-info/entry_points.txt 2023-08-17 02:01:56.000000000 +0200 +++ yapps2-2.2.1/Yapps2.egg-info/entry_points.txt 1970-01-01 01:00:00.000000000 +0100 @@ -1,3 +0,0 @@ -[console_scripts] -yapps2 = yapps.cli_tool:main - diff -Nru yapps2-2.2.1/Yapps2.egg-info/PKG-INFO yapps2-2.2.1/Yapps2.egg-info/PKG-INFO --- yapps2-2.2.1/Yapps2.egg-info/PKG-INFO 2023-08-17 02:01:56.000000000 +0200 +++ yapps2-2.2.1/Yapps2.egg-info/PKG-INFO 1970-01-01 01:00:00.000000000 +0100 @@ -1,65 +0,0 @@ -Metadata-Version: 1.2 -Name: Yapps2 -Version: 2.2.1 -Summary: Yet Another Python Parser System -Home-page: https://github.com/smurfix/yapps -Author: Amit J. Patel, Matthias Urlichs -Author-email: am...@cs.stanford.edu, sm...@debian.org -Maintainer: Matthias Urlichs -Maintainer-email: sm...@debian.org -License: MIT -Description: YAPPS: Yet Another Python Parser System - ---------------------------------------- - - For the most complete and excellent documentation (e.g. [manual with - examples](http://theory.stanford.edu/~amitp/yapps/yapps2/manual/)) and info, - please see original project website: http://theory.stanford.edu/~amitp/yapps/ - - YAPPS is an easy to use parser generator that is written in Python and generates - Python code. - There are several parser generator systems already available for Python, but - this parser has different goals: Yapps is simple, very easy to use, and produces - human-readable parsers. - - It is not the fastest or most powerful parser. - Yapps is designed to be used when regular expressions are not enough and other - parser systems are too much: situations where you might otherwise write your own - recursive descent parser. 
- - This fork contains several upward-compatible enhancements to the original - YAPPS source, originally included in [debian package](http://packages.debian.org/sid/yapps2): - - * Handle stacked input ("include files"). - * Augmented ignore-able patterns (can parse multi-line C comments correctly). - * Better error reporting. - * Read input incrementally. - - - Installation - ---------------------------------------- - - It's a regular package for Python 2.7 (not 3.X, but there are links to 3.X - patches listed on the [original author - website](http://theory.stanford.edu/~amitp/yapps/)), but not in pypi, so can be - installed from a checkout with something like that: - - % python setup.py install - - Better way would be to use [pip](http://pip-installer.org/) to install all the - necessary dependencies as well: - - % pip install 'git+https://github.com/mk-fg/yapps.git#egg=yapps' - - Note that to install stuff in system-wide PATH and site-packages, elevated - privileges are often required. - Use "install --user", - [~/.pydistutils.cfg](http://docs.python.org/install/index.html#distutils-configuration-files) - or [virtualenv](http://pypi.python.org/pypi/virtualenv) to do unprivileged - installs into custom paths. - - Alternatively, `./yapps2` can be run right from the checkout tree, without any - installation. - - No extra package dependencies. - -Platform: UNKNOWN diff -Nru yapps2-2.2.1/Yapps2.egg-info/SOURCES.txt yapps2-2.2.1/Yapps2.egg-info/SOURCES.txt --- yapps2-2.2.1/Yapps2.egg-info/SOURCES.txt 2023-08-17 02:01:56.000000000 +0200 +++ yapps2-2.2.1/Yapps2.egg-info/SOURCES.txt 1970-01-01 01:00:00.000000000 +0100 @@ -1,12 +0,0 @@ -README.md -setup.py -Yapps2.egg-info/PKG-INFO -Yapps2.egg-info/SOURCES.txt -Yapps2.egg-info/dependency_links.txt -Yapps2.egg-info/entry_points.txt -Yapps2.egg-info/top_level.txt -yapps/__init__.py -yapps/cli_tool.py -yapps/grammar.py -yapps/parsetree.py -yapps/runtime.py \ Kein Zeilenumbruch am Dateiende. diff -Nru yapps2-2.2.1/Yapps2.egg-info/top_level.txt yapps2-2.2.1/Yapps2.egg-info/top_level.txt --- yapps2-2.2.1/Yapps2.egg-info/top_level.txt 2023-08-17 02:01:56.000000000 +0200 +++ yapps2-2.2.1/Yapps2.egg-info/top_level.txt 1970-01-01 01:00:00.000000000 +0100 @@ -1 +0,0 @@ -yapps