"""
Author: Bruno Barroca
Date: October 2014
Description: A top-down parser

Modifications by Daniel Riegelhaupt:
* removed the test input
* renamed pos to startpos, because in my humble opinion a tuple (startpos, endpos) makes more sense than (pos, endpos)
* added parameters to init: tab_size, line_position, hide_implicit
  - see the init comments for more info on all options
  - line_position changes startpos and endpos to instances of the Position class (changed December 2014)
* added anonymous terminals: tokens do not have to be defined as tokens but can be typed directly in rules
* changed the interleave function to be deep and to start on START
* changed the positions returned in the tree to be relative to line numbers instead of absolute offsets
  - did the same for the partial results returned on a syntax error
  - TODO check efficiency of the previous point: checking the whole text for every position might be slow
* changed usage: instead of Parser(input, grammar).parse() it is now Parser(grammar).parse(input)
  - added a self.reset() method for fields that need to be initialized again when parsing a new input
* changed findFailure and generateErrorReport:
  - I need the rule/token name as well, not only the error text
  - hidden elements (for example comments and newlines) are not included in error reports if hide_implicit is set to True
  - same for the interleave rule
"""
import re
import sys
from copy import deepcopy

line_cache = {}


def get_buffer(base, start):
    if sys.version_info >= (3, ):
        # TODO find an efficient implementation for Python3...
        return base[start:]
    else:
        return buffer(base, start)


class Tree(object):
    @staticmethod
    def is_rule(tree):
        return tree['head'].islower()

    @staticmethod
    def is_token(tree):
        return not Tree.is_rule(tree)

    @staticmethod
    def get_tail(tree):
        if Tree.is_rule(tree):
            if not tree.get("_tail", None):
                tree['_tail'] = [t for t in Tree.get_raw_tail(tree) if not t['head'].startswith("implicit_autogenerated_")]
            return tree['_tail']
        else:
            return tree['tail']

    @staticmethod
    def get_raw_tail(tree):
        return tree['tail']

    @staticmethod
    def get_text(tree, with_implicit=False):
        parts = []
        if with_implicit:
            tail = Tree.get_raw_tail
        else:
            tail = Tree.get_tail

        def post_order(tree):
            for child in tail(tree):
                if "replaced" in child:
                    child = child['replaced']
                if isinstance(child, dict):
                    post_order(child)
                else:
                    parts.append(child)

        post_order(tree)
        return ''.join(parts)

    @staticmethod
    def get_child(tree, name):
        for child in Tree.get_tail(tree):
            if child['head'] == name:
                return child
        return None

    @staticmethod
    def get_children(tree, name):
        children = []
        for child in Tree.get_tail(tree):
            if child['head'] == name:
                children.append(child)
        return children

    @staticmethod
    def replace_child(tree, old_child, new_child):
        new_child['replaced'] = old_child
        i = Tree.get_raw_tail(tree).index(old_child)
        Tree.get_raw_tail(tree)[i] = new_child
        i = Tree.get_tail(tree).index(old_child)
        Tree.get_tail(tree)[i] = new_child

    @staticmethod
    def get_tail_without(tree, names):
        if Tree.is_rule(tree):
            return [t for t in Tree.get_tail(tree) if not t['head'] in names]
        else:
            return Tree.get_raw_tail(tree)

    @staticmethod
    def get_reference_line(tree):
        return "%s:%s:%s-%s" % (tree['inputfile'], tree['startpos'][0], tree['startpos'][1], tree['endpos'][1])

    @staticmethod
    def fix_tracability(tree, inputfile):
        if 'inputfile' not in tree:
            tree['inputfile'] = inputfile
        for t in tree['tail']:
            if isinstance(t, dict):
                Tree.fix_tracability(t, tree['inputfile'])
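
# Note (editor's sketch, grounded in the code above, not part of the original file): parse-tree
# nodes are plain dicts of the form
#   {'head': <rule or token name>, 'tail': [<child dicts and matched strings>], 'startpos': ..., 'endpos': ...}
# plus optional bookkeeping keys ('_tail', 'replaced', 'inputfile') maintained by the Tree helpers.
# For example, Tree.get_text(node) concatenates the matched strings of all leaves, and
# Tree.get_child(node, 'name') returns the first child whose 'head' equals 'name'.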


class Parser(object):
    class Constants(object):
        Token = 'token'
        Production = 'prod'
        Success = 'success'
        Failure = 'failure'

    class LR(object):
        def __init__(self, seed, rulename, head, nextlr):
            self.seed = seed
            self.rule = rulename
            self.head = head
            self.next = nextlr

        def copy(self):
            return Parser.LR(self.seed, self.rule, self.head, self.next)

    class Head(object):
        def __init__(self, rulename, involved, evaluation):
            self.rule = rulename
            self.involved = involved
            self.evaluation = evaluation

    def __init__(self, grammar, **options):
        """
        Creates a Parser for the given grammar.
        :param grammar: an instance of the Grammar class
        :param options: the following options are supported:
            tab_size: default 1. Sets the character width of a tab character.
            hide_implicit: default False. When True, implicit tokens are hidden from the returned parse tree and error messages.
                Note that this option will not override rules or tokens whose 'hidden' attribute has already been set manually in the Grammar class.
            line_position: default False. When True, startpos and endpos in the parse tree are (line, column) Position objects instead of absolute offsets.
        """
        # changed by Daniel: members that need to be initialized each time parse() is called have been moved to reset(),
        # which is called by the parse() method
        self.rules = deepcopy(grammar.rules)
        self.tokens = deepcopy(grammar.tokens)
        self.implicitList = []  # added by Daniel, set in setHideImplicit so that we can consult the implicit list when building error messages
        self.implictRuleName = ""
        # options added by Daniel
        self.tabsize = int(options.pop('tab_size', 1))  # the character width of a tab
        self.hideImplicit = bool(options.pop('hide_implicit', False))
        # whether to hide implicit tokens and rules from the returned parse tree
        # Important note: this option will not override rules or tokens where the hidden variable has already been set manually
        self.linePosition = bool(options.pop('line_position', False))
        # if True, the positions in the returned parse tree are (line, column) pairs instead of offsets into the input string
        # preprocess must happen after the options have been read (after hideImplicit has been set)
        self.preprocess()

    def reset(self):
        self.input = ""
        self.memotable = {}
        self.failure = {}
        self.lrstack = None
        self.heads = {}
        self.countcard = {}

    def preprocess(self):
        # for elem in self.rules.keys():  # changed by Daniel: we only check 'start' because it is global
        elem = 'start'
        if elem in self.rules.keys():
            if ('interleave' in self.rules[elem]):
                ilist = self.rules[elem]['interleave']
                self.setHideImplicit(ilist, self.hideImplicit)
                self.interleave(self.rules[elem], ilist)

    def setHideImplicit(self, ilist, hidden=False):
        if ilist:
            # ilist = ['?', '@rulename']
            rulename = ilist[1][1:]
            self.implictRuleName = rulename  # used to hide the interleave rule in later error reports
            if rulename in self.rules:
                self.rules[rulename]['hidden'] = hidden
                body = self.rules[rulename]['body']
                # body = [*, [| ,,,,]]
                elems = body[1][1:]
                self.implicitList = elems
                for elem in elems:
                    l = None
                    error = ''
                    if elem[0] == '@':
                        l = self.rules
                        error = ' rule not found in grammar rules.'
                    elif elem[0] == '$':
                        l = self.tokens
                        error = ' token not found in grammar tokens.'
                    # else: in this case it is an anonymous token
                    if l:
                        name = elem[1:]
                        if name in l:
                            if not 'hidden' in l[name]:
                                # this method will not override anything the user has explicitly specified in the structure:
                                # if there is already a hidden value it is kept, even if it differs from this one.
                                # an example use case is whitespace vs. comments:
                                # both can appear anywhere in the text and so are implicit in the grammar,
                                # but we don't want spaces in the tree while we do want the comments
                                l[name]['hidden'] = hidden
                        else:
                            raise Exception(name + error)
                    # else: an anonymous token can't be hidden for the moment, unless we create an ignore list for it or something similar
            else:
                raise Exception(rulename + ' rule not found in grammar rules.')

    def interleave(self, elem, ilist):
        # quick and simple interleaving method; it will probably introduce some double interleaving,
        # but this is as simple as I could make it without handling each and every case separately
        def quickInterLeave(lst, inter):
            newL = []
            newL.append(lst[0])
            isSeq = self.isSequence(lst[0])
            for item in lst[1:]:
                if (isinstance(item, list)):  # a sublist
                    newL.append(quickInterLeave(item, inter))
                else:
                    if (item[0] == '@'):  # rule
                        rulename = item[1:]
                        if rulename in self.rules:
                            rule = self.rules[rulename]
                            if not 'visited' in rule or rule['visited'] == False:
                                self.interleave(rule, inter)
                        else:
                            raise Exception(rulename + ' rule not found in grammar rules.')
                    # else: it is a token or an anonymous token; we don't need to do anything special,
                    # just add it to the interleaved list
                    if isSeq:  # no need to complicate the data structure if the list is a sequence
                        if not newL[-1] == inter:
                            newL.append(inter)
                        newL.append(item)
                        newL.append(inter)
                    else:
                        newL.append(['.', inter, item, inter])
                        # This way, when the list is not a sequence, interleaving doesn't change its meaning.
                        # Example: t1, t2 are tokens, i is an optional whitespace being interleaved:
                        #   [., t1, t2] -> [., i, t1, i, t2]
                        # the meaning stays the same:
                        # t1 and t2 both have to be found for the rule to apply, regardless of the whitespace.
                        #   [|, t1, t2] -> [|, i, t1, i, t2]
                        # the meaning changed: if i is encountered the 'or' is satisfied;
                        # so instead we do -> [|, [., i, t1, i], [., i, t2, i]]
                        # note that while inter has been added to the data structure four times, it will only match
                        # for one option, so it is not really a duplicate.
                        # another way of writing this would be [., inter, [|, t1, t2], inter], but that is easier
                        # said than done, especially for big (complex) data structures.
            return newL

        # the first thing we do is mark the element as visited; this avoids infinite loops due to recursion
        elem['visited'] = True
        if (not 'body' in elem):
            return
        ls = elem['body']
        newbody = quickInterLeave(ls, ilist)
        elem['body'] = newbody

    def parse(self, text):
        self.reset()  # changed by Daniel: the text is received as a parameter instead of once at init, so first reset the fields
        self.input = text
        results = self.applyrule('@start', 0)
        if len(results) > 1:
            # Handle ambiguity
            from hutn_compiler.prettyprint_visitor import PrettyPrintVisitor
            for p in results:
                print("===================================")
                print("VISIT RESULT")
                print("===================================")
                visitor = PrettyPrintVisitor([])
                visitor.visit(p["tree"])
                print(visitor.dump())
            result = self.generateErrorReport()
        elif (results == [] or results[0]['endpos'] < len(self.input)):
            result = self.generateErrorReport()
            for elem in result['partialresults']:  # added by Daniel: there was no post-processing on partial results and I need it
                if elem['tree']:  # with partial results the tree can be None
                    elem['tree'] = IgnorePostProcessor(self.rules, self.tokens).visit(elem['tree'])
                    if self.linePosition:
                        # elem['tree'].startpos = 0
                        # elem['tree'].endpos = 0
                        elem['tree'] = Parser.PositionPostProcessor(self.convertToLineColumn).visit(elem['tree'])  # added by Daniel
        elif len(results) == 1:
            result = results[0]
            result.update({'status': Parser.Constants.Success})
            if result['tree']['head'] != 'start':
                result['tree'] = {"head": "start", "tail": [result['tree']], "startpos": result['tree']['startpos'], "endpos": result['tree']['endpos']}
            result['tree'] = IgnorePostProcessor(self.rules, self.tokens).visit(result['tree'])
            if self.linePosition:  # added by Daniel
                result['tree'] = Parser.PositionPostProcessor(self.convertToLineColumn).visit(result['tree'])
        return result

    def generate_line_cache(self):
        global line_cache
        if self in line_cache:
            return
        line_cache[self] = []
        lc = line_cache[self]
        line = 1
        column = 0
        for i in self.input:
            if i == "\n":
                line += 1
                column = 0
            elif i == "\t":
                column += self.tabsize
            else:
                column += 1
            lc.append((line, column))

    def convertToLineColumn(self, pos):
        global line_cache
        self.generate_line_cache()
        if pos < len(line_cache[self]):
            return {'line': line_cache[self][pos][0], 'column': line_cache[self][pos][1]}
        else:
            return {'line': line_cache[self][-1][0], 'column': line_cache[self][-1][1] + 1}

    def findlargerresultat(self, pos):
        endpos = pos
        result = None
        for key in self.memotable.keys():
            elem = self.memotable[key]
            if (elem == []):
                continue
            if (elem[0]['startpos'] == pos and endpos < elem[0]['endpos']):
                endpos = elem[0]['endpos']
                result = elem[0]
        return result

    def generateErrorReport(self):
        # consult the memotable and collect contiguities until endpos
        endpos = len(self.input) - 1
        pos = 0
        elems = []
        while pos <= endpos:
            elem = self.findlargerresultat(pos)
            if (not elem or (elem and elem['endpos'] == pos)):
                break
            pos = elem['endpos']
            elems.append(elem)
        if (pos <= endpos):
            elems.append({'tree': None, 'startpos': pos, 'endpos': endpos})
        elem = self.getFirstBiggestSpan(elems)
        if elem is None:
            return {'status': Parser.Constants.Failure, 'line': 0, 'column': 0, 'text': "Empty input file", 'partialresults': [], 'grammarelements': None}
        reasons = self.findFailure(elem['startpos'], elem['endpos'])
        if (reasons == []):
            pos -= 1
        else:
            pos = reasons[0]['startpos']
        read = self.input[pos:pos + 1]
        linecolumn = self.convertToLineColumn(pos)
        message = 'Syntax error at line ' + str(linecolumn['line']) + ' and column ' + str(linecolumn['column']) + '. '
        keys = []
        if (not reasons == []):
            first = True
            for reason in reasons:
                if (first):
                    message += 'Expected ' + reason['text']
                    first = False
                else:
                    message += ' or ' + reason['text']
                keys.append(reason['key'])
            message += '. Instead read: ' + repr(read) + '.'
        else:
            message += 'Read: \'' + read + '\'.'
        return {'status': Parser.Constants.Failure, 'line': linecolumn['line'], 'column': linecolumn['column'],
                'text': message, 'partialresults': elems, 'grammarelements': keys}

    def getFirstBiggestSpan(self, elems):
        biggestspan = 0
        result = None
        for elem in elems:
            span = elem['endpos'] - elem['startpos']
            if (biggestspan < span):
                result = elem
                biggestspan = span  # remember the largest span seen so far
        return result

    def findFailure(self, pos, endpos):
        posreasons = []
        endposreasons = []
        # changed by Daniel:
        # * I need the key as well for autocompletion, so instead of appending elem I append a new dictionary with the elem fields and the key
        # * checks both conditions, for posreasons and endposreasons, in one for loop instead of two
        # * does not consider keys that are hidden
        for key in self.failure.keys():
            # keys start with either $ for tokens or @ for rules;
            # however, with the given metagrammar tokens are all caps and rules are all lowercase, so there can't be an overlap
            # and we can safely test both
            if self.hideImplicit and \
                    (('$' + key in self.implicitList) or ('@' + key in self.implicitList) or (key == self.implictRuleName)):
                continue
            else:
                elem = self.failure[key]
                if (elem['startpos'] == pos and not elem['text'] == ''):
                    posreasons.append({'key': key, 'startpos': elem['startpos'], 'text': elem['text']})
                if (elem['startpos'] == endpos and not elem['text'] == ''):
                    endposreasons.append({'key': key, 'startpos': elem['startpos'], 'text': elem['text']})
        if (len(endposreasons) < len(posreasons)):
            return posreasons
        else:
            return endposreasons

    def setupLR(self, rule, elem):
        if (elem.head == None):
            elem.head = Parser.Head(rule, [], [])
        s = self.lrstack
        while s and not s.rule == elem.head.rule:
            s.head = elem.head
            if (not s.rule in elem.head.involved):
                elem.head.involved.append(s.rule)
            s = s.next

    def recall(self, rule, j):
        newresults = []
        if ((rule, j) in self.memotable):
            newresults = self.memotable[(rule, j)]
        h = None
        if (j in self.heads):
            h = self.heads[j]
        if (not h):
            return newresults
        if (newresults == [] and not rule in (h.involved + [h.rule])):
            return []  # [{'tree': [], 'startpos': j, 'endpos': j}]
        if (rule in h.evaluation):
            h.evaluation.remove(rule)
            newresults = self.eval(rule, j)
            self.memotable.update({(rule, j): newresults})
        return newresults

    def applyrule(self, rule, j):
        overallresults = []
        newresults = self.recall(rule, j)
        if (not newresults == []):
            memoresults = []
            for elem in newresults:
                if (isinstance(elem['tree'], Parser.LR)):
                    self.setupLR(rule, elem['tree'])
                    memoresults += elem['tree'].seed
                else:
                    overallresults.append(elem)
            if (not memoresults == []):
                self.memotable.update({(rule, j): memoresults})
                return memoresults
            return overallresults
        else:
            # lr = Parser.LR([], rule, None, deepcopy(self.lrstack))
            lr = Parser.LR([], rule, None, None if not self.lrstack else self.lrstack.copy())
            self.lrstack = lr
            # self.memotable.update({(rule, j): [{'tree': lr, 'startpos': j, 'endpos': j}]})
            self.memotable[(rule, j)] = [{'tree': lr, 'startpos': j, 'endpos': j}]
            newresults = self.eval(rule, j)
            self.lrstack = self.lrstack.next
            memoresults = []
            if ((rule, j) in self.memotable):
                memoresults = self.memotable[(rule, j)]
            for melem in memoresults:
                if (isinstance(melem['tree'], Parser.LR) and melem['tree'].head):
                    melem['tree'].seed = newresults
                    r = self.lr_answer(rule, j, melem)
                    if (not r == []):
                        overallresults += r
            if (overallresults != []):  # prefer grown results
                return overallresults
            self.memotable.update({(rule, j): newresults})
            return newresults

    def lr_answer(self, rule, pos, melem):
        h = melem['tree'].head
        if (not h.rule == rule):
            return melem['tree'].seed
        else:
            melems = melem['tree'].seed
            result = []
            for melem_i in melems:
                if (not melem_i['tree'] == None):
                    result.append(melem_i)
            if (result == []):
                return []
            else:
                newresult = []
                for melem_i in result:
                    newresult.append(self.growLR(rule, pos, melem_i, h))
                return newresult

    def growLR(self, rule, pos, melem, head=None):
        self.heads.update({pos: head})
        while (True):
            overallresults = []
            head.evaluation = deepcopy(head.involved)
            newresults = self.eval(rule, pos)
            for elem in newresults:
                if (elem['endpos'] > melem['endpos']):
                    melem = elem
                    overallresults.append(elem)
            if (overallresults == []):
                self.heads.update({pos: None})
                return melem
            self.memotable.update({(rule, pos): overallresults})

    def eval(self, rulename, j):
        # Returns [{'tree': Tree(head=rulename, tail=[...], startpos=j, endpos=x), 'startpos': j, 'endpos': x}]
        # Raises Exception if there is no such token/rule
        if (rulename[0] == '@'):
            rulename = rulename[1:]
            if (not rulename in self.rules):
                raise Exception(rulename + ' rule not found in grammar rules.')
            rule = self.rules[rulename]
        elif (rulename[0] == '$'):
            rulename = rulename[1:]
            if (not rulename in self.tokens):
                raise Exception(rulename + ' token not found in grammar tokens.')
            rule = self.tokens[rulename]
        else:
            # raise Exception('Plain terminals not allowed inside grammar rules: ' + str(rulename))
            # we create an anonymous token rule;
            # we can use any fake type we want, as long as it is not equal to the type of a production rule or of a token
            rule = {'type': 'anonymous_token'}
        if (self.isType(rule, Parser.Constants.Production)):
            newresults = []
            results = self.eval_body(rulename, rule['body'], j)
            for r in results:
                if (r['tree']):
                    head = r['tree']['head']
                    if (head == '*' or head == '+' or head == '?' or head == '|' or head == '.'):
                        newr = {"tree": {"head": rulename, "tail": [r['tree']], "startpos": r['startpos'], 'endpos': r['endpos']}, "startpos": r['startpos'], "endpos": r["endpos"]}
                        r = newr
                newresults.append(r)
        elif (self.isType(rule, Parser.Constants.Token)):
            newresults = self.term(rulename, j)
        else:  # changed by Daniel: if it is neither a production rule nor a defined token, we try an anonymous token
            newresults = self.anonTerm(rulename, j)
        return newresults

    def eval_body(self, rulename, ls, j):
        # Delegates the task to the sub-functions alt, seq, opt, many, more, card
        # Returns the same list as eval
        # Raises Exception if the first element in the body is not in {'|', '.', '?', '*', '+', '#'}
        if (self.isAlternative(ls[0])):
            return self.alt(rulename, ls[1:], j)
        elif (self.isSequence(ls[0])):
            return self.seq(rulename, ls[1:], j)
        elif (self.isOptional(ls[0])):
            return self.opt(rulename, ls[1:], j)
        elif (self.isMany(ls[0])):
            return self.many(rulename, ls[1:], j)
        elif (self.isMore(ls[0])):
            return self.more(rulename, ls[1:], j)
        elif (self.isCard(ls[0])):
            return self.card(rulename, ls[0][1:], ls[1:], j)
        raise Exception('Unrecognized grammar expression: ' + str(ls[0]))
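
    # Illustrative note (editor's sketch, not part of the original code): a rule body is a nested
    # list whose first element is an operator. For example, assuming hypothetical tokens NUM and
    # PLUS, an addition rule body could look like:
    #     ['.', '$NUM', ['*', ['.', '$PLUS', '$NUM']]]
    # i.e. a NUM followed by zero or more (PLUS NUM) groups; '|' would make it a choice,
    # '?' optional, '+' one-or-more, and '#' a cardinality driven by a counted rule.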

    def isSequence(self, operator):
        return operator == '.'

    def isAlternative(self, operator):
        return operator == '|'

    def isMany(self, operator):
        return operator == '*'

    def isCard(self, operator):
        return operator.startswith('#')

    def isMore(self, operator):
        return operator == '+'

    def isOptional(self, operator):
        return operator == '?'

    def isType(self, rule, oftype):
        if (rule['type'] == oftype):
            return True

    def term(self, rulename, j):
        if (j >= len(self.input)):
            errortext = ''
            if (rulename in self.tokens and 'errortext' in self.tokens[rulename]):
                errortext = self.tokens[rulename]['errortext']
            self.failure.update({rulename: {'startpos': j, 'text': errortext}})
            return []
        rule = self.tokens[rulename]
        # mobj = re.match(rule['reg'], self.input[j:])
        mobj = re.match(rule['reg'], get_buffer(self.input, j))
        # changed by Daniel: instead of re.match(reg) this does re.match(re.compile(reg).pattern);
        # this avoids problems with backslashes: before this change, matching the character \ required writing [\\\\],
        # because writing only two backslashes would have to be r'[\\]', which can't be written directly in the grammar
        # and had to be in string form. This way [\\] is interpreted correctly instead of giving an error like it used to.
        if (not mobj):
            # this is a failure! nice to register!
            self.failure.update({rulename: {'startpos': j, 'text': self.tokens[rulename]['errortext']}})
            return []
        return [{'tree': {"head": rulename, "tail": [mobj.group()], "startpos": j, "endpos": j + mobj.end()}, 'startpos': j, 'endpos': j + mobj.end()}]

    def anonTerm(self, term, j):
        """
        Changed by Daniel: added this whole method.
        Anonymous terminal, to allow writing terminals directly in rules
        (write 'Foo' directly instead of having to define a FOO token).
        """
        qt = '\''
        name = qt + term + qt
        if (j >= len(self.input)):
            self.failure.update({name: {'startpos': j, 'text': name}})
            return []
        mobj = re.match(term, get_buffer(self.input, j))
        if (not mobj):
            # this is a failure! nice to register!
            self.failure.update({name: {'startpos': j, 'text': name}})
            return []
        return [{'tree': {"head": name, "tail": [mobj.group()], "startpos": j, "endpos": j + mobj.end()}, 'startpos': j, 'endpos': j + mobj.end()}]

    def many(self, rulename, ls, j):
        rule_i = ls[0]
        if (isinstance(rule_i, list)):
            results = self.eval_body('*', rule_i, j)
        else:
            results = self.applyrule(rule_i, j)
        if (results == []):
            return [{'tree': None, 'startpos': j, 'endpos': j}]
        seq = ['.'] + ls + [['*'] + ls]
        results = self.eval_body('*', seq, j)
        overall_results = []
        for r in results:
            if (r['tree']):
                if (len(r['tree']['tail']) > 1):
                    left = r['tree']['tail'][0]
                    right = r['tree']['tail'][1]['tail']
                    r['tree']['tail'] = [left] + right
                overall_results.append(r)
        return overall_results

    def more(self, rulename, ls, j):
        rule_i = ls[0]
        if (isinstance(rule_i, list)):
            results = self.eval_body('+', rule_i, j)
        else:
            results = self.applyrule(rule_i, j)
        if (results == []):
            return []
        seq = ['.'] + ls + [['*'] + ls]
        results = self.eval_body('+', seq, j)
        overall_results = []
        for r in results:
            if (r['tree']):
                if (len(r['tree']['tail']) > 1):
                    left = r['tree']['tail'][0]
                    right = r['tree']['tail'][1]['tail']
                    r['tree']['tail'] = [left] + right
                overall_results.append(r)
        return overall_results

    def opt(self, rulename, ls, j):
        if (j >= len(self.input)):
            errortext = ''
            if (rulename in self.rules and 'errortext' in self.rules[rulename]):
                errortext = self.rules[rulename]['errortext']
            else:
                for item in ls:
                    if ((not isinstance(item[1:], list)) and item[1:] in self.rules):
                        errortext = self.rules[item[1:]]['errortext']
            self.failure.update({rulename: {'startpos': j, 'text': errortext}})
            return [{'tree': None, 'startpos': j, 'endpos': j}]
        results = []
        rule_i = ls[0]
        if (isinstance(rule_i, list)):
            results = self.eval_body('?', rule_i, j)
        else:
            results = self.applyrule(rule_i, j)
        if (not results == []):
            return results
        # empty case
        return [{'tree': None, 'startpos': j, 'endpos': j}]

    def card(self, rulename, cardrule, ls, j):
        count = 0
        delta = 1
        # a# a#(-1) #indent, #(-1)indent
        group = re.match(r'\((?P<delta>[-+]?\d+)\)(?P<rule>\S+)', cardrule)
        if (group):
            cardrule = group.group('rule')
            delta = int(group.group('delta'))
        if (not cardrule in self.countcard):
            count = delta
            self.countcard[cardrule] = {j: count}
        else:
            if not j in self.countcard[cardrule]:  # if we already know the count for j, then ignore..
                d = self.countcard[cardrule]
                lastcount = 0
                for i in sorted(d.keys()):
                    if i < j:
                        lastcount = d[i]
                    else:
                        break
                count = lastcount + delta
                d[j] = count
            else:
                count = self.countcard[cardrule][j]
        results = []
        rule_i = '@' + cardrule
        if (count == 0):
            results = [{'tree': None, 'startpos': j, 'endpos': j}]
        else:
            for i in range(0, count):
                if (results == []):
                    if (isinstance(rule_i, list)):
                        newresults = self.eval_body(rulename, rule_i, j)
                    else:
                        newresults = self.applyrule(rule_i, j)
                    if (newresults == []):
                        del self.countcard[cardrule][j]
                        return []
                    newresults = self.merge(rulename, newresults, {'startpos': j, 'endpos': j})
                else:
                    for elem_p in results:
                        if (isinstance(rule_i, list)):
                            newresults = self.eval_body(rulename, rule_i, elem_p['endpos'])
                        else:
                            newresults = self.applyrule(rule_i, elem_p['endpos'])
                        if (newresults == []):
                            del self.countcard[cardrule][j]
                            return []
                        newresults = self.merge(rulename, newresults, elem_p)
                results = newresults
        for rule_i in ls:
            for elem_p in results:
                if (isinstance(rule_i, list)):
                    newresults = self.eval_body(rulename, rule_i, elem_p['endpos'])
                else:
                    newresults = self.applyrule(rule_i, elem_p['endpos'])
                if (newresults == []):
                    del self.countcard[cardrule][j]
                    return []
                newresults = self.merge(rulename, newresults, elem_p)
            results = newresults
        del self.countcard[cardrule][j]
        return results

    def seq(self, rulename, ls, j):
        results = []
        for rule_i in ls:
            if (results == []):
                if (isinstance(rule_i, list)):
                    newresults = self.eval_body('.', rule_i, j)
                else:
                    newresults = self.applyrule(rule_i, j)
                if (newresults == []):
                    return []
                newresults = self.merge('.', newresults, {'startpos': j, 'endpos': j})
            else:
                r = []
                for elem_p in results:
                    if (isinstance(rule_i, list)):
                        newresults = self.eval_body('.', rule_i, elem_p['endpos'])
                    else:
                        newresults = self.applyrule(rule_i, elem_p['endpos'])
                    if (newresults == []):
                        return []
                    newresults = self.merge('.', newresults, elem_p)
            results = newresults
        return results

    def merge(self, rulename, newres, elem_p):
        # Brief: the tail of each new tree needs to be prepended with the tail of the previous tree
        # rulename: becomes the head of each tree in the returned list
        # newres: may have more than one tree in case of the alt operator: 'x' ('a' | 'b') 'y'
        # Returns the same list as eval: [{'tree': Tree(head=rulename, tail=[...], startpos=j, endpos=x), 'startpos': j, 'endpos': x}]
        results = []
        for elem_n in newres:
            tail = []
            if ('tree' in elem_p and elem_p['tree']):
                tail += elem_p['tree']['tail']
            if ('tree' in elem_n and elem_n['tree']):
                tail.append(elem_n['tree'])
            value = {'tree': {"head": rulename, "tail": tail, "startpos": elem_p['startpos'], "endpos": elem_n["endpos"]}, 'startpos': elem_p['startpos'], 'endpos': elem_n['endpos']}
            results += [value]
        return results

    def alt(self, rulename, ls, j):
        # Evaluates all alternatives using eval_body or applyrule
        # Returns the same list as eval: [{'tree': Tree(head=rulename, tail=[...], startpos=j, endpos=x), 'startpos': j, 'endpos': x}]
        overall_results = []
        results = []  # TODO: remove this variable as it's never used
        for rule_i in ls:
            if (isinstance(rule_i, list)):
                newresults = self.eval_body('|', rule_i, j)
            else:
                newresults = self.applyrule(rule_i, j)
            overall_results += newresults
        return overall_results

    class PositionPostProcessor(object):
        """
        This post processor changes absolute positions (indexes into the parsed string) into (line, column) positions.
        Added by Daniel.

        Efficiency note: this might slow things down quite a bit, since the position is recalculated for every node.
        1) An alternative would be to use the method only for the leaves and then traverse the tree bottom-up to build
           the intervals from the leftmost and rightmost children of each subtree; but since that involves an extra tree
           traversal it might not help much.
        2) Another possible improvement is to change the position-calculating method: instead of scanning the whole text
           for newlines every time a position is calculated, build a table of newline positions once.
           The line is then the index in the table of the newline closest to the given position, and the column is the
           difference between the position of that newline and the given position (maybe +1 or -1, check that).
        If this method doesn't slow things down too much, ignore this.
        """
        def __init__(self, method):
            self.calcPosMethod = method

        def inner_visit(self, tree):
            startDic = self.calcPosMethod(tree['startpos'])
            endDic = self.calcPosMethod(tree['endpos'])
            tree['startpos'] = (startDic["line"], startDic["column"])
            tree['endpos'] = (endDic["line"], endDic["column"])
            for item in tree['tail']:
                if (isinstance(item, dict)):
                    self.inner_visit(item)

        def visit(self, tree):
            if tree:
                self.inner_visit(tree)
            return tree


class DefaultPrinter(object):
    def __init__(self, output='console'):
        self.outputStream = ''
        self.output = output

    def inner_visit(self, tree):
        for item in tree['tail']:
            if (isinstance(item, dict)):
                self.inner_visit(item)
            else:
                self.outputStream += item

    def visit(self, tree):
        self.inner_visit(tree)
        if (self.output == 'console'):
            print(self.outputStream)


class PrettyPrinter(object):
    def __init__(self, output='console'):
        self.outputStream = ''
        self.output = output
        self.tabcount = -1

    def tab(self):
        tabspace = ''
        for i in range(0, self.tabcount):
            tabspace += ' '
        return tabspace

    def inner_visit(self, tree):
        self.tabcount += 1
        self.outputStream += self.tab()
        self.outputStream += 'node ' + tree['head'] + ':\n'
        for item in tree['tail']:
            if (isinstance(item, dict)):
                self.inner_visit(item)
            else:
                self.tabcount += 1
                self.outputStream += self.tab() + item + ' @' + str(tree['startpos']) + ' to ' + str(tree['endpos']) + ' \n'
                self.tabcount -= 1
        self.tabcount -= 1

    def visit(self, tree):
        self.inner_visit(tree)
        if (self.output == 'console'):
            print(self.outputStream)


class IgnorePostProcessor(object):
    def __init__(self, rules, tokens):
        self.rules = rules
        self.tokens = tokens

    def inner_visit(self, tree):
        results = []
        if (isinstance(tree, dict)):
            if (self.isHidden(tree['head'])):
                for item in tree['tail']:
                    ivlist = []
                    ivresult = self.inner_visit(item)
                    for elem in ivresult:
                        if (isinstance(elem, dict)):
                            ivlist += [elem]
                    results += ivlist
            else:
                tlist = []
                for item in tree['tail']:
                    tlist += self.inner_visit(item)
                tree['tail'] = tlist
                results += [tree]
            return results
        return [tree]

    def visit(self, tree):
        # start cannot be hidden
        tlist = []
        for item in tree['tail']:
            tlist += self.inner_visit(item)
        tree['tail'] = tlist
        return tree

    def isHidden(self, head):
        if (head == '*' or head == '+' or head == '?' or head == '|' or head == '.'):
            return True
        if (head in self.rules):
            return 'hidden' in self.rules[head] and self.rules[head]['hidden']
        elif (head in self.tokens):  # changed by Daniel: added the elif condition and return False otherwise; needed for anonymous tokens
            return 'hidden' in self.tokens[head] and self.tokens[head]['hidden']
        else:
            return False
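

# --- Usage sketch (editor's addition, not part of the original file) ---------------------------
# The Grammar class lives elsewhere in the project; Parser only needs an object exposing 'rules'
# and 'tokens' dictionaries in the shapes used above ('type', 'body', 'reg', 'errortext').
# The token names NUM/PLUS and the 'start' rule body below are illustrative assumptions, not part
# of any grammar shipped with this module.
if __name__ == '__main__':
    class _DemoGrammar(object):
        tokens = {
            'NUM': {'type': 'token', 'reg': r'[0-9]+', 'errortext': 'a number'},
            'PLUS': {'type': 'token', 'reg': r'\+', 'errortext': "'+'"},
        }
        rules = {
            'start': {'type': 'prod', 'body': ['.', '$NUM', '$PLUS', '$NUM'], 'errortext': 'an addition'},
        }

    result = Parser(_DemoGrammar()).parse("1+2")
    if result['status'] == Parser.Constants.Success:
        print(Tree.get_text(result['tree']))  # reconstructs the matched text: '1+2'
    else:
        print(result['text'])                 # human-readable syntax error message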