123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215 |
- try:
- import cPickle as pickle
- except ImportError:
- import pickle as pickle
- import os
- import sys
- import time
- from interface.HUTN.hutn_compiler.grammar_compiler_visitor import GrammarCompilerVisitor
- from interface.HUTN.hutn_compiler.hutnparser import Parser, Tree
- from interface.HUTN.hutn_compiler.meta_grammar import Grammar
- from interface.HUTN.hutn_compiler.cached_exception import CachedException
- import hashlib
# In-memory memo of compiled grammars, keyed by the grammar file path exactly
# as it is handed to do_parse().
parsers = {}

# Raise the interpreter's recursion ceiling to 2000 frames.
# NOTE(review): presumably needed because parsing/visiting/pickling deeply
# nested trees recurses further than the default limit allows -- confirm.
sys.setrecursionlimit(2000)
def read(filename):
    """Return the complete content of the file at *filename*.

    The file is opened in text mode ('r') and is closed again before the
    function returns, whether or not reading succeeded.
    """
    handle = open(filename, 'r')
    try:
        return handle.read()
    finally:
        handle.close()
def md5digest(data):
    """Return the hexadecimal MD5 digest of *data*.

    Objects offering an ``encode`` method (text) are encoded as UTF-8 first;
    anything else is handed to the hash function unchanged.
    """
    try:
        payload = data.encode('utf-8')
    except AttributeError:
        # No encode() available (e.g. already bytes): hash it as it is.
        payload = data
    return hashlib.md5(payload).hexdigest()
def fetch_cached(data, mode=None):
    """Look up a previously pickled result for *data* in the on-disk cache.

    The cache entry lives in the ``../caches/`` directory relative to this
    module and is named after the MD5 digest of *data*. When *mode* is given
    it is prepended to the file name, so one input can hold an entry per mode.

    :param data: content (text or bytes) whose cached result is requested.
    :param mode: optional file-name prefix distinguishing entries.
    :returns: the unpickled object, or None when no usable entry exists
              (missing file, unreadable or corrupt pickle, ...).
    """
    try:
        md5 = md5digest(data)
        cache_folder = os.path.abspath("%s/../caches/" % (os.path.dirname(os.path.abspath(__file__))))
        if mode is None:
            picklefile = cache_folder + "/%s.pickle" % md5
        else:
            picklefile = cache_folder + "/%s_%s.pickle" % (mode, md5)
        # NOTE: unpickling can execute arbitrary code. This is only safe as
        # long as the caches directory is writable by trusted users only.
        with open(picklefile, "rb") as f:
            return pickle.load(f)
    except Exception:
        # The cache is best-effort: every failure counts as a miss. Catching
        # Exception instead of using a bare except lets KeyboardInterrupt and
        # SystemExit propagate instead of being reported as a cache miss.
        return None
def make_cached(original_data, data, mode=None):
    """Pickle *data* into the on-disk cache under the digest of *original_data*.

    The entry is written to the ``../caches/`` directory relative to this
    module. Its file name is the MD5 digest of *original_data*, prefixed with
    *mode* and an underscore when *mode* is not None. The highest available
    pickle protocol is used.
    """
    digest = md5digest(original_data)
    module_dir = os.path.dirname(os.path.abspath(__file__))
    cache_folder = os.path.abspath("%s/../caches/" % (module_dir))
    if mode is not None:
        basename = "%s_%s.pickle" % (mode, digest)
    else:
        basename = "%s.pickle" % digest
    picklefile = cache_folder + "/" + basename
    with open(picklefile, "wb") as out:
        pickle.dump(data, out, pickle.HIGHEST_PROTOCOL)
def do_parse(inputfile, grammarfile):
    """Parse *inputfile* using the grammar described in *grammarfile*.

    Compiled grammars are memoised per grammar file in the module-level
    ``parsers`` dictionary and additionally cached on disk (keyed by the
    grammar file's content). Parse results are cached on disk as well, keyed
    by the content of *inputfile*; a freshly parsed result is only cached
    when parsing succeeded.

    :param inputfile: path of the file to parse.
    :param grammarfile: path of the grammar definition to parse it with.
    :returns: the result dictionary produced by ``Parser.parse`` (or the
              cached copy of it).
    :raises Exception: when *inputfile* cannot be parsed; the message holds
                       the error position and the surrounding source lines.
    """
    if grammarfile not in parsers:
        # Read the grammar once so that the cache key always matches the
        # text that was actually compiled.
        grammar_source = read(grammarfile)
        grammar = fetch_cached(grammar_source)
        if grammar is None:
            result = Parser(Grammar(), hide_implicit = True).parse(grammar_source)
            if result['status'] != Parser.Constants.Success:
                print('not a valid grammar!')
                print(result)
            # NOTE(review): compilation continues after an invalid grammar
            # has been reported; presumably the visitor below fails on the
            # missing tree -- confirm whether raising here would be better.
            tree = result['tree']
            structure = GrammarCompilerVisitor().visit(tree)
            grammar = Grammar()
            grammar.rules = structure['rules']
            grammar.tokens = structure['tokens']
            make_cached(grammar_source, grammar)
        parsers[grammarfile] = grammar
    else:
        grammar = parsers[grammarfile]

    # Same reasoning as above: a single read serves lookup, parse and store.
    source = read(inputfile)
    result = fetch_cached(source)
    if result is None:
        result = Parser(grammar, line_position = True).parse(source)
        if result['status'] != Parser.Constants.Success:
            # Show up to three lines on either side of the reported line.
            with open(inputfile, 'r') as f:
                lines = f.readlines()
            begin_line = max(result["line"] - 3, 0)
            end_line = min(result["line"] + 3, len(lines))
            lines = lines[begin_line:end_line]
            lines = ["%s: %s" % (begin_line + i + 1, line) for i, line in enumerate(lines)]
            lines = "".join(lines)
            msg = "%s:%s:%s: %s\nContext:\n%s" % (inputfile, result["line"], result["column"], result["text"], lines)
            raise Exception(msg)
        make_cached(source, result)
    return result
def find_file(filename, include_paths):
    """Resolve *filename* against the include search path.

    Directories are tried in this order: the current directory, the
    directory of the file currently being compiled (module-level
    ``working_file``, which do_compile sets), the ``../includes/`` directory
    relative to this module, and finally every entry of *include_paths*.

    :param filename: file name (or relative path) to look for.
    :param include_paths: additional directories to search, in order.
    :returns: absolute path of the first candidate that is an existing file.
    :raises Exception: when no candidate exists; the message lists every
                       absolute path that was tried.
    """
    search_dirs = ["."]
    # working_file only exists after do_compile has run. When find_file is
    # used on its own, skip that directory instead of failing with NameError.
    current = globals().get("working_file")
    if current is not None:
        search_dirs.append(os.path.abspath(os.path.dirname(current)))
    search_dirs.append(os.path.abspath("%s/../includes/" % (os.path.dirname(os.path.abspath(__file__)))))
    search_dirs.extend(include_paths)

    attempts = []
    for include in search_dirs:
        candidate = os.path.abspath(include + os.sep + filename)
        if os.path.isfile(candidate):
            return candidate
        attempts.append(candidate)
    raise Exception("Could not resolve file %s. Tried: %s" % (filename, attempts))
def do_compile(inputfile, grammarfile, visitors=None, include_paths=None, mode=""):
    """Parse *inputfile*, expand its includes and run *visitors* over the tree.

    Top-level ``include`` nodes are replaced by the top-level nodes of the
    file they name (resolved through find_file); a file that was already
    included contributes nothing the second time. The pickled, fully expanded
    tree together with *mode* is then used as the key of the on-disk result
    cache, so the visitors only run on a cache miss.

    :param inputfile: path of the file to compile; also stored (as absolute
                      path) in the module-level ``working_file``.
    :param grammarfile: path of the grammar used for the input and includes.
    :param visitors: visitor objects applied in order; the ``dump()`` of the
                     last one is the compilation result. Defaults to none.
    :param include_paths: extra directories searched for included files.
    :param mode: cache-entry prefix separating results of different modes.
    :returns: the last visitor's ``dump()`` output (or its cached copy), or
              None when no visitors were given.
    :raises Exception: on a parse error, or when an included file cannot be
                       resolved or parsed.
    """
    global working_file
    # None instead of mutable [] defaults; behaviour for callers is the same.
    visitors = [] if visitors is None else visitors
    include_paths = [] if include_paths is None else include_paths

    working_file = os.path.abspath(inputfile)
    result = do_parse(inputfile, grammarfile)
    if result["status"] != Parser.Constants.Success:
        with open(working_file, 'r') as f:
            lines = f.readlines()
        begin_line = max(result["line"] - 3, 0)
        # min (as in do_parse): show three lines of context, clamped to the
        # end of the file. The former max() always ran to the end of the file.
        end_line = min(result["line"] + 3, len(lines))
        lines = lines[begin_line:end_line]
        lines = ["%s: %s" % (begin_line + i + 1, line) for i, line in enumerate(lines)]
        lines = "".join(lines)
        msg = "%s:%s:%s: %s\nContext:\n%s" % (inputfile, result["line"], result["column"], result["text"], lines)
        raise Exception(msg)

    for child in result["tree"].tail:
        child.inputfile = inputfile

    included = set()
    while True:
        for i, v in enumerate(result["tree"].tail):
            if v.head != "include":
                continue
            # Expand this node. Starting from an empty list means an include
            # node without a STRVALUE child expands to nothing instead of
            # reusing the nodes of a previously expanded include.
            subtree = []
            for j in v.tail:
                if j.head == "STRVALUE":
                    # Strip the surrounding quote characters.
                    name = str(j.tail[0])[1:-1]
                    if name not in included:
                        path = find_file(name, include_paths)
                        subtree = do_parse(path, grammarfile)["tree"].tail
                        if subtree is None:
                            raise Exception("Parsing error for included file %s" % path)
                        for t in subtree:
                            t.inputfile = name
                        included.add(name)
                    # Found the string value, stop searching this node.
                    break
            # Splice the included nodes in where the include node was.
            tail = result["tree"].tail
            result["tree"].tail = tail[:i] + subtree + tail[i+1:]
            # The list being iterated was replaced, so rescan from the start.
            break
        else:
            # A full pass found no include node: expansion is complete.
            break

    tree_data = pickle.dumps(result["tree"], pickle.HIGHEST_PROTOCOL)
    new_result = fetch_cached(tree_data, mode)
    if new_result is None:
        result["tree"].fix_tracability(inputfile)
        for visitor in visitors:
            visitor.visit(result["tree"])
        if visitors:
            result = visitors[-1].dump()
            make_cached(tree_data, result, mode)
    else:
        result = new_result

    if visitors:
        return result
    return None
def main(input_file, grammar_file, mode, args=[]):
    """Compile *input_file* with the visitor pipeline selected by *mode*.

    *mode* is one of "N", "P", "PP", "BS", "CS", "M" or "MB"; any other
    value raises KeyError. Every visitor class of the selected pipeline is
    instantiated with *args*. Returns whatever do_compile returns, or True
    when a CachedException is raised along the way.
    """
    from interface.HUTN.hutn_compiler.prettyprint_visitor import PrettyPrintVisitor, PrintVisitor
    from interface.HUTN.hutn_compiler.semantics_visitor import SemanticsVisitor
    from interface.HUTN.hutn_compiler.bootstrap_visitor import BootstrapVisitor
    from interface.HUTN.hutn_compiler.constructors_visitor import ConstructorsVisitor
    from interface.HUTN.hutn_compiler.model_visitor import ModelVisitor
    from interface.HUTN.hutn_compiler.model_bootstrap_visitor import ModelBootstrapVisitor

    # Visitor classes to chain, per mode name.
    pipelines = {
        "N": [],
        "P": [PrintVisitor],
        "PP": [PrettyPrintVisitor],
        "BS": [SemanticsVisitor, BootstrapVisitor],
        "CS": [SemanticsVisitor, ConstructorsVisitor],
        "M": [ModelVisitor],
        "MB": [ModelBootstrapVisitor],
    }
    try:
        visitors = []
        for visitor_class in pipelines[mode]:
            visitors.append(visitor_class(args))
        return do_compile(input_file, grammar_file, visitors, mode=mode)
    except CachedException:
        return True
if __name__ == "__main__":
    # Command-line entry point: input_file, grammar_file and mode are all
    # mandatory, so at least four argv entries (script name included) are
    # needed. The former "<= 2" check let a call without mode through, which
    # then died with an IndexError on sys.argv[3].
    if len(sys.argv) <= 3:
        print("Invocation: ")
        print(" %s input_file grammar_file mode [mode_params]*" % sys.argv[0])
        sys.exit(1)
    value = main(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4:])
    # Modes without visitors yield None; print only an actual result.
    if value is not None:
        print(value)
|