"""Driver for the HUTN compiler.

Parses an input file with a grammar (itself parsed from a grammar file),
expands ``include`` nodes, runs a chain of visitors selected by a mode
string, and caches intermediate results as pickle files in ``../caches/``
relative to this file.
"""
try:
    import cPickle as pickle
except ImportError:
    import pickle as pickle

import os
import sys
import time

from interface.HUTN.hutn_compiler.grammar_compiler_visitor import GrammarCompilerVisitor
from interface.HUTN.hutn_compiler.hutnparser import Parser, Tree
from interface.HUTN.hutn_compiler.meta_grammar import Grammar
from interface.HUTN.hutn_compiler.cached_exception import CachedException
import hashlib

# In-memory cache of compiled grammars, keyed by grammar file name.
parsers = {}

# Absolute path of the file currently being compiled; set by do_compile and
# used by find_file to resolve includes relative to that file.
working_file = None

sys.setrecursionlimit(2000)


def read(filename):
    """Return the complete contents of *filename* as text."""
    with open(filename, 'r') as f:
        return f.read()


def md5digest(data):
    """Return the hexadecimal MD5 digest of *data*.

    Text is encoded as UTF-8 before hashing; data that is already ``bytes``
    is hashed as-is, so binary pickle payloads are never re-encoded.
    """
    hasher = hashlib.md5()
    if not isinstance(data, bytes):
        try:
            data = data.encode('utf-8')
        except AttributeError:
            # Not text (e.g. a bytearray): hash it unchanged.
            pass
    hasher.update(data)
    return hasher.hexdigest()


def _cache_path(data, mode=None):
    """Return the pickle file path used to cache results derived from *data*."""
    md5 = md5digest(data)
    cache_folder = os.path.abspath("%s/../caches/" % (os.path.dirname(os.path.abspath(__file__))))
    if mode is None:
        return cache_folder + "/%s.pickle" % md5
    return cache_folder + "/%s_%s.pickle" % (mode, md5)


def fetch_cached(data, mode=None):
    """Return the cached object stored for *data* (and *mode*), or None.

    Any failure (missing cache file, unreadable pickle, ...) is treated as a
    cache miss and yields None.
    """
    try:
        # NOTE: pickle.load executes arbitrary code from the file; the cache
        # folder must only ever contain files written by make_cached.
        with open(_cache_path(data, mode), "rb") as f:
            return pickle.load(f)
    except Exception:
        # Best-effort cache: a miss is never an error. Unlike a bare except,
        # this lets KeyboardInterrupt and SystemExit propagate.
        return None


def make_cached(original_data, data, mode=None):
    """Store *data* in the cache under the key derived from *original_data*."""
    with open(_cache_path(original_data, mode), "wb") as f:
        pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)


def _format_parse_error(inputfile, result):
    """Build the error message for a failed parse *result* of *inputfile*.

    The message has the form ``file:line:column: text`` followed by a few
    numbered source lines around the reported position.
    """
    with open(inputfile, 'r') as f:
        lines = f.readlines()
    begin_line = max(result["line"] - 3, 0)
    end_line = min(result["line"] + 3, len(lines))
    context = "".join(["%s: %s" % (begin_line + i + 1, line)
                       for i, line in enumerate(lines[begin_line:end_line])])
    return "%s:%s:%s: %s\nContext:\n%s" % (inputfile, result["line"], result["column"], result["text"], context)


def do_parse(inputfile, grammarfile):
    """Parse *inputfile* using the grammar described in *grammarfile*.

    Compiled grammars are memoized in ``parsers`` and on disk; parse results
    are cached on disk keyed by the input file's contents.

    Returns the parser's result dictionary.
    Raises Exception if the grammar or the input file cannot be parsed.
    """
    if grammarfile not in parsers:
        grammar = fetch_cached(read(grammarfile))
        if grammar is None:
            result = Parser(Grammar(), hide_implicit = True).parse(read(grammarfile))
            if result['status'] != Parser.Constants.Success:
                print('not a valid grammar!')
                print(result)
                # Continuing would only fail later on the missing parse tree.
                raise Exception("Grammar file %s is not a valid grammar" % grammarfile)

            tree = result['tree']
            visitor = GrammarCompilerVisitor()
            structure = visitor.visit(tree)
            grammar = Grammar()
            grammar.rules = structure['rules']
            grammar.tokens = structure['tokens']
            make_cached(read(grammarfile), grammar)
        parsers[grammarfile] = grammar
    else:
        grammar = parsers[grammarfile]

    result = fetch_cached(read(inputfile))
    if result is None:
        result = Parser(grammar, line_position = True).parse(read(inputfile))
        if result['status'] != Parser.Constants.Success:
            raise Exception(_format_parse_error(inputfile, result))
        make_cached(read(inputfile), result)

    return result


def find_file(filename, include_paths):
    """Resolve *filename* against the include search path.

    The search order is: the current directory, the directory of the file
    being compiled (when known), the ``../includes/`` folder next to this
    file, and finally the entries of *include_paths*.

    Returns the absolute path of the first match.
    Raises Exception listing every attempted path if nothing matches.
    """
    search_paths = ["."]
    if working_file is not None:
        search_paths.append(os.path.abspath(os.path.dirname(working_file)))
    search_paths.append(os.path.abspath("%s/../includes/" % (os.path.dirname(os.path.abspath(__file__)))))
    search_paths.extend(include_paths)

    attempts = []
    for include in search_paths:
        candidate = os.path.abspath(include + os.sep + filename)
        if os.path.isfile(candidate):
            return candidate
        attempts.append(candidate)

    raise Exception("Could not resolve file %s. Tried: %s" % (filename, attempts))


def do_compile(inputfile, grammarfile, visitors=None, include_paths=None, mode=""):
    """Parse *inputfile*, expand its includes and run *visitors* over the tree.

    visitors: visitor instances applied in order; the ``dump()`` of the last
        one is the compilation result. Defaults to no visitors.
    include_paths: extra directories searched when resolving includes.
    mode: string mixed into the cache key of the final result.

    Returns the result of the last visitor (possibly from the cache), or
    None when no visitors are given.
    Raises Exception on parse errors or unresolvable includes.
    """
    global working_file
    visitors = [] if visitors is None else visitors
    include_paths = [] if include_paths is None else include_paths
    working_file = os.path.abspath(inputfile)

    result = do_parse(inputfile, grammarfile)
    if result["status"] != Parser.Constants.Success:
        raise Exception(_format_parse_error(inputfile, result))

    for child in result["tree"].tail:
        child.inputfile = inputfile

    included = set()
    while True:
        for i, v in enumerate(result["tree"].tail):
            if v.head == "include":
                # Expand this node
                for j in v.tail:
                    if j.head == "STRVALUE":
                        # Strip the surrounding quotes from the string value
                        f = str(j.tail[0])[1:-1]
                        if f in included:
                            # Each file is included at most once
                            subtree = []
                        else:
                            include_file = find_file(f, include_paths)
                            subtree = do_parse(include_file, grammarfile)["tree"].tail
                            if subtree is None:
                                raise Exception("Parsing error for included file %s" % include_file)

                            for t in subtree:
                                t.inputfile = f
                            included.add(f)
                        # Found the string value, so break from the inner for ("searching for element")
                        break

                # Merge all nodes in
                before = result["tree"].tail[:i]
                after = result["tree"].tail[i+1:]
                result["tree"].tail = before + subtree + after
                # Found an include node, but to prevent corruption of the tree, we need to start over again, so break from the outer for loop
                break
        else:
            # The outer for finally finished, so there were no includes remaining, thus terminate the infinite while loop
            break

    # The fully expanded tree is the cache key for the visitor output.
    tree_data = pickle.dumps(result["tree"], pickle.HIGHEST_PROTOCOL)
    new_result = fetch_cached(tree_data, mode)
    if new_result is None:
        result["tree"].fix_tracability(inputfile)

        for visitor in visitors:
            visitor.visit(result["tree"])

        if visitors:
            result = visitors[-1].dump()
            make_cached(tree_data, result, mode)
    else:
        result = new_result

    if visitors:
        return result


def main(input_file, grammar_file, mode, args=None):
    """Compile *input_file* with *grammar_file* using the visitors of *mode*.

    mode: one of "N", "P", "PP", "BS", "CS", "M", "MB".
    args: parameters handed to every visitor constructor.

    Returns the compilation result, or True when a visitor signals through
    CachedException that its output is already cached.
    """
    from interface.HUTN.hutn_compiler.prettyprint_visitor import PrettyPrintVisitor
    from interface.HUTN.hutn_compiler.prettyprint_visitor import PrintVisitor
    from interface.HUTN.hutn_compiler.semantics_visitor import SemanticsVisitor
    from interface.HUTN.hutn_compiler.bootstrap_visitor import BootstrapVisitor
    from interface.HUTN.hutn_compiler.constructors_visitor import ConstructorsVisitor
    from interface.HUTN.hutn_compiler.model_visitor import ModelVisitor
    from interface.HUTN.hutn_compiler.model_bootstrap_visitor import ModelBootstrapVisitor

    args = [] if args is None else args
    modes = {
        "N" : [],
        "P" : [PrintVisitor],
        "PP" : [PrettyPrintVisitor],
        "BS" : [SemanticsVisitor, BootstrapVisitor],
        "CS" : [SemanticsVisitor, ConstructorsVisitor],
        "M" : [ModelVisitor],
        "MB" : [ModelBootstrapVisitor],
    }
    try:
        visitors = [v(args) for v in modes[mode]]
        result = do_compile(input_file, grammar_file, visitors, mode=mode)
    except CachedException:
        return True
    return result


if __name__ == "__main__":
    # input_file, grammar_file and mode are all mandatory.
    if len(sys.argv) <= 3:
        print("Invocation: ")
        print("   %s input_file grammar_file mode [mode_params]*" % sys.argv[0])
        sys.exit(1)
    else:
        value = main(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4:])
        if value is not None:
            print(value)