try:
    import cPickle as pickle
except ImportError:
    import pickle as pickle

import marshal
import os
import sys
import time

from hutn_compiler.grammar_compiler_visitor import GrammarCompilerVisitor
from hutn_compiler.hutnparser import Parser, Tree
from hutn_compiler.meta_grammar import Grammar
from hutn_compiler.cached_exception import CachedException
import hashlib

# Cache of compiled grammars, keyed on the grammar file name.
global parsers
parsers = {}

sys.setrecursionlimit(200000)

def read(filename):
    with open(filename, 'r') as f:
        return f.read()

def md5digest(data):
    """Return the hex MD5 digest of a string or bytes object."""
    hasher = hashlib.md5()
    try:
        data = data.encode('utf-8')
    except UnicodeDecodeError:
        pass
    except AttributeError:
        pass
    hasher.update(data)
    return hasher.hexdigest()

def fetch_cached(data, use_pickle):
    """Look up a cached value keyed on the MD5 of the source text.

    Returns None on any failure (missing or unreadable cache file).
    """
    try:
        md5 = md5digest(data)
        cache_folder = os.path.abspath("%s/../caches/" % (os.path.dirname(os.path.abspath(__file__))))
        picklefile = cache_folder + "/%s.pickle" % md5
        with open(picklefile, "rb") as f:
            if use_pickle:
                return pickle.load(f)
            else:
                return marshal.load(f)
    except:
        return None

def make_cached(original_data, data, use_pickle):
    """Store 'data' in the cache, keyed on the MD5 of 'original_data'."""
    md5 = md5digest(original_data)
    cache_folder = os.path.abspath("%s/../caches/" % (os.path.dirname(os.path.abspath(__file__))))
    picklefile = cache_folder + "/%s.pickle" % md5
    with open(picklefile, "wb") as f:
        if use_pickle:
            pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)
        else:
            marshal.dump(data, f)

def do_parse(inputfile, grammarfile):
    """Parse 'inputfile' with the grammar in 'grammarfile', using the caches where possible."""
    if grammarfile not in parsers:
        grammar = fetch_cached(read(grammarfile), True)
        if grammar is None:
            # Compile the grammar from its textual definition.
            result = Parser(Grammar(), hide_implicit=True).parse(read(grammarfile))
            if result['status'] != Parser.Constants.Success:
                print('not a valid grammar!')
                print(result)
            tree = result['tree']
            visitor = GrammarCompilerVisitor()
            structure = visitor.visit(tree)
            grammar = Grammar()
            grammar.rules = structure['rules']
            grammar.tokens = structure['tokens']
            make_cached(read(grammarfile), grammar, True)
        parsers[grammarfile] = grammar
    else:
        grammar = parsers[grammarfile]

    result = fetch_cached(read(inputfile), False)
    if result is None:
        result = Parser(grammar, line_position=True).parse(read(inputfile))
        if result['status'] != Parser.Constants.Success:
            # Build an error message with a few lines of context around the failure.
            lines = open(inputfile, 'r').readlines()
            begin_line = max(result["line"] - 3, 0)
            end_line = min(result["line"] + 3, len(lines))
            lines = lines[begin_line:end_line]
            lines = ["%s: %s" % (begin_line + i + 1, line) for i, line in enumerate(lines)]
            lines = "".join(lines)
            msg = "%s:%s:%s: %s\nContext:\n%s" % (inputfile, result["line"], result["column"], result["text"], lines)
            raise Exception(msg)
        make_cached(read(inputfile), result, False)
    return result

def find_file(filename, include_paths):
    """Resolve 'filename' against the default and user-supplied include paths."""
    import os.path
    include_paths = ["."] + \
                    [os.path.abspath(os.path.dirname(working_file))] + \
                    [os.path.abspath("%s/../includes/" % (os.path.dirname(os.path.abspath(__file__))))] + \
                    include_paths
    attempts = []
    for include in include_paths:
        testfile = include + os.sep + filename
        if os.path.isfile(os.path.abspath(testfile)):
            return os.path.abspath(testfile)
        else:
            attempts.append(os.path.abspath(testfile))
    raise Exception("Could not resolve file %s. Tried: %s" % (filename, attempts))

def do_compile(inputfile, grammarfile, visitors=[], include_paths=[], mode=""):
    import os.path
    global working_file
    working_file = os.path.abspath(inputfile)

    result = do_parse(inputfile, grammarfile)
    error = result["status"] != Parser.Constants.Success
    if error:
        # Build an error message with a few lines of context around the failure.
        lines = open(working_file, 'r').readlines()
        begin_line = max(result["line"] - 3, 0)
        end_line = min(result["line"] + 3, len(lines))
        lines = lines[begin_line:end_line]
        lines = ["%s: %s" % (begin_line + i + 1, line) for i, line in enumerate(lines)]
        lines = "".join(lines)
        msg = "%s:%s:%s: %s\nContext:\n%s" % (inputfile, result["line"], result["column"], result["text"], lines)
        raise Exception(msg)
    else:
        for child in result["tree"]['tail']:
            child['inputfile'] = inputfile
        included = set()
        while True:
            for i, v in enumerate(result["tree"]['tail']):
                if v['head'] == "include":
                    # Expand this node
                    for j in v['tail']:
                        if j['head'] == "STRVALUE":
                            f = str(j['tail'][0])[1:-1]
                            if f in included:
                                # Already included before: expand to nothing.
                                subtree = []
                            else:
                                name = str(j['tail'][0])[1:-1]
                                subtree = do_parse(find_file(name, include_paths), grammarfile)["tree"]['tail']
                                if subtree is None:
                                    raise Exception("Parsing error for included file %s" % find_file(name, include_paths))
                                for t in subtree:
                                    t['inputfile'] = name
                            included.add(f)
                            # Found the string value, so break from the inner for ("searching for element")
                            break

                    # Merge all nodes in
                    before = result["tree"]['tail'][:i]
                    after = result["tree"]['tail'][i + 1:]
                    result["tree"]['tail'] = before + subtree + after

                    # Found an include node, but to prevent corruption of the tree,
                    # we need to start over again, so break from the outer for loop
                    break
            else:
                # The outer for finally finished, so there were no includes remaining,
                # thus terminate the infinite while loop
                break

        Tree.fix_tracability(result['tree'], inputfile)

        for visitor in visitors:
            visitor.visit(result["tree"])

    if visitors:
        result = visitors[-1].dump()
    return result

def main(input_file, grammar_file, mode, args=[]):
    from hutn_compiler.prettyprint_visitor import PrettyPrintVisitor
    from hutn_compiler.prettyprint_visitor import PrintVisitor
    from hutn_compiler.semantics_visitor import SemanticsVisitor
    from hutn_compiler.bootstrap_visitor import BootstrapVisitor
    from hutn_compiler.constructors_visitor import ConstructorsVisitor
    from hutn_compiler.model_visitor import ModelVisitor
    from hutn_compiler.model_bootstrap_visitor import ModelBootstrapVisitor

    modes = {
        "N":  [],
        "P":  [PrintVisitor],
        "PP": [PrettyPrintVisitor],
        "BS": [SemanticsVisitor, BootstrapVisitor],
        "CS": [SemanticsVisitor, ConstructorsVisitor],
        "M":  [ModelVisitor],
        "MB": [ModelBootstrapVisitor],
    }

    try:
        visitors = [v(args) for v in modes[mode]]
        result = do_compile(input_file, grammar_file, visitors, mode=mode)
    except CachedException:
        return True
    return result

if __name__ == "__main__":
    if len(sys.argv) <= 3:
        print("Invocation: ")
        print("    %s input_file grammar_file mode [mode_params]*" % sys.argv[0])
        sys.exit(1)
    else:
        value = main(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4:])
        if value is not None:
            print(value)
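# ----------------------------------------------------------------------
# Usage sketch (illustrative only, not executed as part of this module).
# Assuming this file lives at hutn_compiler/compiler.py and that the
# sibling "caches" and "includes" directories exist, the compiler can
# also be driven programmatically instead of via the command line. The
# file names below ("my_model.mvc", "grammars/actionlanguage.g") are
# hypothetical placeholders, not files shipped with this module:
#
#     from hutn_compiler.compiler import main
#
#     # "CS" runs SemanticsVisitor followed by ConstructorsVisitor,
#     # as listed in the `modes` table in main() above.
#     constructors = main("my_model.mvc", "grammars/actionlanguage.g", "CS")
#
# main() returns True when a CachedException is raised during
# compilation; otherwise it returns the dump() of the last visitor,
# or the raw parse result when the mode uses no visitors ("N").
# ----------------------------------------------------------------------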