compiler.py
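
"""
Compiler front-end for HUTN sources. It parses an input file against a grammar
(the grammar itself is first parsed with the bundled meta-grammar), caches
parse results on disk keyed by the MD5 of the source text, expands include
statements, and runs a mode-dependent chain of visitors over the parse tree.
"""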

try:
    import cPickle as pickle  # Python 2: use the faster C implementation
except ImportError:
    import pickle as pickle   # Python 3: plain pickle (the C accelerator is built in)
import marshal
import os
import sys
import time
from hutn_compiler.grammar_compiler_visitor import GrammarCompilerVisitor
from hutn_compiler.hutnparser import Parser, Tree
from hutn_compiler.meta_grammar import Grammar
from hutn_compiler.cached_exception import CachedException
import hashlib

# Grammar objects compiled so far, keyed by grammar file name, so each grammar
# is only built once per process.
global parsers
parsers = {}

# Parsing and visiting large inputs recurses deeply.
sys.setrecursionlimit(200000)

def read(filename):
    with open(filename, 'r') as f:
        return f.read()

def md5digest(data):
    """Return the hex MD5 digest of a piece of source text (str or bytes)."""
    hasher = hashlib.md5()
    try:
        data = data.encode('utf-8')
    except UnicodeDecodeError:
        # Python 2 byte string with non-ASCII content: hash the raw bytes.
        pass
    except AttributeError:
        # Already a bytes object (Python 3): no encoding needed.
        pass
    hasher.update(data)
    return hasher.hexdigest()
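
# For example, md5digest("abc") returns "900150983cd24fb0d6963f7d28e17f72"
# (the standard MD5 test vector), whether "abc" arrives as str or bytes.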

def fetch_cached(data, use_pickle):
    """Return the cached result for this source text, or None on a cache miss."""
    try:
        md5 = md5digest(data)
        cache_folder = os.path.abspath("%s/../caches/" % (os.path.dirname(os.path.abspath(__file__))))
        picklefile = cache_folder + "/%s.pickle" % md5
        with open(picklefile, "rb") as f:
            if use_pickle:
                return pickle.load(f)
            else:
                return marshal.load(f)
    except:
        # Any failure (missing or unreadable cache file) is treated as a miss.
        return None

def make_cached(original_data, data, use_pickle):
    """Store 'data' on disk, keyed by the MD5 digest of 'original_data'."""
    md5 = md5digest(original_data)
    cache_folder = os.path.abspath("%s/../caches/" % (os.path.dirname(os.path.abspath(__file__))))
    picklefile = cache_folder + "/%s.pickle" % md5
    with open(picklefile, "wb") as f:
        if use_pickle:
            # Grammar objects are class instances, so they go through pickle.
            pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)
        else:
            # Parse results are plain dicts and lists, which marshal can store.
            marshal.dump(data, f)
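
# Minimal round-trip sketch (hypothetical data; assumes the caches/ directory
# next to this package exists and is writable):
#
#   source = read("some_grammar.g")             # hypothetical file name
#   make_cached(source, grammar_object, True)   # writes caches/<md5>.pickle
#   grammar = fetch_cached(source, True)        # unpickles it again, or None on a miss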

def do_parse(inputfile, grammarfile):
    """Parse inputfile with the grammar in grammarfile, using the on-disk caches."""
    if grammarfile not in parsers:
        # Try the grammar cache first; otherwise compile the grammar by parsing
        # it with the meta-grammar and visiting the resulting tree.
        grammar = fetch_cached(read(grammarfile), True)
        if grammar is None:
            result = Parser(Grammar(), hide_implicit=True).parse(read(grammarfile))
            if result['status'] != Parser.Constants.Success:
                print('not a valid grammar!')
                print(result)
            tree = result['tree']
            visitor = GrammarCompilerVisitor()
            structure = visitor.visit(tree)
            grammar = Grammar()
            grammar.rules = structure['rules']
            grammar.tokens = structure['tokens']
            make_cached(read(grammarfile), grammar, True)
        parsers[grammarfile] = grammar
    else:
        grammar = parsers[grammarfile]

    result = fetch_cached(read(inputfile), False)
    if result is None:
        result = Parser(grammar, line_position=True).parse(read(inputfile))
        if result['status'] != Parser.Constants.Success:
            # Report the error with a few lines of context around it.
            lines = open(inputfile, 'r').readlines()
            begin_line = max(result["line"] - 3, 0)
            end_line = min(result["line"] + 3, len(lines))
            lines = lines[begin_line:end_line]
            lines = ["%s: %s" % (begin_line + i + 1, line) for i, line in enumerate(lines)]
            lines = "".join(lines)
            msg = "%s:%s:%s: %s\nContext:\n%s" % (inputfile, result["line"], result["column"], result["text"], lines)
            raise Exception(msg)
        make_cached(read(inputfile), result, False)
    return result

def find_file(filename, include_paths):
    """Resolve an included file name against the include search path."""
    import os.path
    # Search order: the current directory, the directory of the file being
    # compiled, the bundled includes/ directory, then any user-supplied paths.
    include_paths = ["."] + \
                    [os.path.abspath(os.path.dirname(working_file))] + \
                    [os.path.abspath("%s/../includes/" % (os.path.dirname(os.path.abspath(__file__))))] + \
                    include_paths
    attempts = []
    for include in include_paths:
        testfile = include + os.sep + filename
        if os.path.isfile(os.path.abspath(testfile)):
            return os.path.abspath(testfile)
        else:
            attempts.append(os.path.abspath(testfile))
    raise Exception("Could not resolve file %s. Tried: %s" % (filename, attempts))
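
# Example (hypothetical file name and layout): find_file("primitives.alh", ["libs"])
# tries ./primitives.alh, <directory of the compiled file>/primitives.alh,
# <this package>/../includes/primitives.alh and libs/primitives.alh, in that
# order, returning the first path that exists and raising otherwise.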

def do_compile(inputfile, grammarfile, visitors=[], include_paths=[], mode=""):
    """Parse inputfile, expand its includes and run the given visitors over the tree."""
    import os.path
    global working_file
    working_file = os.path.abspath(inputfile)

    result = do_parse(inputfile, grammarfile)
    error = result["status"] != Parser.Constants.Success
    if error:
        # Report the error with a few lines of context around it.
        lines = open(working_file, 'r').readlines()
        begin_line = max(result["line"] - 3, 0)
        end_line = min(result["line"] + 3, len(lines))
        lines = lines[begin_line:end_line]
        lines = ["%s: %s" % (begin_line + i + 1, line) for i, line in enumerate(lines)]
        lines = "".join(lines)
        msg = "%s:%s:%s: %s\nContext:\n%s" % (inputfile, result["line"], result["column"], result["text"], lines)
        raise Exception(msg)
    else:
        for child in result["tree"]['tail']:
            child['inputfile'] = inputfile
        included = set()
        while True:
            for i, v in enumerate(result["tree"]['tail']):
                if v['head'] == "include":
                    # Expand this node
                    for j in v['tail']:
                        if j['head'] == "STRVALUE":
                            f = str(j['tail'][0])[1:-1]
                            if f in included:
                                # Already included earlier: expand to nothing.
                                subtree = []
                            else:
                                name = str(j['tail'][0])[1:-1]
                                subtree = do_parse(find_file(name, include_paths), grammarfile)["tree"]['tail']
                                if subtree is None:
                                    raise Exception("Parsing error for included file %s" % find_file(name, include_paths))
                                for t in subtree:
                                    t['inputfile'] = name
                                included.add(f)
                            # Found the string value, so break from the inner for ("searching for element")
                            break
                    # Merge all nodes in
                    before = result["tree"]['tail'][:i]
                    after = result["tree"]['tail'][i+1:]
                    result["tree"]['tail'] = before + subtree + after
                    # Found an include node, but to prevent corruption of the tree we need to start over, so break from the outer for loop
                    break
            else:
                # The outer for finished without a break, so no includes remain: terminate the while loop
                break

        Tree.fix_tracability(result['tree'], inputfile)

        for visitor in visitors:
            visitor.visit(result["tree"])

    if visitors:
        result = visitors[-1].dump()
    return result
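
# Sketch of a direct call (hypothetical file names; the visitor classes are the
# ones imported in main() below):
#
#   visitors = [ModelVisitor(args)]
#   output = do_compile("model.mvc", "grammars/modelling.g", visitors)
#
# which returns the dump() of the last visitor in the chain.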

def main(input_file, grammar_file, mode, args=[]):
    from hutn_compiler.prettyprint_visitor import PrettyPrintVisitor
    from hutn_compiler.prettyprint_visitor import PrintVisitor
    from hutn_compiler.semantics_visitor import SemanticsVisitor
    from hutn_compiler.bootstrap_visitor import BootstrapVisitor
    from hutn_compiler.constructors_visitor import ConstructorsVisitor
    from hutn_compiler.model_visitor import ModelVisitor
    from hutn_compiler.model_bootstrap_visitor import ModelBootstrapVisitor

    # Each mode selects the chain of visitors that is run over the parse tree.
    modes = {
        "N"  : [],
        "P"  : [PrintVisitor],
        "PP" : [PrettyPrintVisitor],
        "BS" : [SemanticsVisitor, BootstrapVisitor],
        "CS" : [SemanticsVisitor, ConstructorsVisitor],
        "M"  : [ModelVisitor],
        "MB" : [ModelBootstrapVisitor],
    }

    try:
        visitors = [v(args) for v in modes[mode]]
        result = do_compile(input_file, grammar_file, visitors, mode=mode)
    except CachedException:
        # A visitor signalled that a previously cached result can be reused.
        return True
    return result

if __name__ == "__main__":
    # input_file, grammar_file and mode are all required (sys.argv[1:4]).
    if len(sys.argv) < 4:
        print("Invocation: ")
        print(" %s input_file grammar_file mode [mode_params]*" % sys.argv[0])
        sys.exit(1)
    else:
        value = main(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4:])
        if value is not None:
            print(value)
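
# Example invocation (hypothetical file names):
#
#   python compiler.py model.mvc grammars/modelling.g M
#
# parses model.mvc with grammars/modelling.g, runs the ModelVisitor over the
# tree and prints its output; mode "PP" selects the PrettyPrintVisitor instead.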