compiler.py 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. try:
  2. import cPickle as pickle
  3. except ImportError:
  4. import pickle as pickle
  5. import os
  6. import sys
  7. import time
  8. from interface.HUTN.hutn_compiler.grammar_compiler_visitor import GrammarCompilerVisitor
  9. from interface.HUTN.hutn_compiler.hutnparser import Parser, Tree
  10. from interface.HUTN.hutn_compiler.meta_grammar import Grammar
  11. from interface.HUTN.hutn_compiler.cached_exception import CachedException
  12. import hashlib
  13. global parsers
  14. parsers = {}
  15. sys.setrecursionlimit(2000)
  16. def read(filename):
  17. with open(filename, 'r') as f:
  18. return f.read()
  19. def md5digest(data):
  20. hasher = hashlib.md5()
  21. try:
  22. data = data.encode('utf-8')
  23. except UnicodeDecodeError:
  24. pass
  25. except AttributeError:
  26. pass
  27. hasher.update(data)
  28. return hasher.hexdigest()
  29. def fetch_cached(data, mode=None):
  30. try:
  31. md5 = md5digest(data)
  32. cache_folder = os.path.abspath("%s/../caches/" % (os.path.dirname(os.path.abspath(__file__))))
  33. if mode is None:
  34. picklefile = cache_folder + "/%s.pickle" % md5
  35. else:
  36. picklefile = cache_folder + "/%s_%s.pickle" % (mode, md5)
  37. with open(picklefile, "rb") as f:
  38. return pickle.load(f)
  39. except:
  40. return None
  41. def make_cached(original_data, data, mode=None):
  42. md5 = md5digest(original_data)
  43. cache_folder = os.path.abspath("%s/../caches/" % (os.path.dirname(os.path.abspath(__file__))))
  44. if mode is None:
  45. picklefile = cache_folder + "/%s.pickle" % md5
  46. else:
  47. picklefile = cache_folder + "/%s_%s.pickle" % (mode, md5)
  48. with open(picklefile, "wb") as f:
  49. pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)
  50. def do_parse(inputfile, grammarfile):
  51. if grammarfile not in parsers:
  52. grammar = fetch_cached(read(grammarfile))
  53. if grammar is None:
  54. result = parser = Parser(Grammar(), hide_implicit = True).parse(read(grammarfile))
  55. if result['status'] != Parser.Constants.Success:
  56. print('not a valid grammar!')
  57. print(result)
  58. tree = result['tree']
  59. visitor = GrammarCompilerVisitor()
  60. structure = visitor.visit(tree)
  61. grammar = Grammar()
  62. grammar.rules = structure['rules']
  63. grammar.tokens = structure['tokens']
  64. make_cached(read(grammarfile), grammar)
  65. parsers[grammarfile] = grammar
  66. else:
  67. grammar = parsers[grammarfile]
  68. result = fetch_cached(read(inputfile))
  69. if result is None:
  70. result = Parser(grammar, line_position = True).parse(read(inputfile))
  71. if result['status'] != Parser.Constants.Success:
  72. lines = open(inputfile, 'r').readlines()
  73. begin_line = max(result["line"] - 3, 0)
  74. end_line = min(result["line"] + 3, len(lines))
  75. lines = lines[begin_line:end_line]
  76. lines = ["%s: %s" % (begin_line + i + 1, line) for i, line in enumerate(lines)]
  77. lines = "".join(lines)
  78. msg = "%s:%s:%s: %s\nContext:\n%s" % (inputfile, result["line"], result["column"], result["text"], lines)
  79. raise Exception(msg)
  80. make_cached(read(inputfile), result)
  81. return result
  82. def find_file(filename, include_paths):
  83. import os.path
  84. include_paths = ["."] + \
  85. [os.path.abspath(os.path.dirname(working_file))] + \
  86. [os.path.abspath("%s/../includes/" % (os.path.dirname(os.path.abspath(__file__))))] + \
  87. include_paths + \
  88. []
  89. attempts = []
  90. for include in include_paths:
  91. testfile = include + os.sep + filename
  92. if os.path.isfile(os.path.abspath(testfile)):
  93. return os.path.abspath(testfile)
  94. else:
  95. attempts.append(os.path.abspath(testfile))
  96. else:
  97. raise Exception("Could not resolve file %s. Tried: %s" % (filename, attempts))
  98. def do_compile(inputfile, grammarfile, visitors=[], include_paths = [], mode=""):
  99. import os.path
  100. global working_file
  101. working_file = os.path.abspath(inputfile)
  102. result = do_parse(inputfile, grammarfile)
  103. error = result["status"] != Parser.Constants.Success
  104. if error:
  105. lines = open(working_file, 'r').readlines()
  106. begin_line = max(result["line"] - 3, 0)
  107. end_line = max(result["line"] + 3, len(lines))
  108. lines = lines[begin_line:end_line]
  109. lines = ["%s: %s" % (begin_line + i + 1, line) for i, line in enumerate(lines)]
  110. lines = "".join(lines)
  111. msg = "%s:%s:%s: %s\nContext:\n%s" % (inputfile, result["line"], result["column"], result["text"], lines)
  112. raise Exception(msg)
  113. else:
  114. for child in result["tree"].tail:
  115. child.inputfile = inputfile
  116. included = set()
  117. while True:
  118. for i, v in enumerate(result["tree"].tail):
  119. if v.head == "include":
  120. # Expand this node
  121. for j in v.tail:
  122. if j.head == "STRVALUE":
  123. f = str(j.tail[0])[1:-1]
  124. if f in included:
  125. subtree = []
  126. else:
  127. name = str(j.tail[0])[1:-1]
  128. subtree = do_parse(find_file(name, include_paths), grammarfile)["tree"].tail
  129. if subtree is None:
  130. raise Exception("Parsing error for included file %s" % find_file(name, include_paths))
  131. for t in subtree:
  132. t.inputfile = name
  133. included.add(f)
  134. # Found the string value, so break from the inner for ("searching for element")
  135. break
  136. # Merge all nodes in
  137. before = result["tree"].tail[:i]
  138. after = result["tree"].tail[i+1:]
  139. result["tree"].tail = before + subtree + after
  140. # Found an include node, but to prevent corruption of the tree, we need to start over again, so break from the outer for loop
  141. break
  142. else:
  143. # The outer for finally finished, so there were no includes remaining, thus terminate the infinite while loop
  144. break
  145. tree_data = pickle.dumps(result["tree"], pickle.HIGHEST_PROTOCOL)
  146. new_result = fetch_cached(tree_data, mode)
  147. if new_result is None:
  148. result["tree"].fix_tracability(inputfile)
  149. for visitor in visitors:
  150. visitor.visit(result["tree"])
  151. if visitors:
  152. result = visitors[-1].dump()
  153. make_cached(tree_data, result, mode)
  154. else:
  155. result = new_result
  156. if visitors:
  157. return result
  158. def main(input_file, grammar_file, mode, args=[]):
  159. from interface.HUTN.hutn_compiler.prettyprint_visitor import PrettyPrintVisitor
  160. from interface.HUTN.hutn_compiler.prettyprint_visitor import PrintVisitor
  161. from interface.HUTN.hutn_compiler.semantics_visitor import SemanticsVisitor
  162. from interface.HUTN.hutn_compiler.bootstrap_visitor import BootstrapVisitor
  163. from interface.HUTN.hutn_compiler.constructors_visitor import ConstructorsVisitor
  164. from interface.HUTN.hutn_compiler.model_visitor import ModelVisitor
  165. from interface.HUTN.hutn_compiler.model_bootstrap_visitor import ModelBootstrapVisitor
  166. modes = {
  167. "N" : [],
  168. "P" : [PrintVisitor],
  169. "PP" : [PrettyPrintVisitor],
  170. "BS" : [SemanticsVisitor, BootstrapVisitor],
  171. "CS" : [SemanticsVisitor, ConstructorsVisitor],
  172. "M" : [ModelVisitor],
  173. "MB" : [ModelBootstrapVisitor],
  174. }
  175. try:
  176. visitors = [v(args) for v in modes[mode]]
  177. result = do_compile(input_file, grammar_file, visitors, mode=mode)
  178. except CachedException:
  179. return True
  180. return result
  181. if __name__ == "__main__":
  182. if len(sys.argv) <= 2:
  183. print("Invocation: ")
  184. print(" %s input_file grammar_file mode [mode_params]*" % sys.argv[0])
  185. sys.exit(1)
  186. else:
  187. value = main(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4:])
  188. if value is not None:
  189. print(value)