  1. """Converts performance data files (as produced by utils.py) to LaTeX charts."""
  2. import argparse
  3. import colorsys
  4. import utils
  5. # Generated LaTeX is based on the accepted answer to
  6. # http://tex.stackexchange.com/questions/101320/grouped-bar-chart
  7. # pylint: disable=I0011,W0141
  8. COLOR_SCHEME_MAX_COLOR = (8, 30, 89)
  9. COLOR_SCHEME_MIN_COLOR = (216, 33, 0)
  10. DEFAULT_BAR_WIDTH = 14
  11. LATEX_HEADER = r"""\documentclass[12pt,a4paper,onecolumn,openright]{report}
  12. \usepackage[landscape]{geometry}
  13. \usepackage{xcolor}
  14. \usepackage{pgfplots}
  15. \usepackage{tikz}
  16. \usepgfplotslibrary{units}
  17. % Define bar chart colors
  18. %"""
  19. LATEX_DOCUMENT_HEADER = r"""\begin{document}"""
  20. TIKZ_PICTURE_HEADER = r"""\begin{tikzpicture}"""
  21. TIKZ_PICTURE_FOOTER = r"""\end{tikzpicture}"""
  22. LATEX_DOCUMENT_FOOTER = r"""\end{document}"""


def encode_latex_string(value):
    """Encodes the given string as a LaTeX string."""
    # I guess this is good enough for now. This may need to be
    # revisited if we encounter more complicated names.
    return '{%s}' % value.replace('_', '\\_')
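
# For example (hypothetical test name), the escaping rule above turns an
# underscore into a LaTeX-safe sequence and wraps the result in braces:
#
#   >>> encode_latex_string('fib_naive')
#   '{fib\\_naive}'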


def assemble_latex_chart(optimization_levels,
                         color_defs,
                         test_names,
                         data,
                         embed=False,
                         bar_width=DEFAULT_BAR_WIDTH,
                         confidence_intervals=None):
    """Assembles a LaTeX chart from the given components."""
    lines = []
    if not embed:
        lines.append(LATEX_HEADER)
        for color_name, (red, green, blue) in color_defs:
            lines.append(r'\definecolor{%s}{HTML}{%02X%02X%02X}' %
                         (color_name, red, green, blue))
        lines.append(LATEX_DOCUMENT_HEADER)
    lines.append(TIKZ_PICTURE_HEADER)
    lines.append(r"""    \begin{axis}[
        width = 0.85*\textwidth,
        height = 8cm,
        major x tick style = transparent,
        ybar=2*\pgflinewidth,
        bar width=%dpt,
        ymajorgrids = true,
        ylabel = {Run time},
        symbolic x coords={%s},
        xtick = data,
        scaled y ticks = false,
        enlarge x limits=0.25,
        ymin=0,
        y unit=s,
        legend cell align=left,
        legend style={
            at={(1,1.05)},
            anchor=south east,
            column sep=1ex
        },
        nodes near coords,
        every node near coord/.append style={rotate=90, anchor=west}
    ]""" % (bar_width, ','.join(map(encode_latex_string, test_names))))
    for color_name, points in data:
        if confidence_intervals is None:
            lines.append(r"""
        \addplot[style={%s,fill=%s,mark=none}]
            coordinates {%s};""" % (color_name, color_name, ' '.join(
                [('(%s,%s)' % (encode_latex_string(name), measurement))
                 for name, measurement in points])))
        else:
            lines.append(r"""
        \addplot[style={%s,fill=%s,mark=none},error bars/.cd, y dir=both, y explicit,error bar style=darkgray]
            coordinates {%s};""" % (color_name, color_name, ' '.join(
                [('(%s,%s) += (%s,%s) -= (%s,%s)' %
                  (encode_latex_string(name), measurement,
                   encode_latex_string(name),
                   confidence_intervals[color_name][name][0],
                   encode_latex_string(name),
                   confidence_intervals[color_name][name][1]))
                 for name, measurement in points])))
    lines.append(r"""
        \legend{%s}""" %
                 ','.join(map(encode_latex_string, optimization_levels)))
    lines.append(r"""
    \end{axis}""")
    lines.append(TIKZ_PICTURE_FOOTER)
    if not embed:
        lines.append(LATEX_DOCUMENT_FOOTER)
    return '\n'.join(lines)
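
# Sketch of the expected argument shapes (hypothetical values):
#
#   assemble_latex_chart(
#       optimization_levels=['O0', 'O3'],
#       color_defs=[('chartColor0', (216, 33, 0)), ('chartColor1', (8, 30, 89))],
#       test_names=['fib', 'sort'],
#       data=[('chartColor0', [('fib', 1.8), ('sort', 2.4)]),
#             ('chartColor1', [('fib', 0.9), ('sort', 1.1)])],
#       embed=True)
#
# With embed=True only the tikzpicture is produced; with embed=False the chart
# is wrapped in a standalone LaTeX document that also defines the chart colors.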


def flip_tuple_keys(tuple_list):
    """Regroups a nested association list by its inner keys, preserving order."""
    index_map = {}
    results = []
    for top, top_data in tuple_list:
        for child, child_data in top_data:
            if child not in index_map:
                index_map[child] = len(results)
                results.append((child, []))
            _, child_list = results[index_map[child]]
            child_list.append((top, child_data))
    return results
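
# For example (hypothetical data), regrouping measurements by test name instead
# of by optimization level:
#
#   >>> flip_tuple_keys([('O2', [('fib', 1.0), ('sort', 2.0)]),
#   ...                  ('O3', [('fib', 0.5)])])
#   [('fib', [('O2', 1.0), ('O3', 0.5)]), ('sort', [('O2', 2.0)])]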


def assemble_stacked_latex_chart(optimization_levels,
                                 quantity_colors,
                                 test_names,
                                 data,
                                 embed=False,
                                 bar_width=DEFAULT_BAR_WIDTH):
    """Assembles a stacked LaTeX chart from the given components."""
    lines = []
    quantity_color_names = {
        name: 'chartColor%d' % i
        for i, (name, _) in enumerate(quantity_colors)
    }
    quantity_names = [name for name, _ in quantity_colors]
    if not embed:
        lines.append(LATEX_HEADER)
        for quantity_name, (red, green, blue) in quantity_colors:
            lines.append(
                r'\definecolor{%s}{HTML}{%02X%02X%02X}' %
                (quantity_color_names[quantity_name], red, green, blue))
        lines.append(LATEX_DOCUMENT_HEADER)
    lines.append(TIKZ_PICTURE_HEADER)
    lines.append(r"""    \begin{axis}[
        ybar stacked,
        legend style={
            legend columns=%d,
            at={(xticklabel cs:0.5)},
            anchor=north,
            draw=none
        },
        width=0.85*\textwidth,
        height=8cm,
        xtick=data,
        bar width=%dpt,
        ymin=0,
        major x tick style=transparent,
        ymajorgrids=true,
        xticklabels={%s},
        x tick label style={rotate=90,anchor=east,font=\scriptsize},
        tick label style={font=\footnotesize},
        legend style={font=\footnotesize,yshift=-3ex},
        label style={font=\footnotesize},
        ylabel={Time},
        y unit=s,
        area legend,
        nodes near coords,
        every node near coord/.append style={rotate=90, anchor=west}
    ]""" % (len(optimization_levels), bar_width, ','.join(
        [encode_latex_string(name) for name in optimization_levels] *
        len(test_names))))
    for quantity, points in data:
        contents = []
        i = 0
        for _, test_data in flip_tuple_keys(points):
            for _, measurement in test_data:
                contents.append('(%d,%s)' % (i, measurement))
                i += 1
            i += 1
        lines.append(r"""
        \addplot[style={%s,fill=%s,mark=none}]
            coordinates {%s};""" %
                     (quantity_color_names[quantity],
                      quantity_color_names[quantity], ' '.join(contents)))
    lines.append(r"""
        \legend{%s}""" % ','.join(map(encode_latex_string, quantity_names)))
    lines.append(r"""
    \end{axis}""")
    lines.append(TIKZ_PICTURE_FOOTER)
    if not embed:
        lines.append(LATEX_DOCUMENT_FOOTER)
    return '\n'.join(lines)


def create_latex_chart(perf_data,
                       sorted_opt_levels=None,
                       embed=False,
                       bar_width=DEFAULT_BAR_WIDTH,
                       confidence_intervals=None):
    """Creates a LaTeX chart for the given performance data."""
    if sorted_opt_levels is None:
        sorted_opt_levels = sort_by_runtime(perf_data)
    color_scheme = generate_color_scheme(sorted_opt_levels)
    opt_levels = []
    color_defs = []
    color_conf_intervals = {}
    test_names = []
    data = []
    for i, optimization_level in enumerate(sorted_opt_levels):
        measurements = perf_data[optimization_level]
        color = color_scheme[optimization_level]
        color_name = 'chartColor%d' % i
        opt_levels.append(optimization_level)
        color_defs.append((color_name, color))
        data.append((color_name, measurements.items()))
        color_conf_intervals[color_name] = {}
        for name, _ in measurements.items():
            if confidence_intervals is not None:
                color_conf_intervals[color_name][name] = confidence_intervals[
                    optimization_level][name]
            if name not in test_names:
                test_names.append(name)
    return assemble_latex_chart(
        opt_levels, color_defs, test_names, data, embed, bar_width,
        None if confidence_intervals is None else color_conf_intervals)
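
# Minimal sketch of a grouped chart (hypothetical measurements, in seconds):
#
#   perf_data = {'O0': {'fib': 2.0, 'sort': 3.0},
#                'O3': {'fib': 0.8, 'sort': 1.2}}
#   print(create_latex_chart(perf_data, embed=True))
#
# When sorted_opt_levels is omitted, the levels are ordered by sort_by_runtime,
# slowest first, so the baseline level appears first in the legend.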


def create_stacked_latex_chart(perf_data,
                               sorted_optimization_levels=None,
                               embed=False,
                               bar_width=DEFAULT_BAR_WIDTH):
    """Creates a stacked LaTeX chart for the given performance data."""
    color_scheme = generate_color_scheme([q for q, _ in perf_data])
    opt_levels = []
    quantity_colors = {}
    test_names = []
    for quantity, measurements in perf_data:
        quantity_colors[quantity] = color_scheme[quantity]
        for optimization_level, data_points in measurements:
            if optimization_level not in opt_levels:
                opt_levels.append(optimization_level)
            for name, _ in data_points:
                if name not in test_names:
                    test_names.append(name)
    # Sort the quantities by the sum of their per-level mean run-times.
    sorted_data = list(
        sorted(
            perf_data,
            key=lambda item: sum(
                get_mean_runtimes(perf_list_to_dict(item[1])).values())))
    if sorted_optimization_levels is None and len(sorted_data) > 0:
        sorted_optimization_levels = sort_by_runtime(
            perf_list_to_dict(sorted_data[0][1]))
    sorted_data = [(quantity, list(
        sorted(
            data,
            key=lambda point: sorted_optimization_levels.index(point[0]))))
                   for quantity, data in sorted_data]
    return assemble_stacked_latex_chart(
        sorted_optimization_levels,
        [(q, quantity_colors[q]) for q, _ in sorted_data], test_names,
        sorted_data, embed, bar_width)


def get_mean_runtimes(perf_data):
    """Computes the mean run-time of every optimization level in the given
    performance data."""
    return {
        opt_level: utils.mean(perf_data[opt_level].values())
        for opt_level in perf_data.keys()
    }
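
# For example (hypothetical data, assuming utils.mean is a plain arithmetic mean):
#
#   >>> get_mean_runtimes({'O0': {'fib': 2.0, 'sort': 4.0},
#   ...                    'O3': {'fib': 1.0, 'sort': 1.0}})
#   {'O0': 3.0, 'O3': 1.0}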


def get_baseline_optimization_level(perf_data):
    """Gets a baseline optimization level from the given performance data.
    This baseline optimization level is guaranteed to exist for every test case.
    If no baseline optimization level can be found, then None is returned."""
    # First find the names of all test cases.
    all_test_names = set()
    for optimization_level, measurements in perf_data.items():
        all_test_names.update(measurements.keys())
    # Filter optimization levels which are used for every test case.
    candidate_opt_levels = []
    for optimization_level, measurements in perf_data.items():
        if len(all_test_names) == len(measurements):
            candidate_opt_levels.append(optimization_level)
    if len(candidate_opt_levels) == 0:
        # Looks like there is no baseline optimization level.
        return None
    # Pick the optimization level with the highest total run-time as the baseline.
    return max(candidate_opt_levels,
               key=lambda opt_level: sum(perf_data[opt_level].values()))
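
# For example (hypothetical data): 'O3' lacks a 'sort' measurement, so only 'O0'
# and 'O2' are candidates, and 'O0' wins because its total run-time is larger:
#
#   >>> get_baseline_optimization_level({
#   ...     'O0': {'fib': 2.0, 'sort': 3.0},
#   ...     'O2': {'fib': 1.5, 'sort': 2.0},
#   ...     'O3': {'fib': 0.8}})
#   'O0'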


def get_relative_measurements(perf_data,
                              baseline_optimization_level,
                              baseline_perf_data=None):
    """Computes a map of measurements that are relative to the given optimization level."""
    if baseline_perf_data is None:
        baseline_perf_data = perf_data
    results = {}
    for optimization_level, measurements in perf_data.items():
        results[optimization_level] = {}
        for test_name, data_point in measurements.items():
            results[optimization_level][test_name] = (
                data_point /
                baseline_perf_data[baseline_optimization_level][test_name])
    return results
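
# For example (hypothetical data), normalizing against 'O0':
#
#   >>> get_relative_measurements(
#   ...     {'O0': {'fib': 2.0}, 'O3': {'fib': 0.5}}, 'O0')
#   {'O0': {'fib': 1.0}, 'O3': {'fib': 0.25}}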


def perf_list_to_dict(perf_list):
    """Converts performance data from a list representation to a dictionary representation."""
    return {opt_level: dict(tests) for opt_level, tests in perf_list}


def perf_dict_to_list(perf_dict):
    """Converts performance data from a dictionary representation to a list representation."""
    return [(opt_level, tests.items())
            for opt_level, tests in perf_dict.items()]
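
# The two representations round-trip (hypothetical data); the list form preserves
# ordering while the dictionary form supports keyed lookups:
#
#   >>> perf_list_to_dict([('O0', [('fib', 2.0)]), ('O3', [('fib', 0.8)])])
#   {'O0': {'fib': 2.0}, 'O3': {'fib': 0.8}}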


def interpolate(value_range, index, length):
    """Uses an index and a length to interpolate in the given range."""
    min_val, max_val = value_range
    if length == 1:
        return max_val
    else:
        return min_val + float(index) * (max_val - min_val) / float(length - 1)
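
# For example, walking three evenly spaced steps across the range (0.0, 1.0):
#
#   >>> [interpolate((0.0, 1.0), i, 3) for i in range(3)]
#   [0.0, 0.5, 1.0]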


def sort_by_runtime(perf_data):
    """Sorts the optimization levels by mean relative runtimes."""
    baseline_opt_level = get_baseline_optimization_level(perf_data)
    relative_perf = get_relative_measurements(perf_data, baseline_opt_level)
    # Sort the optimization levels by their mean runtimes.
    mean_runtimes = get_mean_runtimes(relative_perf)
    return list(
        sorted(
            mean_runtimes.keys(),
            key=lambda opt_level: mean_runtimes[opt_level],
            reverse=True))


def generate_color_scheme(sorted_opt_levels):
    """Assigns a color to every optimization level in the given performance data."""
    # Assign colors to the optimization levels by interpolating in HSV space
    # between the minimum and maximum scheme colors.
    color_scheme = {}
    min_hue, min_sat, min_val = colorsys.rgb_to_hsv(
        *[c / float(255) for c in COLOR_SCHEME_MIN_COLOR])
    max_hue, max_sat, max_val = colorsys.rgb_to_hsv(
        *[c / float(255) for c in COLOR_SCHEME_MAX_COLOR])
    for i, opt_level in enumerate(sorted_opt_levels):
        hue = interpolate((min_hue, max_hue), i, len(sorted_opt_levels))
        sat = interpolate((min_sat, max_sat), i, len(sorted_opt_levels))
        val = interpolate((min_val, max_val), i, len(sorted_opt_levels))
        # Round to integer RGB components so the values can be formatted as an
        # HTML hex color by the chart assemblers.
        color = [
            int(round(component * 255))
            for component in colorsys.hsv_to_rgb(hue, sat, val)
        ]
        color_scheme[opt_level] = color
    return color_scheme
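
# For example, generate_color_scheme(['O0', 'O1', 'O3']) maps the first level to
# COLOR_SCHEME_MIN_COLOR, the last to COLOR_SCHEME_MAX_COLOR, and any levels in
# between to colors interpolated in HSV space.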


def filter_perf_data(perf_data,
                     filter_quantity=None,
                     filter_opt_level=None,
                     filter_test=None):
    """Applies a number of filters to performance data."""
    if filter_quantity is None:
        filter_quantity = lambda _: True
    if filter_opt_level is None:
        filter_opt_level = lambda _: True
    if filter_test is None:
        filter_test = lambda _: True
    results = {}
    for quantity, quantity_data in perf_data.items():
        if not filter_quantity(quantity):
            continue
        new_quantity_data = []
        for opt_level, opt_data in quantity_data:
            if not filter_opt_level(opt_level):
                continue
            new_opt_data = []
            for test, data in opt_data:
                if filter_test(test):
                    new_opt_data.append((test, data))
            if len(new_opt_data) > 0:
                new_quantity_data.append((opt_level, new_opt_data))
        if len(new_quantity_data) > 0:
            results[quantity] = new_quantity_data
    return results
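
# For example (hypothetical data, with 'total-time' as a made-up quantity name),
# keeping only the 'fib' test:
#
#   >>> filter_perf_data(
#   ...     {'total-time': [('O0', [('fib', 2.0), ('sort', 3.0)])]},
#   ...     filter_test=lambda name: name == 'fib')
#   {'total-time': [('O0', [('fib', 2.0)])]}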


def subtract_perf_data(lhs_perf_dict, *rhs_perf_dicts):
    """Performs measurement-wise subtraction on performance dictionaries."""
    return {
        opt_level: {
            test_name: measurement - sum(p_dict[opt_level][test_name]
                                         for p_dict in rhs_perf_dicts)
            for test_name, measurement in opt_data.items()
        }
        for opt_level, opt_data in lhs_perf_dict.items()
    }
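
# For example (hypothetical data), splitting a compile-time component out of a
# total measurement:
#
#   >>> subtract_perf_data({'O0': {'fib': 5.0}}, {'O0': {'fib': 2.0}})
#   {'O0': {'fib': 3.0}}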


def main():
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        'input', help='The performance data file.', nargs='?', default=None)
    arg_parser.add_argument(
        '-q',
        '--quantity',
        type=str,
        help="The quantity to build a bar chart for. Defaults to '%s'." %
        utils.TOTAL_TIME_QUANTITY,
        default=utils.TOTAL_TIME_QUANTITY)
    arg_parser.add_argument(
        '-O',
        '--opt',
        type=str,
        nargs='*',
        help="Filters on optimization levels.")
    arg_parser.add_argument(
        '-t', '--test', type=str, nargs='*', help="Filters on tests.")
    arg_parser.add_argument(
        '-r',
        '--relative',
        action='store_const',
        const=True,
        help="Produce bars that are relative to some baseline.",
        default=False)
    arg_parser.add_argument(
        '-e',
        '--embed',
        action='store_const',
        const=True,
        help="Don't include a LaTeX document header and footer.",
        default=False)
    arg_parser.add_argument(
        '-b',
        '--bar-width',
        type=int,
        help="The width, in points, of a bar on the bar chart. Defaults to '%s'."
        % DEFAULT_BAR_WIDTH,
        default=DEFAULT_BAR_WIDTH)
    arg_parser.add_argument(
        '-s',
        '--split',
        type=str,
        nargs='*',
        help="Picks other quantities which are subtracted from the "
        "main quantity and then combined in a stacked chart.")
    arg_parser.add_argument(
        '-C',
        '--confidence',
        action='store_const',
        const=True,
        help="Computes 95%% confidence intervals from standard deviation and "
        "sample size quantities.",
        default=False)
    args = arg_parser.parse_args()

    all_perf_data = utils.parse_perf_data(args.input)
    sorted_opt_levels = None
    opt_level_filter = None
    if args.opt:
        optimization_set = set(args.opt)
        opt_level_filter = lambda x: x in optimization_set
        sorted_opt_levels = list(args.opt)
    test_filter = None
    if args.test:
        test_set = set(args.test)
        test_filter = lambda x: x in test_set
    all_perf_data = filter_perf_data(
        all_perf_data,
        filter_opt_level=opt_level_filter,
        filter_test=test_filter)
    main_perf_data = perf_list_to_dict(all_perf_data[args.quantity])
    baseline_opt_level = get_baseline_optimization_level(main_perf_data)
    split_perf_data = [(s, perf_list_to_dict(all_perf_data[s]))
                       for s in (args.split if args.split is not None else [])]
    sub_perf_data = subtract_perf_data(
        main_perf_data, *[s_data for _, s_data in split_perf_data])
    if args.relative:
        sub_perf_data = get_relative_measurements(
            sub_perf_data, baseline_opt_level, main_perf_data)
        split_perf_data = [(q, get_relative_measurements(
            split_data, baseline_opt_level, main_perf_data))
                           for q, split_data in split_perf_data]
    split_perf_data = [(q, perf_dict_to_list(split_data))
                       for q, split_data in split_perf_data]
    if len(split_perf_data) == 0:
        if args.confidence:
            sd_data = perf_list_to_dict(
                all_perf_data[args.quantity + '-standard-deviation'])
            if args.relative:
                sd_data = get_relative_measurements(
                    sd_data, baseline_opt_level, main_perf_data)
            sample_size_data = perf_list_to_dict(
                all_perf_data[args.quantity + '-sample-size'])
            # Each interval is a (lower, upper) pair; the same 1.96 * sd / sqrt(n)
            # half-width is used in both directions.
            conf_intervals = {
                opt_level: {
                    name: 2 * (1.96 * sd_data[opt_level][name] /
                               (sample_size_data[opt_level][name]**0.5), )
                    for name, mean in measurements.items()
                }
                for opt_level, measurements in sub_perf_data.items()
            }
        else:
            conf_intervals = None
        print(create_latex_chart(sub_perf_data, sorted_opt_levels, args.embed,
                                 args.bar_width, conf_intervals))
    else:
        print(create_stacked_latex_chart([
            (args.quantity, perf_dict_to_list(sub_perf_data))
        ] + split_perf_data, sorted_opt_levels, args.embed, args.bar_width))


if __name__ == '__main__':
    main()
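
# Typical invocations (results.txt is a hypothetical input file; quantity names
# such as 'compile-time' depend on what utils.py records and are hypothetical):
#
#   python perf2tex.py results.txt > chart.tex
#   python perf2tex.py results.txt -r -C > relative-with-confidence.tex
#   python perf2tex.py results.txt -s compile-time -e > stacked-snippet.tex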