123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547 |
- """Converts performance data files (as produced by utils.py) to LaTeX charts."""
- import argparse
- import colorsys
- import utils
- # Generated LaTeX is based on the accepted answer to
- # http://tex.stackexchange.com/questions/101320/grouped-bar-chart
- # pylint: disable=I0011,W0141
# RGB end points (0-255 per channel) between which chart colors are
# interpolated.
COLOR_SCHEME_MAX_COLOR = (8, 30, 89)
COLOR_SCHEME_MIN_COLOR = (216, 33, 0)

# Bar width, in points, used when the caller does not specify one.
DEFAULT_BAR_WIDTH = 14

# Preamble of a stand-alone chart document. The color definitions are
# appended right after it, which is why it ends in comment lines.
LATEX_HEADER = '\n'.join((
    r'\documentclass[12pt,a4paper,onecolumn,openright]{report}',
    r'\usepackage[landscape]{geometry}',
    r'\usepackage{xcolor}',
    r'\usepackage{pgfplots}',
    r'\usepackage{tikz}',
    r'\usepgfplotslibrary{units}',
    r'% Define bar chart colors',
    r'%',
))

# Opening/closing markers of the document body and of the picture itself.
LATEX_DOCUMENT_HEADER = r'\begin{document}'
TIKZ_PICTURE_HEADER = r'\begin{tikzpicture}'
TIKZ_PICTURE_FOOTER = r'\end{tikzpicture}'
LATEX_DOCUMENT_FOOTER = r'\end{document}'
def encode_latex_string(value):
    """Returns `value` as a brace-delimited LaTeX group with underscores
    escaped."""
    # Only underscores are escaped; any other character that is special to
    # LaTeX is passed through untouched.
    escaped = value.replace('_', '\\_')
    return '{' + escaped + '}'
def assemble_latex_chart(optimization_levels,
                         color_defs,
                         test_names,
                         data,
                         embed=False,
                         bar_width=DEFAULT_BAR_WIDTH,
                         confidence_intervals=None):
    """Builds the LaTeX source of a grouped bar chart.

    `optimization_levels` become the legend entries and `test_names` the
    symbolic x coordinates. `color_defs` is an iterable of
    `(color_name, (red, green, blue))` pairs. `data` is an iterable of
    `(color_name, points)` pairs, one per plot, where `points` yields
    `(test_name, measurement)` pairs. If `confidence_intervals` is given,
    `confidence_intervals[color_name][test_name]` must be indexable: item 0
    is used as the positive error and item 1 as the negative error.

    Unless `embed` is true, the result is wrapped in a complete document,
    including the color definitions. Returns the chart as a single string."""

    # NOTE(review): block nesting was reconstructed; confirm that the color
    # definitions are meant to be emitted for stand-alone documents only.
    axis_template = '\n'.join((
        r' \begin{axis}[',
        r'width = 0.85*\textwidth,',
        r'height = 8cm,',
        r'major x tick style = transparent,',
        r'ybar=2*\pgflinewidth,',
        r'bar width=%dpt,',
        r'ymajorgrids = true,',
        r'ylabel = {Run time},',
        r'symbolic x coords={%s},',
        r'xtick = data,',
        r'scaled y ticks = false,',
        r'enlarge x limits=0.25,',
        r'ymin=0,',
        r'y unit=s,',
        r'legend cell align=left,',
        r'legend style={',
        r'at={(1,1.05)},',
        r'anchor=south east,',
        r'column sep=1ex',
        r'},',
        r'nodes near coords,',
        r'every node near coord/.append style={rotate=90, anchor=west}',
        r']',
    ))
    error_bar_options = (
        r',error bars/.cd, y dir=both, y explicit,error bar style=darkgray')
    with_intervals = confidence_intervals is not None

    def format_point(series, name, measurement):
        """Formats one coordinate, with its error bounds when available."""
        label = encode_latex_string(name)
        point = '(%s,%s)' % (label, measurement)
        if not with_intervals:
            return point
        bounds = confidence_intervals[series][name]
        return '%s += (%s,%s) -= (%s,%s)' % (point, label, bounds[0], label,
                                             bounds[1])

    output = []
    if not embed:
        output.append(LATEX_HEADER)
        output.extend(
            r'\definecolor{%s}{HTML}{%02X%02X%02X}' % (name, red, green, blue)
            for name, (red, green, blue) in color_defs)
        output.append(LATEX_DOCUMENT_HEADER)
    output.append(TIKZ_PICTURE_HEADER)

    x_coords = ','.join([encode_latex_string(name) for name in test_names])
    output.append(axis_template % (bar_width, x_coords))

    for series, points in data:
        plot_options = 'style={%s,fill=%s,mark=none}' % (series, series)
        if with_intervals:
            plot_options += error_bar_options
        coordinates = ' '.join([
            format_point(series, name, measurement)
            for name, measurement in points
        ])
        output.append('\n\\addplot[%s]\ncoordinates {%s};' %
                      (plot_options, coordinates))

    legend = ','.join(
        [encode_latex_string(level) for level in optimization_levels])
    output.append('\n\\legend{%s}' % legend)
    output.append('\n\\end{axis}')
    output.append(TIKZ_PICTURE_FOOTER)
    if not embed:
        output.append(LATEX_DOCUMENT_FOOTER)
    return '\n'.join(output)
def flip_tuple_keys(tuple_list):
    """Swaps the two key levels of a nested list of key-value tuples.

    Takes `[(top, [(child, value), ...]), ...]` and returns
    `[(child, [(top, value), ...]), ...]`. Children appear in the order in
    which they are first encountered."""
    flipped = []
    # Maps every child key to the list that collects its (top, value) pairs;
    # that same list object is also stored in `flipped`.
    bucket_for = {}
    for outer_key, inner_items in tuple_list:
        for inner_key, payload in inner_items:
            bucket = bucket_for.get(inner_key)
            if bucket is None:
                bucket = []
                bucket_for[inner_key] = bucket
                flipped.append((inner_key, bucket))
            bucket.append((outer_key, payload))
    return flipped
def assemble_stacked_latex_chart(optimization_levels,
                                 quantity_colors,
                                 test_names,
                                 data,
                                 embed=False,
                                 bar_width=DEFAULT_BAR_WIDTH):
    """Builds the LaTeX source of a stacked bar chart.

    `quantity_colors` is a sequence of `(quantity, (red, green, blue))`
    pairs; every quantity becomes one layer of the stack and one legend
    entry. `data` is an iterable of `(quantity, points)` pairs, where
    `points` is a list of `(optimization_level, [(test, measurement), ...])`
    tuples. The names in `optimization_levels` are used as tick labels,
    repeated once per entry of `test_names`.

    Unless `embed` is true, the result is wrapped in a complete document,
    including the color definitions. Returns the chart as a single string."""

    # NOTE(review): block nesting was reconstructed; confirm that the color
    # definitions are meant to be emitted for stand-alone documents only.
    axis_template = '\n'.join((
        r' \begin{axis}[',
        r'ybar stacked,',
        r'legend style={',
        r'legend columns=%d,',
        r'at={(xticklabel cs:0.5)},',
        r'anchor=north,',
        r'draw=none',
        r'},',
        r'width=0.85*\textwidth,',
        r'height=8cm,',
        r'xtick=data,',
        r'bar width=%dpt,',
        r'ymin=0,',
        r'major x tick style=transparent,',
        r'ymajorgrids=true,',
        r'xticklabels={%s},',
        r'x tick label style={rotate=90,anchor=east,font=\scriptsize},',
        r'tick label style={font=\footnotesize},',
        r'legend style={font=\footnotesize,yshift=-3ex},',
        r'label style={font=\footnotesize},',
        r'ylabel={Time},',
        r'y unit=s,',
        r'area legend,',
        r'nodes near coords,',
        r'every node near coord/.append style={rotate=90, anchor=west}',
        r']',
    ))

    # Every quantity gets a LaTeX color name derived from its position.
    color_name_of = {}
    legend_entries = []
    for index, (quantity_name, _) in enumerate(quantity_colors):
        color_name_of[quantity_name] = 'chartColor%d' % index
        legend_entries.append(quantity_name)

    output = []
    if not embed:
        output.append(LATEX_HEADER)
        output.extend(
            r'\definecolor{%s}{HTML}{%02X%02X%02X}' %
            (color_name_of[quantity_name], red, green, blue)
            for quantity_name, (red, green, blue) in quantity_colors)
        output.append(LATEX_DOCUMENT_HEADER)
    output.append(TIKZ_PICTURE_HEADER)

    level_labels = [encode_latex_string(name) for name in optimization_levels]
    tick_labels = ','.join(level_labels * len(test_names))
    output.append(axis_template %
                  (len(optimization_levels), bar_width, tick_labels))

    for quantity, points in data:
        coordinates = []
        offset = 0
        for _, per_level in flip_tuple_keys(points):
            coordinates.extend(
                '(%d,%s)' % (offset + slot, measurement)
                for slot, (_, measurement) in enumerate(per_level))
            # Skip one extra position so that the bars of consecutive tests
            # are separated by a gap.
            offset += len(per_level) + 1
        color_name = color_name_of[quantity]
        output.append(
            '\n\\addplot[style={%s,fill=%s,mark=none}]\ncoordinates {%s};' %
            (color_name, color_name, ' '.join(coordinates)))

    legend = ','.join([encode_latex_string(name) for name in legend_entries])
    output.append('\n\\legend{%s}' % legend)
    output.append('\n\\end{axis}')
    output.append(TIKZ_PICTURE_FOOTER)
    if not embed:
        output.append(LATEX_DOCUMENT_FOOTER)
    return '\n'.join(output)
def create_latex_chart(perf_data,
                       sorted_opt_levels=None,
                       embed=False,
                       bar_width=DEFAULT_BAR_WIDTH,
                       confidence_intervals=None):
    """Renders a grouped LaTeX bar chart for a performance dictionary.

    `perf_data` maps optimization levels to `{test_name: measurement}`
    dictionaries. `sorted_opt_levels` fixes the order (and thereby the
    color) of the optimization levels; when omitted, `sort_by_runtime`
    decides. `confidence_intervals`, if given, is indexed as
    `confidence_intervals[optimization_level][test_name]`. `embed` and
    `bar_width` are forwarded to `assemble_latex_chart`, whose result is
    returned."""
    if sorted_opt_levels is None:
        sorted_opt_levels = sort_by_runtime(perf_data)
    color_scheme = generate_color_scheme(sorted_opt_levels)

    have_intervals = confidence_intervals is not None
    # The assembler looks intervals up by color name, not by level.
    intervals_by_color = {} if have_intervals else None

    legend_levels = []
    color_defs = []
    plots = []
    test_names = []
    for index, level in enumerate(sorted_opt_levels):
        measurements = perf_data[level]
        color_name = 'chartColor%d' % index
        legend_levels.append(level)
        color_defs.append((color_name, color_scheme[level]))
        plots.append((color_name, measurements.items()))
        if have_intervals:
            intervals_by_color[color_name] = {
                name: confidence_intervals[level][name]
                for name in measurements
            }
        # Collect test names in order of first appearance.
        for name in measurements:
            if name not in test_names:
                test_names.append(name)

    return assemble_latex_chart(
        legend_levels,
        color_defs,
        test_names,
        plots,
        embed=embed,
        bar_width=bar_width,
        confidence_intervals=intervals_by_color)
def create_stacked_latex_chart(perf_data,
                               sorted_optimization_levels=None,
                               embed=False,
                               bar_width=DEFAULT_BAR_WIDTH):
    """Creates a stacked LaTeX chart for the given performance data.

    `perf_data` is a list of `(quantity, measurements)` pairs, where
    `measurements` is a list of `(optimization_level, data_points)` pairs
    and `data_points` yields `(test_name, measurement)` pairs.

    Quantities are stacked in ascending order of the sum of their mean
    per-level runtimes. `sorted_optimization_levels` fixes the order of the
    optimization levels within every quantity; when omitted, it is derived
    with `sort_by_runtime` from the first quantity in that order. `embed`
    and `bar_width` are forwarded to `assemble_stacked_latex_chart`, whose
    result is returned."""
    color_scheme = generate_color_scheme([q for q, _ in perf_data])

    # Collect test names in order of first appearance.
    test_names = []
    for _, measurements in perf_data:
        for _, data_points in measurements:
            for name, _ in data_points:
                if name not in test_names:
                    test_names.append(name)

    def total_mean_runtime(entry):
        """Sums the mean runtimes of one (quantity, measurements) entry."""
        return sum(get_mean_runtimes(perf_list_to_dict(entry[1])).values())

    sorted_data = sorted(perf_data, key=total_mean_runtime)

    if sorted_optimization_levels is None:
        if sorted_data:
            # sort_by_runtime works on the dictionary representation of a
            # single quantity, not on the (quantity, list) tuple itself.
            sorted_optimization_levels = sort_by_runtime(
                perf_list_to_dict(sorted_data[0][1]))
        else:
            sorted_optimization_levels = []

    def level_rank(entry):
        """Position of an (optimization_level, data_points) entry."""
        return sorted_optimization_levels.index(entry[0])

    sorted_data = [(quantity, sorted(data, key=level_rank))
                   for quantity, data in sorted_data]

    return assemble_stacked_latex_chart(
        sorted_optimization_levels,
        [(q, color_scheme[q]) for q, _ in sorted_data], test_names,
        sorted_data, embed, bar_width)
def get_mean_runtimes(perf_data):
    """Maps every optimization level in the given performance data to the
    mean (as computed by `utils.mean`) of its measurements."""
    means = {}
    for opt_level, measurements in perf_data.items():
        means[opt_level] = utils.mean(measurements.values())
    return means
def get_baseline_optimization_level(perf_data):
    """Picks the optimization level that serves as the baseline.

    Only optimization levels that have as many measurements as there are
    distinct test cases are eligible. Among those, the one with the highest
    total run-time is returned. If no level is eligible, None is returned."""
    # Gather the names of all test cases across all optimization levels.
    test_names = set()
    for measurements in perf_data.values():
        test_names.update(measurements.keys())
    test_count = len(test_names)

    # Keep the optimization levels that cover every test case.
    complete_levels = [
        opt_level for opt_level, measurements in perf_data.items()
        if len(measurements) == test_count
    ]
    if not complete_levels:
        return None

    def total_runtime(opt_level):
        """Sums all measurements of the given optimization level."""
        return sum(perf_data[opt_level].values())

    return max(complete_levels, key=total_runtime)
def get_relative_measurements(perf_data,
                              baseline_optimization_level,
                              baseline_perf_data=None):
    """Divides every measurement by the baseline measurement of the same test.

    The baseline values are read from
    `baseline_perf_data[baseline_optimization_level]`; `perf_data` itself is
    used when `baseline_perf_data` is None. Returns a new dictionary with
    the same keys as `perf_data`."""
    reference = perf_data if baseline_perf_data is None else baseline_perf_data
    relative = {}
    for opt_level, measurements in perf_data.items():
        relative[opt_level] = {
            test_name: value / reference[baseline_optimization_level][test_name]
            for test_name, value in measurements.items()
        }
    return relative
def perf_list_to_dict(perf_list):
    """Turns a list of `(opt_level, [(test, value), ...])` pairs into a
    dictionary of dictionaries."""
    converted = {}
    for opt_level, tests in perf_list:
        converted[opt_level] = dict(tests)
    return converted
def perf_dict_to_list(perf_dict):
    """Turns a dictionary of dictionaries into a list of
    `(opt_level, items)` pairs, where `items` is the result of calling
    `items()` on the inner dictionary."""
    pairs = []
    for opt_level, tests in perf_dict.items():
        pairs.append((opt_level, tests.items()))
    return pairs
def interpolate(value_range, index, length):
    """Linearly interpolates within `value_range`, a `(min, max)` pair.

    Index 0 yields the minimum and index `length - 1` the maximum. A
    sequence of length one always yields the maximum."""
    low, high = value_range
    if length == 1:
        # A single step cannot be spread over the range.
        return high
    span = high - low
    return low + float(index) * span / float(length - 1)
def sort_by_runtime(perf_data):
    """Orders the optimization levels by mean relative runtime.

    Measurements are first made relative to the baseline optimization
    level. The level with the highest mean relative runtime comes first."""
    baseline = get_baseline_optimization_level(perf_data)
    mean_runtimes = get_mean_runtimes(
        get_relative_measurements(perf_data, baseline))
    ordered = list(mean_runtimes.keys())
    ordered.sort(key=mean_runtimes.get, reverse=True)
    return ordered
def generate_color_scheme(sorted_opt_levels):
    """Assigns a color to every optimization level in the given sequence.

    Colors are interpolated in HSV space between COLOR_SCHEME_MIN_COLOR
    (first level) and COLOR_SCHEME_MAX_COLOR (last level). A single level
    gets the maximum color.

    Returns a dictionary that maps every optimization level to a list of
    three integer RGB components in the range 0-255."""
    min_hsv = colorsys.rgb_to_hsv(
        *[c / float(255) for c in COLOR_SCHEME_MIN_COLOR])
    max_hsv = colorsys.rgb_to_hsv(
        *[c / float(255) for c in COLOR_SCHEME_MAX_COLOR])
    level_count = len(sorted_opt_levels)

    color_scheme = {}
    for i, opt_level in enumerate(sorted_opt_levels):
        hue, sat, val = [
            interpolate(bounds, i, level_count)
            for bounds in zip(min_hsv, max_hsv)
        ]
        # The components end up in a '%02X' format, which requires integers
        # on Python 3. Rounding (rather than truncating) also keeps the end
        # points exact despite floating-point error in the HSV round trip.
        color_scheme[opt_level] = [
            int(round(component * 255))
            for component in colorsys.hsv_to_rgb(hue, sat, val)
        ]
    return color_scheme
def filter_perf_data(perf_data,
                     filter_quantity=None,
                     filter_opt_level=None,
                     filter_test=None):
    """Returns the subset of `perf_data` that passes the given predicates.

    `perf_data` maps quantities to lists of
    `(opt_level, [(test, value), ...])` pairs. Every filter is a predicate
    on the respective key; a filter that is None accepts everything.
    Optimization levels and quantities that end up empty are dropped."""

    def accept_all(_):
        """Default predicate."""
        return True

    keep_quantity = accept_all if filter_quantity is None else filter_quantity
    keep_level = accept_all if filter_opt_level is None else filter_opt_level
    keep_test = accept_all if filter_test is None else filter_test

    filtered = {}
    for quantity, per_level in perf_data.items():
        if not keep_quantity(quantity):
            continue
        kept_levels = []
        for opt_level, per_test in per_level:
            if not keep_level(opt_level):
                continue
            kept_tests = [(test, value) for test, value in per_test
                          if keep_test(test)]
            if kept_tests:
                kept_levels.append((opt_level, kept_tests))
        if kept_levels:
            filtered[quantity] = kept_levels
    return filtered
def subtract_perf_data(lhs_perf_dict, *rhs_perf_dicts):
    """Subtracts, measurement by measurement, all right-hand performance
    dictionaries from the left-hand one.

    The result has the keys of `lhs_perf_dict`; every right-hand dictionary
    must contain each of those optimization levels and tests."""
    difference = {}
    for opt_level, opt_data in lhs_perf_dict.items():
        remaining = {}
        for test_name, measurement in opt_data.items():
            subtrahend = sum(
                [rhs[opt_level][test_name] for rhs in rhs_perf_dicts])
            remaining[test_name] = measurement - subtrahend
        difference[opt_level] = remaining
    return difference
def main():
    """Parses the command line, loads the performance data file and prints
    the resulting LaTeX chart to standard output.

    A grouped bar chart is produced for the selected quantity. If any
    `--split` quantities are given, those are subtracted from the main
    quantity and everything is rendered as a stacked chart instead."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        'input', help='The performance data file.', nargs='?', default=None)
    arg_parser.add_argument(
        '-q',
        '--quantity',
        type=str,
        help="The quantity to build a bar chart for. Defaults to '%s'." %
        utils.TOTAL_TIME_QUANTITY,
        default=utils.TOTAL_TIME_QUANTITY)
    arg_parser.add_argument(
        '-O',
        '--opt',
        type=str,
        nargs='*',
        help="Filters on optimization levels.")
    arg_parser.add_argument(
        '-t', '--test', type=str, nargs='*', help="Filters on tests.")
    arg_parser.add_argument(
        '-r',
        '--relative',
        action='store_true',
        help="Produce bars that are relative to some baseline.")
    arg_parser.add_argument(
        '-e',
        '--embed',
        action='store_true',
        help="Don't include a LaTeX document header and footer.")
    arg_parser.add_argument(
        '-b',
        '--bar-width',
        type=int,
        help="The width, in points, of a bar on the bar chart. Defaults to '%s'."
        % DEFAULT_BAR_WIDTH,
        default=DEFAULT_BAR_WIDTH)
    arg_parser.add_argument(
        '-s',
        '--split',
        type=str,
        nargs='*',
        help="Picks other quantities which are subtracted from the "
        "main quantity and then combined in a stacked chart.")
    # argparse %-formats help strings, so the literal percent sign has to be
    # doubled; a bare '95% c...' makes '--help' fail.
    arg_parser.add_argument(
        '-C',
        '--confidence',
        action='store_true',
        help="Computes 95%% confidence intervals from standard deviation and "
        "sample size quantities.")
    args = arg_parser.parse_args()

    all_perf_data = utils.parse_perf_data(args.input)

    # An explicit list of optimization levels acts both as a filter and as
    # the order in which the levels are charted.
    sorted_opt_levels = None
    opt_level_filter = None
    if args.opt:
        optimization_set = set(args.opt)
        opt_level_filter = lambda x: x in optimization_set
        sorted_opt_levels = list(args.opt)

    test_filter = None
    if args.test:
        test_set = set(args.test)
        test_filter = lambda x: x in test_set

    all_perf_data = filter_perf_data(
        all_perf_data,
        filter_opt_level=opt_level_filter,
        filter_test=test_filter)

    main_perf_data = perf_list_to_dict(all_perf_data[args.quantity])
    baseline_opt_level = get_baseline_optimization_level(main_perf_data)

    split_perf_data = [(s, perf_list_to_dict(all_perf_data[s]))
                       for s in (args.split or [])]
    # What remains of the main quantity once the split quantities are
    # taken out.
    sub_perf_data = subtract_perf_data(
        main_perf_data, *[s_data for _, s_data in split_perf_data])

    if args.relative:
        sub_perf_data = get_relative_measurements(
            sub_perf_data, baseline_opt_level, main_perf_data)
        split_perf_data = [(q, get_relative_measurements(
            split_data, baseline_opt_level, main_perf_data))
                           for q, split_data in split_perf_data]

    split_perf_data = [(q, perf_dict_to_list(split_data))
                       for q, split_data in split_perf_data]

    if len(split_perf_data) == 0:
        conf_intervals = None
        if args.confidence:
            sd_data = perf_list_to_dict(
                all_perf_data[args.quantity + '-standard-deviation'])
            if args.relative:
                sd_data = get_relative_measurements(
                    sd_data, baseline_opt_level, main_perf_data)
            sample_size_data = perf_list_to_dict(
                all_perf_data[args.quantity + '-sample-size'])
            # Symmetric interval: the same half-width (1.96 standard errors)
            # is used for the upper and the lower bound.
            conf_intervals = {
                opt_level: {
                    name: 2 * (1.96 * sd_data[opt_level][name] /
                               (sample_size_data[opt_level][name]**0.5), )
                    for name in measurements
                }
                for opt_level, measurements in sub_perf_data.items()
            }
        print(create_latex_chart(sub_perf_data, sorted_opt_levels, args.embed,
                                 args.bar_width, conf_intervals))
    else:
        print(create_stacked_latex_chart([
            (args.quantity, perf_dict_to_list(sub_perf_data))
        ] + split_perf_data, sorted_opt_levels, args.embed, args.bar_width))


if __name__ == '__main__':
    main()
|