123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234 |
- """Converts performance data files (as produced by utils.py) to LaTeX charts."""
- import argparse
- import colorsys
- import utils
- # Generated LaTeX is based on the accepted answer to
- # http://tex.stackexchange.com/questions/101320/grouped-bar-chart
- # pylint: disable=I0011,W0141
# End points of the bar color gradient, as (red, green, blue) tuples.
# generate_color_scheme divides the components by 255, converts them to HSV
# and interpolates between the two end points.
COLOR_SCHEME_MIN_COLOR = (36, 255, 106)
COLOR_SCHEME_MAX_COLOR = (216, 33, 0)

# LaTeX preamble. assemble_latex_chart emits the color definitions right
# after it, which is why it ends on a comment announcing them.
LATEX_HEADER = r"""\documentclass[12pt,a4paper,onecolumn,openright]{report}
\usepackage[landscape]{geometry}
\usepackage{xcolor}
\usepackage{pgfplots}
\usepackage{tikz}
\usepgfplotslibrary{units}
% Define bar chart colors
%"""

# Opens the document body and the tikzpicture that contains the chart.
LATEX_DOCUMENT_HEADER = r"""\begin{document}
\begin{tikzpicture}"""

# Closes the tikzpicture and the document.
LATEX_DOCUMENT_FOOTER = r"""\end{tikzpicture}
\end{document}"""
def encode_latex_string(value):
    """Wraps the given string in braces and escapes its underscores so it
    can be embedded in LaTeX source."""
    # Underscores are the only characters that get escaped; any other LaTeX
    # special character is passed through unchanged.
    escaped = value.replace('_', '\\_')
    return '{' + escaped + '}'
def assemble_latex_chart(optimization_levels, color_defs, test_names, data):
    """Assembles a LaTeX chart from the given components.

    Args:
        optimization_levels: The legend entries, one per plot, in the same
            order as the entries of `data`.
        color_defs: An iterable of (color name, (red, green, blue)) pairs.
            One color definition is emitted for each of them.
        test_names: The test names, which become the symbolic x coordinates
            of the chart.
        data: An iterable of (color name, points) pairs, where points is an
            iterable of (test name, measurement) pairs. One bar plot is
            emitted for each pair.

    Returns:
        The source code of the LaTeX document, as a single string."""
    lines = [LATEX_HEADER]
    for color_name, (red, green, blue) in color_defs:
        # '%02X' only accepts integers on Python 3, but the components may
        # well be floats (e.g. when they were computed from HSV values),
        # so round them to the nearest integer first.
        lines.append(r'\definecolor{%s}{HTML}{%02X%02X%02X}' % (
            color_name, int(round(red)), int(round(green)), int(round(blue))))
    lines.append(LATEX_DOCUMENT_HEADER)
    lines.append(r"""
\begin{axis}[
width = 0.85*\textwidth,
height = 8cm,
major x tick style = transparent,
ybar=2*\pgflinewidth,
bar width=14pt,
ymajorgrids = true,
ylabel = {Run time},
symbolic x coords={%s},
xtick = data,
scaled y ticks = false,
enlarge x limits=0.25,
ymin=0,
y unit=s,
legend cell align=left,
legend style={
at={(1,1.05)},
anchor=south east,
column sep=1ex
}
]""" % ','.join(encode_latex_string(name) for name in test_names))
    for color_name, points in data:
        # Every point becomes a '(test name,measurement)' coordinate.
        coordinates = ' '.join(
            '(%s,%s)' % (encode_latex_string(name), measurement)
            for name, measurement in points)
        lines.append(r"""
\addplot[style={%s,fill=%s,mark=none}]
coordinates {%s};""" % (color_name, color_name, coordinates))
    lines.append(r"""
\legend{%s}""" % ','.join(
        encode_latex_string(level) for level in optimization_levels))
    lines.append(r"""
\end{axis}""")
    lines.append(LATEX_DOCUMENT_FOOTER)
    return '\n'.join(lines)
def create_latex_chart(perf_data):
    """Builds the source code of a LaTeX chart from a dictionary that maps
    optimization levels to their {test name: measurement} dictionaries."""
    ordered_levels = sort_by_runtime(perf_data)
    palette = generate_color_scheme(ordered_levels)

    legend_entries = list(ordered_levels)
    color_defs = []
    plots = []
    test_names = []
    for index, level in enumerate(ordered_levels):
        level_measurements = perf_data[level]
        color_name = 'chartColor%d' % index
        color_defs.append((color_name, palette[level]))
        plots.append((color_name, level_measurements.items()))
        # Collect the test names in first-seen order, without duplicates.
        for name in level_measurements:
            if name not in test_names:
                test_names.append(name)

    return assemble_latex_chart(legend_entries, color_defs, test_names, plots)
def get_mean_runtimes(perf_data):
    """Maps every optimization level in the given performance data to the
    mean of its measurements, as computed by utils.mean."""
    means = {}
    for opt_level, measurements in perf_data.items():
        means[opt_level] = utils.mean(measurements.values())
    return means
def get_baseline_optimization_level(perf_data):
    """Picks a baseline optimization level from the given performance data.

    An optimization level only qualifies as a baseline if it has a
    measurement for every test case that occurs anywhere in the data. Of the
    qualifying levels, the one with the lowest total run-time is returned.
    None is returned if no level qualifies."""
    # Gather the names of all test cases, across all optimization levels.
    every_test = set()
    for measurements in perf_data.values():
        every_test.update(measurements.keys())

    # The test names of a level are a subset of every_test, so a level covers
    # every test case exactly when the two sizes match.
    test_count = len(every_test)
    complete_levels = [
        level for level, measurements in perf_data.items()
        if len(measurements) == test_count]
    if not complete_levels:
        # Looks like no level can serve as a baseline.
        return None

    def total_runtime(level):
        return sum(perf_data[level].values())

    return min(complete_levels, key=total_runtime)
def get_relative_measurements(perf_data, baseline_optimization_level):
    """Divides every measurement by the measurement that the given baseline
    optimization level has for the same test, and returns the result as a
    new dictionary with the same layout as the performance data."""
    return {
        level: {
            test: point / perf_data[baseline_optimization_level][test]
            for test, point in level_measurements.items()
        }
        for level, level_measurements in perf_data.items()
    }
def perf_list_to_dict(perf_list):
    """Turns a list of (optimization level, (test, measurement) pairs) tuples
    into a dictionary that maps optimization levels to dictionaries."""
    perf_dict = {}
    for opt_level, tests in perf_list:
        perf_dict[opt_level] = dict(tests)
    return perf_dict
def perf_dict_to_list(perf_dict):
    """Turns a dictionary that maps optimization levels to dictionaries into
    a list of (optimization level, items) tuples."""
    perf_list = []
    for opt_level, tests in perf_dict.items():
        perf_list.append((opt_level, tests.items()))
    return perf_list
def interpolate(value_range, index, length):
    """Linearly maps position `index` of a sequence of `length` elements
    onto the given (minimum, maximum) range."""
    low, high = value_range
    if length == 1:
        # A single-element sequence maps onto the upper end of the range.
        return high
    step_count = float(length - 1)
    return low + float(index) * (high - low) / step_count
def sort_by_runtime(perf_data):
    """Sorts the optimization levels by mean relative runtimes.

    The measurements are made relative to the baseline optimization level
    before their mean is computed. If there is no baseline optimization
    level, then the mean of the absolute measurements is used instead.

    Returns a list of optimization levels, in order of ascending mean."""
    baseline_opt_level = get_baseline_optimization_level(perf_data)
    if baseline_opt_level is None:
        # There is nothing to normalize against. Sort on the absolute
        # measurements rather than failing on a lookup of the None key.
        comparable_perf = perf_data
    else:
        comparable_perf = get_relative_measurements(perf_data, baseline_opt_level)
    # Sort the optimization levels by their mean runtimes.
    mean_runtimes = get_mean_runtimes(comparable_perf)
    return sorted(mean_runtimes, key=mean_runtimes.get)
def generate_color_scheme(sorted_opt_levels):
    """Assigns a color to every optimization level in the given list.

    The colors are interpolated in HSV space: the first optimization level
    gets COLOR_SCHEME_MIN_COLOR and the last one gets COLOR_SCHEME_MAX_COLOR.
    If there is only one optimization level, then it gets
    COLOR_SCHEME_MAX_COLOR.

    Returns a dictionary that maps every optimization level to a
    [red, green, blue] list of integer components."""
    min_hsv = colorsys.rgb_to_hsv(
        *[c / float(255) for c in COLOR_SCHEME_MIN_COLOR])
    max_hsv = colorsys.rgb_to_hsv(
        *[c / float(255) for c in COLOR_SCHEME_MAX_COLOR])
    level_count = len(sorted_opt_levels)

    # Assign colors to the optimization levels.
    color_scheme = {}
    for i, opt_level in enumerate(sorted_opt_levels):
        hue, sat, val = [
            interpolate(bounds, i, level_count)
            for bounds in zip(min_hsv, max_hsv)]
        # Round the components to integers: they end up in a '%02X' format,
        # which does not accept floats on Python 3.
        color_scheme[opt_level] = [
            int(round(component * 255))
            for component in colorsys.hsv_to_rgb(hue, sat, val)]
    return color_scheme
def main():
    """Parses the command-line arguments, loads and filters the performance
    data, and prints the resulting LaTeX chart to standard output."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('input', help='The performance data file.')
    arg_parser.add_argument(
        '-q', '--quantity', type=str,
        help="The quantity to build a bar chart for. Defaults to '%s'" % utils.TOTAL_TIME_QUANTITY,
        default=utils.TOTAL_TIME_QUANTITY)
    arg_parser.add_argument(
        '-O', '--opt', type=str, nargs='*',
        help="Filters on optimization levels.")
    arg_parser.add_argument(
        '-t', '--test', type=str, nargs='*',
        help="Filters on tests.")
    arg_parser.add_argument(
        '-r', '--relative', action='store_const', const=True,
        help="Produce bars that are relative to some baseline.", default=False)

    args = arg_parser.parse_args()

    perf_data = utils.parse_perf_data(args.input)[args.quantity]

    if args.opt:
        # Only keep the optimization levels that were asked for.
        optimization_set = set(args.opt)
        perf_data = [
            (optimization_level, measurements)
            for optimization_level, measurements in perf_data
            if optimization_level in optimization_set]

    if args.test:
        # Only keep the tests that were asked for, and drop the optimization
        # levels that have no measurements left.
        test_set = set(args.test)
        new_perf_data = []
        for optimization_level, measurements in perf_data:
            new_measurements = [
                (test_name, data_point)
                for test_name, data_point in measurements
                if test_name in test_set]
            if new_measurements:
                new_perf_data.append((optimization_level, new_measurements))
        perf_data = new_perf_data

    perf_data_dict = perf_list_to_dict(perf_data)

    if args.relative:
        baseline_opt_level = get_baseline_optimization_level(perf_data_dict)
        if baseline_opt_level is not None:
            perf_data_dict = get_relative_measurements(perf_data_dict, baseline_opt_level)
        elif perf_data_dict:
            # No optimization level covers every test, so there is nothing
            # to divide by. Report that instead of failing on a lookup of
            # the None key.
            arg_parser.error(
                "cannot produce a relative chart: no optimization level "
                "has a measurement for every test.")

    print(create_latex_chart(perf_data_dict))
# Only run the converter when this file is executed as a script.
if __name__ == '__main__':
    main()
|