Bläddra i källkod

Make perf2tex capable of generating stacked bar charts

jonathanvdc 8 år sedan
förälder
incheckning
9c5eb8ec0f
1 ändrade filer med 220 tillägg och 26 borttagningar
  1. 220 26
      performance/perf2tex.py

+ 220 - 26
performance/perf2tex.py

@@ -9,7 +9,7 @@ import utils
 
 # pylint: disable=I0011,W0141
 
-COLOR_SCHEME_MAX_COLOR = (36, 255, 106)
+COLOR_SCHEME_MAX_COLOR = (16, 59, 176)
 COLOR_SCHEME_MIN_COLOR = (216, 33, 0)
 
 DEFAULT_BAR_WIDTH = 14
@@ -93,6 +93,91 @@ def assemble_latex_chart(optimization_levels,
     return '\n'.join(lines)
 
 
+def flip_tuple_keys(tuple_list):
+    """Swaps the two key levels of a nested list of key-value pairs.
+
+       Takes a list of `(outer, [(inner, value), ...])` pairs and returns a
+       list of `(inner, [(outer, value), ...])` pairs. Inner keys appear in
+       the order in which they are first encountered; within each result
+       entry, outer keys keep the order of the input list."""
+    buckets = {}
+    flipped = []
+    for outer_key, inner_pairs in tuple_list:
+        for inner_key, value in inner_pairs:
+            bucket = buckets.get(inner_key)
+            if bucket is None:
+                # First time this inner key shows up: give it a result slot.
+                bucket = []
+                buckets[inner_key] = bucket
+                flipped.append((inner_key, bucket))
+
+            bucket.append((outer_key, value))
+
+    return flipped
+
+
+def assemble_stacked_latex_chart(optimization_levels,
+                                 quantity_colors,
+                                 test_names,
+                                 data,
+                                 embed=False,
+                                 bar_width=DEFAULT_BAR_WIDTH):
+    """Assembles a stacked LaTeX chart from the given components.
+
+       `optimization_levels` is the ordered list of optimization level names;
+       it is repeated once per entry of `test_names` to label the x ticks.
+       `quantity_colors` is an ordered list of `(quantity, (red, green, blue))`
+       pairs; every quantity gets one stacked series and one legend entry.
+       `data` is a list of `(quantity, points)` pairs, where `points` is
+       regrouped with `flip_tuple_keys` (presumably a list of
+       `(optimization level, [(test, measurement), ...])` pairs -- confirm
+       against the caller). If `embed` is true, the document header, the
+       color definitions and the document footer are left out. `bar_width`
+       is the width of a bar, in points. Returns the LaTeX source as a
+       single string."""
+    lines = []
+    quantity_names = [name for name, _ in quantity_colors]
+    quantity_color_names = {
+        name: 'chartColor%d' % i
+        for i, name in enumerate(quantity_names)
+    }
+    if not embed:
+        lines.append(LATEX_HEADER)
+        for quantity_name, (red, green, blue) in quantity_colors:
+            lines.append(
+                r'\definecolor{%s}{HTML}{%02X%02X%02X}' %
+                (quantity_color_names[quantity_name], red, green, blue))
+        lines.append(LATEX_DOCUMENT_HEADER)
+    lines.append(TIKZ_PICTURE_HEADER)
+    # One tick label per bar: the optimization levels, repeated for each test.
+    tick_labels = ','.join(
+        [encode_latex_string(name) for name in optimization_levels] *
+        len(test_names))
+    # The legend lists quantities (see the \legend line below), so the legend
+    # gets one column per quantity rather than one per optimization level.
+    lines.append(r"""    \begin{axis}[
+        ybar stacked,
+        legend style={
+            legend columns=%d,
+            at={(xticklabel cs:0.5)},
+            anchor=north,
+            draw=none
+        },
+        width=0.85*\textwidth,
+        height=8cm,
+        xtick=data,
+        bar width=%dpt,
+        ymin=0,
+        major x tick style=transparent,
+        ymajorgrids=true,
+        xticklabels={%s},
+        x tick label style={rotate=90,anchor=east,font=\tiny},
+        tick label style={font=\footnotesize},
+        legend style={font=\footnotesize,yshift=-3ex},
+        label style={font=\footnotesize},
+        ylabel={Time},
+        y unit=s,
+        area legend
+    ]""" % (len(quantity_names), bar_width, tick_labels))
+    for quantity, points in data:
+        coordinates = []
+        x_position = 0
+        for _, test_data in flip_tuple_keys(points):
+            for _, measurement in test_data:
+                coordinates.append('(%d,%s)' % (x_position, measurement))
+                x_position += 1
+            # Skip one x position to leave a gap after each group of bars.
+            x_position += 1
+        color_name = quantity_color_names[quantity]
+        lines.append(r"""
+        \addplot[style={%s,fill=%s,mark=none}]
+            coordinates {%s};""" %
+                     (color_name, color_name, ' '.join(coordinates)))
+    lines.append(r"""
+        \legend{%s}""" % ','.join(map(encode_latex_string, quantity_names)))
+    lines.append(r"""
+    \end{axis}""")
+    lines.append(TIKZ_PICTURE_FOOTER)
+    if not embed:
+        lines.append(LATEX_DOCUMENT_FOOTER)
+    return '\n'.join(lines)
+
+
 def create_latex_chart(perf_data,
                        sorted_opt_levels=None,
                        embed=False,
@@ -121,6 +206,42 @@ def create_latex_chart(perf_data,
                                 embed, bar_width)
 
 
+def create_stacked_latex_chart(perf_data,
+                               sorted_optimization_levels=None,
+                               embed=False,
+                               bar_width=DEFAULT_BAR_WIDTH):
+    """Creates a stacked LaTeX chart for the given performance data.
+
+       `perf_data` is a list of `(quantity, measurements)` pairs, where
+       `measurements` is a list of `(optimization level, data points)` pairs
+       and `data points` is a list of `(test name, value)` pairs. Quantities
+       are stacked in order of increasing total mean run-time. If
+       `sorted_optimization_levels` is None, the order of the optimization
+       levels is derived with `sort_by_runtime`. `embed` and `bar_width`
+       are passed on to `assemble_stacked_latex_chart`, whose result is
+       returned."""
+    color_scheme = generate_color_scheme([q for q, _ in perf_data])
+    quantity_colors = {}
+    test_names = []
+    for quantity, measurements in perf_data:
+        quantity_colors[quantity] = color_scheme[quantity]
+        for _, data_points in measurements:
+            for name, _ in data_points:
+                if name not in test_names:
+                    test_names.append(name)
+
+    # Index-based key functions are used instead of tuple-unpacking lambda
+    # parameters, which are only valid syntax in Python 2.
+    def total_mean_runtime(entry):
+        """Sums the mean run-times of a (quantity, measurements) pair."""
+        return sum(get_mean_runtimes(perf_list_to_dict(entry[1])).values())
+
+    sorted_data = sorted(perf_data, key=total_mean_runtime)
+
+    if sorted_optimization_levels is None:
+        if len(sorted_data) > 0:
+            # NOTE(review): the whole (quantity, measurements) pair is passed
+            # here; confirm that sort_by_runtime expects that shape.
+            sorted_optimization_levels = sort_by_runtime(sorted_data[0])
+        else:
+            # No data at all: fall back to an empty order instead of None.
+            sorted_optimization_levels = []
+
+    def level_position(entry):
+        """Gets the rank of an (optimization level, data points) pair."""
+        return sorted_optimization_levels.index(entry[0])
+
+    sorted_data = [(quantity, sorted(data, key=level_position))
+                   for quantity, data in sorted_data]
+
+    return assemble_stacked_latex_chart(
+        sorted_optimization_levels,
+        [(q, quantity_colors[q]) for q, _ in sorted_data], test_names,
+        sorted_data, embed, bar_width)
+
+
 def get_mean_runtimes(perf_data):
     """Computes the mean run-time of every optimization level in the given
        performance data."""
@@ -132,7 +253,7 @@ def get_mean_runtimes(perf_data):
 
 def get_baseline_optimization_level(perf_data):
     """Gets a baseline optimization level from the given performance data.
-       This baseline optimization level is guaranteed to be for every test case.
+       This baseline optimization level is guaranteed to exist for every test case.
        If no baseline optimization level can be found, then None is returned."""
     # First find the name of all test cases.
     all_test_names = set()
@@ -149,19 +270,25 @@ def get_baseline_optimization_level(perf_data):
         # Looks like there is no baseline optimization level.
         return None
 
-    # Pick the optimization level with the lowest total run-time as the baseline.
-    return min(candidate_opt_levels,
+    # Pick the optimization level with the highest total run-time as the baseline.
+    return max(candidate_opt_levels,
                key=lambda opt_level: sum(perf_data[opt_level].values()))
 
 
-def get_relative_measurements(perf_data, baseline_optimization_level):
+def get_relative_measurements(perf_data,
+                              baseline_optimization_level,
+                              baseline_perf_data=None):
     """Computes a map of measurements that are relative to the given optimization level."""
+    if baseline_perf_data is None:
+        baseline_perf_data = perf_data
+
     results = {}
     for optimization_level, measurements in perf_data.items():
         results[optimization_level] = {}
         for test_name, data_point in measurements.items():
             results[optimization_level][test_name] = (
-                data_point / perf_data[baseline_optimization_level][test_name])
+                data_point /
+                baseline_perf_data[baseline_optimization_level][test_name])
 
     return results
 
@@ -219,6 +346,54 @@ def generate_color_scheme(sorted_opt_levels):
     return color_scheme
 
 
+def filter_perf_data(perf_data,
+                     filter_quantity=None,
+                     filter_opt_level=None,
+                     filter_test=None):
+    """Applies a number of filters to performance data.
+
+       `perf_data` maps quantity names to lists of
+       `(optimization level, [(test, data), ...])` pairs. Each filter is a
+       predicate on the corresponding name; a filter that is None accepts
+       everything. Optimization levels and quantities that end up with no
+       data at all are dropped from the result, which is a new dictionary."""
+
+    def accept_all(_):
+        return True
+
+    keep_quantity = accept_all if filter_quantity is None else filter_quantity
+    keep_opt_level = (accept_all
+                      if filter_opt_level is None else filter_opt_level)
+    keep_test = accept_all if filter_test is None else filter_test
+
+    filtered = {}
+    for quantity, quantity_data in perf_data.items():
+        if not keep_quantity(quantity):
+            continue
+
+        kept_levels = []
+        for opt_level, opt_data in quantity_data:
+            if keep_opt_level(opt_level):
+                kept_tests = [(test, data) for test, data in opt_data
+                              if keep_test(test)]
+                if kept_tests:
+                    kept_levels.append((opt_level, kept_tests))
+
+        if kept_levels:
+            filtered[quantity] = kept_levels
+
+    return filtered
+
+
+def subtract_perf_data(lhs_perf_dict, *rhs_perf_dicts):
+    """Performs measurement-wise subtraction on performance dictionaries.
+
+       Every dictionary maps optimization levels to dictionaries that map
+       test names to measurements. For each measurement in `lhs_perf_dict`,
+       the matching measurements of all `rhs_perf_dicts` are summed and
+       subtracted from it. Each right-hand side dictionary is indexed with
+       every optimization level and test name of the left-hand side."""
+    differences = {}
+    for opt_level, opt_data in lhs_perf_dict.items():
+        remaining = {}
+        for test_name, measurement in opt_data.items():
+            subtrahend = sum(rhs[opt_level][test_name]
+                             for rhs in rhs_perf_dicts)
+            remaining[test_name] = measurement - subtrahend
+        differences[opt_level] = remaining
+    return differences
+
+
 def main():
     arg_parser = argparse.ArgumentParser()
     arg_parser.add_argument(
@@ -259,41 +434,60 @@ def main():
         help="The width, in points, of a bar on the bar chart. Defaults to '%s'."
         % DEFAULT_BAR_WIDTH,
         default=DEFAULT_BAR_WIDTH)
+    arg_parser.add_argument(
+        '-s',
+        '--split',
+        type=str,
+        nargs='*',
+        help="Picks other quantities which are subtracted from the "
+        "main quantity and then combined in a stacked chart.")
 
     args = arg_parser.parse_args()
 
-    perf_data = utils.parse_perf_data(args.input)[args.quantity]
+    all_perf_data = utils.parse_perf_data(args.input)
     sorted_opt_levels = None
 
+    opt_level_filter = None
     if args.opt:
         optimization_set = set(args.opt)
-        perf_data = [(optimization_level, measurements)
-                     for optimization_level, measurements in perf_data
-                     if optimization_level in optimization_set]
+        opt_level_filter = lambda x: x in optimization_set
         sorted_opt_levels = list(args.opt)
 
+    test_filter = None
     if args.test:
         test_set = set(args.test)
-        new_perf_data = []
-        for optimization_level, measurements in perf_data:
-            new_measurements = []
-            for test_name, data_point in measurements:
-                if test_name in test_set:
-                    new_measurements.append((test_name, data_point))
+        test_filter = lambda x: x in test_set
 
-            if len(new_measurements) > 0:
-                new_perf_data.append((optimization_level, new_measurements))
-        perf_data = new_perf_data
+    all_perf_data = filter_perf_data(
+        all_perf_data,
+        filter_opt_level=opt_level_filter,
+        filter_test=test_filter)
 
-    perf_data_dict = perf_list_to_dict(perf_data)
+    main_perf_data = perf_list_to_dict(all_perf_data[args.quantity])
+    baseline_opt_level = get_baseline_optimization_level(main_perf_data)
 
-    if args.relative:
-        baseline_opt_level = get_baseline_optimization_level(perf_data_dict)
-        perf_data_dict = get_relative_measurements(perf_data_dict,
-                                                   baseline_opt_level)
+    split_perf_data = [(s, perf_list_to_dict(all_perf_data[s]))
+                       for s in (args.split if args.split is not None else [])]
+    sub_perf_data = subtract_perf_data(
+        main_perf_data, *[s_data for _, s_data in split_perf_data])
 
-    print(create_latex_chart(perf_data_dict, sorted_opt_levels, args.embed,
-                             args.bar_width))
+    if args.relative:
+        sub_perf_data = get_relative_measurements(
+            sub_perf_data, baseline_opt_level, main_perf_data)
+        split_perf_data = [(q, get_relative_measurements(
+            split_data, baseline_opt_level, main_perf_data))
+                           for q, split_data in split_perf_data]
+
+    split_perf_data = [(q, perf_dict_to_list(split_data))
+                       for q, split_data in split_perf_data]
+
+    if len(split_perf_data) == 0:
+        print(create_latex_chart(sub_perf_data, sorted_opt_levels, args.embed,
+                                 args.bar_width))
+    else:
+        print(create_stacked_latex_chart([
+            (args.quantity, perf_dict_to_list(sub_perf_data))
+        ] + split_perf_data, sorted_opt_levels, args.embed, args.bar_width))
 
 
 if __name__ == '__main__':