utils.py

import unittest
import sys
import os
import tempfile
import time
import json
import urllib
import urllib2
import subprocess
import signal
import random
import operator
from collections import defaultdict

sys.path.append("interface/HUTN")
sys.path.append("scripts")

from hutn_compiler.compiler import main as do_compile
from check_objects import to_recompile

USERNAME = "test_task"
PARALLEL_PUSH = True
BOOTSTRAP_FOLDER_NAME = "bootstrap"
CURRENT_FOLDER_NAME = "performance"
MODELVERSE_TIMEOUT = 600
PORTS = set()

OPTIMIZATION_LEVEL_LEGACY_INTERPRETER = "legacy-interpreter"
OPTIMIZATION_LEVEL_INTERPRETER = "interpreter"
OPTIMIZATION_LEVEL_BYTECODE_INTERPRETER = "bytecode-interpreter"
OPTIMIZATION_LEVEL_BASELINE_JIT = "baseline-jit"
OPTIMIZATION_LEVEL_BASELINE_JIT_NO_THUNKS = "baseline-jit,no-thunks"
OPTIMIZATION_LEVEL_FAST_JIT = "fast-jit"
OPTIMIZATION_LEVEL_FAST_JIT_NO_NOPS = "fast-jit,no-insert-nops"
OPTIMIZATION_LEVEL_ADAPTIVE_JIT_FAVOR_LARGE_FUNCTIONS = "adaptive-jit-favor-large-functions"
OPTIMIZATION_LEVEL_ADAPTIVE_JIT_FAVOR_SMALL_FUNCTIONS = "adaptive-jit-favor-small-functions"
OPTIMIZATION_LEVEL_ADAPTIVE_JIT_FAVOR_LOOPS = "adaptive-jit-favor-loops"
OPTIMIZATION_LEVEL_ADAPTIVE_JIT_FAVOR_SMALL_LOOPS = "adaptive-jit-favor-small-loops"

ALL_OPTIMIZATION_LEVELS = [
    OPTIMIZATION_LEVEL_LEGACY_INTERPRETER,
    OPTIMIZATION_LEVEL_INTERPRETER,
    OPTIMIZATION_LEVEL_BYTECODE_INTERPRETER,
    OPTIMIZATION_LEVEL_BASELINE_JIT,
    OPTIMIZATION_LEVEL_BASELINE_JIT_NO_THUNKS,
    OPTIMIZATION_LEVEL_FAST_JIT,
    OPTIMIZATION_LEVEL_FAST_JIT_NO_NOPS,
    OPTIMIZATION_LEVEL_ADAPTIVE_JIT_FAVOR_LARGE_FUNCTIONS,
    OPTIMIZATION_LEVEL_ADAPTIVE_JIT_FAVOR_SMALL_FUNCTIONS,
    OPTIMIZATION_LEVEL_ADAPTIVE_JIT_FAVOR_LOOPS,
    OPTIMIZATION_LEVEL_ADAPTIVE_JIT_FAVOR_SMALL_LOOPS
]

class ModelverseTerminated(Exception):
    """An exception that tells the task that the Modelverse has terminated."""
    pass

def get_code_folder_name():
    """Gets the name of the code folder."""
    return '%s/code' % CURRENT_FOLDER_NAME

def get_free_port():
    """Gets a unique new port."""
    while 1:
        port = random.randint(10000, 20000)
        # Check if this port is in the set of ports.
        if port not in PORTS:
            # We have found a unique port. Add it to the set and return.
            PORTS.add(port)
            return port

def execute(scriptname, parameters=None, wait=False):
    """Runs a script."""
    if os.name not in ["nt", "posix"]:
        # Stop now, as we would have no clue on how to kill its subtree
        raise Exception("Unknown OS version: " + str(os.name))
    command = [sys.executable, "scripts/%s.py" % scriptname] + (
        [] if parameters is None else parameters)
    if wait:
        return subprocess.call(command, shell=False)
    else:
        return subprocess.Popen(command, shell=False)

def kill(process):
    """Kills the given process."""
    if os.name == "nt":
        subprocess.call(["taskkill", "/F", "/T", "/PID", "%i" % process.pid])
    elif os.name == "posix":
        subprocess.call(["pkill", "-P", "%i" % process.pid])

def set_input_data(address, data):
    """Sets the Modelverse program's input data."""
    if data is not None:
        urllib2.urlopen(
            urllib2.Request(
                address,
                urllib.urlencode(
                    {"op": "set_input", "data": json.dumps(data), "taskname": USERNAME})),
            timeout=MODELVERSE_TIMEOUT).read()
    else:
        return []

def compile_file(address, mod_filename, filename, mode, proc):
    """Compiles the given file."""
    # Load in the file required
    try:
        timeout_val = MODELVERSE_TIMEOUT
        taskname = str(random.random())
        while 1:
            proc2 = execute(
                "compile", [address, mod_filename, taskname, filename, mode], wait=False)
            if proc.returncode is not None:
                # Modelverse has already terminated, which isn't a good sign!
                raise Exception("Modelverse died!")
            while proc2.returncode is None:
                time.sleep(0.01)
                proc2.poll()
                timeout_val -= 0.01
                if timeout_val < 0:
                    kill(proc2)
                    print("Compilation timeout expired!")
                    return False
            if proc2.returncode != 2:
                break
        # Make sure everything stopped correctly
        assert proc2.returncode == 0
        if proc2.returncode != 0:
            return False
    except:
        raise
    finally:
        try:
            kill(proc2)
        except UnboundLocalError:
            pass

def compile_files(address, process, files, mode):
    """Compiles the given files in the given mode."""
    threads = []
    mod_files = []
    for filename in files:
        if os.path.isfile(filename):
            mod_filename = filename
        elif os.path.isfile("%s/%s" % (get_code_folder_name(), filename)):
            mod_filename = "%s/%s" % (get_code_folder_name(), filename)
        elif os.path.isfile("%s/%s" % (BOOTSTRAP_FOLDER_NAME, filename)):
            mod_filename = "%s/%s" % (BOOTSTRAP_FOLDER_NAME, filename)
        else:
            raise Exception("File not found: %s" % filename)
        mod_files.append(mod_filename)
    to_compile = to_recompile(address, mod_files)
    for mod_filename in to_compile:
        if mod_filename.endswith(".mvc"):
            model_mode = "MO"
            mod_files.remove(mod_filename)
        else:
            model_mode = mode
        if PARALLEL_PUSH:
            import threading
            threads.append(
                threading.Thread(
                    target=compile_file,
                    args=[address, mod_filename, mod_filename, model_mode, process]))
            threads[-1].start()
        else:
            compile_file(address, mod_filename, mod_filename, model_mode, process)
    if PARALLEL_PUSH:
        for t in threads:
            t.join()
    if mode[-1] == "O":
        # Fire up the linker
        val = execute("link_and_load", [address, USERNAME] + mod_files, wait=True)
        if val != 0:
            raise Exception("Linking error")

def run_file(files, parameters, mode, handle_output, optimization_level=None, jit_timing_log=None):
    """Compiles the given sequence of files, feeds them the given input in the given mode,
    and handles their output."""
    # Resolve file
    import os.path
    time.sleep(0.01)
    port = get_free_port()
    address = "http://127.0.0.1:%i" % port
    try:
        # Run Modelverse server
        modelverse_args = [str(port)]
        if optimization_level is not None:
            modelverse_args.append('--kernel=%s' % optimization_level)
        if jit_timing_log is not None:
            modelverse_args.append('--jit-timing-log=%s' % jit_timing_log)
        proc = execute("run_local_modelverse", modelverse_args, wait=False)
        # Compile, push and link the source code files.
        compile_files(address, proc, files, mode)
        # Send the request ...
        set_input_data(address, parameters)
        # ... and wait for replies
        while 1:
            val = urllib2.urlopen(
                urllib2.Request(
                    address,
                    urllib.urlencode({"op": "get_output", "taskname": USERNAME})),
                timeout=MODELVERSE_TIMEOUT).read()
            val = json.loads(val)
            if proc.returncode is not None:
                # Modelverse has terminated. This may or may not be what we want.
                raise ModelverseTerminated()
            if not handle_output(val):
                return
        # All passed!
        return
    except:
        raise
    finally:
        try:
            kill(proc)
        except UnboundLocalError:
            pass

def run_file_to_completion(files, parameters, mode):
    """Compiles the given sequence of files, feeds them the given input in the given mode,
    and then collects and returns output."""
    results = []
    def handle_output(output):
        """Appends the given output to the list of results."""
        results.append(output)
        return True
    try:
        run_file(files, parameters, mode, handle_output)
    except ModelverseTerminated:
        return results

def run_file_fixed_output_count(
        files, parameters, mode, output_count, optimization_level=None, jit_timing_log=None):
    """Compiles the given sequence of files, feeds them the given input in the given mode,
    and then collects and returns a fixed number of outputs."""
    results = []
    def handle_output(output):
        """Appends the given output to the list of results."""
        results.append(output)
        if len(results) < output_count:
            return True
        else:
            return False
    run_file(files, parameters, mode, handle_output, optimization_level, jit_timing_log)
    return results

def run_file_single_output(files, parameters, mode, optimization_level=None, jit_timing_log=None):
    """Compiles the given sequence of files, feeds them the given input in the given mode,
    and then collects and returns a single output."""
    return run_file_fixed_output_count(
        files, parameters, mode, 1, optimization_level, jit_timing_log)[0]
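
# Illustrative usage sketch (the file name and inputs below are hypothetical,
# not part of the original test suite): compile, link and run a single program
# and grab its first output value.
#
#   result = run_file_single_output(
#       ["my_test.alc"], [5, 7], "CO", OPTIMIZATION_LEVEL_BASELINE_JIT)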

def mean(values):
    """Computes the arithmetic mean of the given values."""
    return float(sum(values)) / max(len(values), 1)

def parse_jit_timing_log(log_file):
    """Parses the JIT timing log entries from the given file."""
    results = []
    for line in log_file.readlines():
        first, _, data = line.strip().rpartition(':')
        _, _, name = first.strip().rpartition(' ')
        results.append((name, float(data)))
    return results
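
# Note on the assumed log format: every entry is expected to end in
# "<name>: <seconds>", e.g. "compiled function fibonacci: 0.0123" (example line
# only), so rpartition(':') splits off the timing and rpartition(' ') isolates
# the function name.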

def run_perf_test(files, parameters, optimization_level, n_iterations=1):
    """Compiles the given sequence of files, feeds them the given input in the given mode,
    and then collects their output. This process is repeated n_iterations times. The
    return value is the average of all outputs, along with the mean total run-time."""
    test_runtimes = []
    total_runtimes = []
    compile_times = []
    for _ in xrange(n_iterations):
        try:
            timing_log = tempfile.mktemp()
            start_time = time.time()
            test_time = run_file_single_output(
                files, parameters, 'CO',
                optimization_level, timing_log)
            end_time = time.time()
            total_time = end_time - start_time
            test_runtimes.append(test_time)
            total_runtimes.append(total_time)
            with open(timing_log, 'r') as log_file:
                parsed_times = parse_jit_timing_log(log_file)
                compile_times.append(sum([data for _, data in parsed_times]))
        finally:
            os.remove(timing_log)
    return {
        TEST_TIME_QUANTITY: mean(test_runtimes),
        TOTAL_TIME_QUANTITY: mean(total_runtimes),
        COMPILE_TIME_QUANTITY: mean(compile_times)
    }
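
# Illustrative usage sketch (hypothetical test file and input): average the
# measured quantities over three runs at the fast-jit optimization level.
#
#   perf = run_perf_test(
#       ["my_test.alc"], [20], OPTIMIZATION_LEVEL_FAST_JIT, n_iterations=3)
#   # perf maps TEST_TIME_QUANTITY, TOTAL_TIME_QUANTITY and
#   # COMPILE_TIME_QUANTITY to their mean values.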

def get_expectation_checks(expected_values):
    """Converts the given sequence of expected values to a sequence of functions that tell
    whether an output is allowed. Every function is accompanied by its expected value."""
    def get_single_expectation_checks(expectation):
        """Gets an expectation checker for a single expected value."""
        if isinstance(expectation, set):
            # We expect to receive a number of outputs equal to the size of the set, but their
            # order does not matter.
            for _ in xrange(len(expectation)):
                yield lambda val: val in expectation
        elif expectation is None:
            # Skip this output value.
            yield lambda _: True
        else:
            yield lambda val: val == expectation
    for expectation in expected_values:
        for checker in get_single_expectation_checks(expectation):
            yield checker, expectation
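
# Illustrative example (not part of the original module): for
# expected_values = [1, {2, 3}, None], this generator yields four
# (checker, expectation) pairs: an exact-match check for 1, two
# order-insensitive membership checks for {2, 3}, and a wildcard for None.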

def run_correctness_test(files, parameters, expected, optimization_level):
    """Compiles the given sequence of files, feeds them the given input in the given mode,
    and then compares the output with the expected output. The return value is a dictionary
    of measured quantities."""
    checks = iter(list(get_expectation_checks(expected)))
    next_check = [next(checks)]
    def handle_output(output):
        """Checks the given output against the expected output."""
        check, expectation = next_check[0]
        print("Got %s, expect %s" % (output, expectation))
        assert check(output)
        try:
            next_check[0] = next(checks)
            return True
        except StopIteration:
            return False
    timing_log = tempfile.mktemp()
    start_time = time.time()
    try:
        run_file(files, parameters, 'CO', handle_output, optimization_level, timing_log)
        with open(timing_log, 'r') as log_file:
            parsed_times = parse_jit_timing_log(log_file)
            compile_time = sum([data for _, data in parsed_times])
    except ModelverseTerminated:
        return
    finally:
        os.remove(timing_log)
    end_time = time.time()
    return {
        TOTAL_TIME_QUANTITY: end_time - start_time,
        COMPILE_TIME_QUANTITY: compile_time
    }

def format_output(output):
    """Formats the output of `run_file_to_completion` as a string."""
    return '\n'.join(output)

def define_perf_test(target_class, test_function, optimization_level):
    """Defines a performance test in the given class. The performance test calls the given
    function at the given optimization level."""
    setattr(
        target_class,
        'test_%s' % optimization_level.replace('-', '_').lower(),
        lambda self: test_function(self, optimization_level))

def define_perf_tests(target_class, test_function, optimization_levels=None):
    """Defines performance tests in the given class. Each test calls the given function."""
    if optimization_levels is None:
        optimization_levels = ALL_OPTIMIZATION_LEVELS
    for opt_level in optimization_levels:
        define_perf_test(target_class, test_function, opt_level)
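
# Illustrative usage sketch (hypothetical test class and file name): attach one
# generated test method per optimization level to a unittest.TestCase subclass.
#
#   class FibonacciPerfTest(unittest.TestCase):
#       def perf_test(self, opt_level):
#           write_perf_to_file(
#               "fibonacci", opt_level,
#               run_perf_test(["fibonacci.alc"], [20], opt_level))
#
#   define_perf_tests(FibonacciPerfTest, FibonacciPerfTest.perf_test)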

def get_model_constructor(code):
    """Compiles the given model code and returns its list of constructors."""
    # First change multiple spaces to a tab
    code_fragments = code.split("\n")
    code_fragments = [i for i in code_fragments if i.strip() != ""]
    code_fragments = [i.replace("    ", "\t") for i in code_fragments]
    initial_tabs = min([len(i) - len(i.lstrip("\t")) for i in code_fragments])
    code_fragments = [i[initial_tabs:] for i in code_fragments]
    code = "\n".join(code_fragments)
    with open("__model.mvc", "w") as f:
        f.write(code)
        f.flush()
    constructors = do_compile("__model.mvc", "interface/HUTN/grammars/modelling.g", "M") + ["exit"]
    return constructors

DEFAULT_PERF_FILE_NAME = 'perf_data.txt'

TOTAL_TIME_QUANTITY = 'total-runtime'
TEST_TIME_QUANTITY = 'test-runtime'
COMPILE_TIME_QUANTITY = 'compile-time'

def write_perf_entry_to_stream(
        test_name, optimization_level, quantity,
        result, output_stream):
    """Writes a performance measurement entry to the given stream."""
    output_stream.write('%s:%s:%s:%f\n' % (test_name, optimization_level, quantity, result))
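
# Example of the resulting line format (hypothetical values):
#   fibonacci:baseline-jit:total-runtime:1.234567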

def write_perf_to_file(
        test_name, optimization_level, runtimes, file_name=DEFAULT_PERF_FILE_NAME):
    """Writes performance data to a file."""
    with open(file_name, "a") as perf_file:
        for quantity, data_point in runtimes.items():
            write_perf_entry_to_stream(
                test_name, optimization_level, quantity, data_point, perf_file)

def write_total_runtime_to_file(
        test_name, optimization_level, total_runtime, file_name=DEFAULT_PERF_FILE_NAME):
    """Writes a total runtime entry to a file."""
    with open(file_name, "a") as perf_file:
        write_perf_entry_to_stream(
            test_name, optimization_level, TOTAL_TIME_QUANTITY, total_runtime, perf_file)

def parse_perf_data(file_name):
    """Parses the performance data in the given file."""
    results = defaultdict(lambda: defaultdict(list))
    with open(file_name, 'r') as perf_file:
        for line in perf_file.readlines():
            test_name, optimization_level, quantity, result = line.strip().split(':')
            results[quantity][optimization_level].append((test_name, float(result)))
    return {
        quantity: sorted(result_dict.items(), key=operator.itemgetter(0))
        for quantity, result_dict in results.items()
    }
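
# Illustrative shape of the parsed result (hypothetical data): a dictionary
# mapping each quantity to a list of (optimization_level, entries) pairs,
# sorted by optimization level, e.g.
#
#   {
#       'total-runtime': [
#           ('baseline-jit', [('fibonacci', 1.234567)]),
#           ('interpreter', [('fibonacci', 9.876543)])
#       ]
#   }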