Forráskód Böngészése

Merge branch 'optimize-matcher' (not yet seeing the performance enhancement i was hoping for...)

Joeri Exelmans 8 hónapja
szülő
commit
da5856b33b
3 módosított fájl, 155 hozzáadás és 68 törlés
  1. 63 28
      examples/performance/runner.py
  2. 23 2
      transformation/matcher.py
  3. 69 38
      transformation/vf2.py

+ 63 - 28
examples/performance/runner.py

@@ -1,3 +1,4 @@
+# Artificial model transformation thingy to measure performance
 # Model transformation experiment
 
 from state.devstate import DevState
@@ -31,48 +32,77 @@ if __name__ == "__main__":
         Many:Class
         ManyB:Class
         Other:Class
-        OtherB:Class
-        OtherC:Class
         ass:Association(Many->ManyB)
+        ass2:Association(Rare->Many)
     """
     dsl_mm_id = parser.parse_od(state, dsl_mm_cs, mm=scd_mmm_id)
     
     dsl_m_cs = """
         rare:Rare
+
         many0:Many
+        many0B:Many
         many1:Many
+        many1B:Many
         many2:Many
+        many2B:Many
         many3:Many
+        many3B:Many
         many4:Many
+        many4B:Many
+
         many5:ManyB
         many6:ManyB
         many7:ManyB
         many8:ManyB
+        many50:ManyB
+        many60:ManyB
+        many70:ManyB
+        many80:ManyB
+        many51:ManyB
+        many61:ManyB
+        many71:ManyB
+        many81:ManyB
+        many501:ManyB
+        many601:ManyB
+        many701:ManyB
+        many801:ManyB
+        many5Z:ManyB
+        many6Z:ManyB
+        many7Z:ManyB
+        many8Z:ManyB
+        many50Z:ManyB
+        many60Z:ManyB
+        many70Z:ManyB
+        many80Z:ManyB
+        many51Z:ManyB
+        many61Z:ManyB
+        many71Z:ManyB
+        many81Z:ManyB
+        many501Z:ManyB
+        many601Z:ManyB
+        many701Z:ManyB
+        many801Z:ManyB
+
+        Other0:Other
+        Other1:Other
+        Other2:Other
+        Other3:Other
+        Other0B:Other
+        Other1B:Other
+        Other2B:Other
+        Other3B:Other
+        Other0C:Other
+        Other1C:Other
+        Other2C:Other
+        Other3C:Other
 
         :ass (many2->many6)
         :ass (many3->many8)
 
-        # other0:Other
-        # other1:OtherC
-        # other2:Other
-        # other3:Other
-        # other4:Other
-        # other5:OtherB
-        # other6:OtherB
-        # other7:OtherB
-        # other8:OtherB
-        # other9:OtherB
-        # other10:OtherB
-        # other11:OtherC
-        # other12:OtherC
-        # other13:OtherC
-        # other14:OtherC
-
-        # other1099:OtherB
-        # other1199:OtherC
-        # other1299:OtherC
-        # other1399:OtherC
-        # other1499:OtherC
+        :ass2 (rare -> many0)
+        :ass2 (rare -> many1)
+        :ass2 (rare -> many2)
     """
     dsl_m_id = parser.parse_od(state, dsl_m_cs, mm=dsl_mm_id)
 
@@ -86,17 +116,22 @@ if __name__ == "__main__":
     # TODO: enable more powerful constraints
     pattern_cs = f"""
         # object to match
-        rare:{prefix}Rare {{
+        rare:RAM_Rare {{
         }}
 
-        many:{prefix}Many
-        manyB:{prefix}ManyB
-        manyB2:{prefix}ManyB
+        many:RAM_Many
+        manyB:RAM_ManyB
+        manyB2:RAM_ManyB
+
+        :RAM_ass (many -> manyB)
+        :RAM_ass (many -> manyB2)
+        :RAM_ass2 (rare -> many)
     """
     pattern_id = parser.parse_od(state, pattern_cs, mm=ramified_mm_id)
 
     with Timer("find all matches"):
-        matches = list(match_od(state, dsl_m_id, dsl_mm_id, pattern_id, ramified_mm_id))
+        for i in range(100):
+            matches = list(match_od(state, dsl_m_id, dsl_mm_id, pattern_id, ramified_mm_id))
 
 
     for match in matches:

+ 23 - 2
transformation/matcher.py

@@ -12,7 +12,7 @@ import itertools
 import re
 import functools
 
-from util.timer import Timer
+from util.timer import Timer, counted
 
 class _is_edge:
     def __repr__(self):
@@ -286,6 +286,27 @@ def match_od(state, host_m, host_mm, pattern_m, pattern_mm, pivot={}):
     g_names, guest = model_to_graph(state, pattern_m, pattern_mm,
         _filter=is_matchable)
 
+    # precompute the candidates for every guest vertex:
+    guest_to_host_candidate_vtxs = {}
+    vtxs_of_host_type = {}
+
+    for g_vtx in guest.vtxs:
+        object_node = g_vtx.node_id
+        if hasattr(g_vtx, 'typ'):
+            orig_class_node = ramify.get_original_type(bottom, g_vtx.typ)
+            orig_class_name = odapi.get_name(orig_class_node)
+            if orig_class_name in vtxs_of_host_type:
+                cands = vtxs_of_host_type[orig_class_name]
+            else:
+                cands = vtxs_of_host_type[orig_class_name] = len(odapi.get_all_instances(orig_class_name, include_subtypes=True))
+        else:
+            cands = len(host.vtxs)
+        guest_to_host_candidate_vtxs[g_vtx] = cands
+
+    # print(guest_to_host_candidate_vtxs)
+
+
+    # transform 'pivot' into something VF2 understands
     graph_pivot = {
         g_names[guest_name] : h_names[host_name]
             for guest_name, host_name in pivot.items()
@@ -339,7 +360,7 @@ def match_od(state, host_m, host_mm, pattern_m, pattern_mm, pivot={}):
 
 
     compare = RAMCompare(bottom, services_od.OD(host_mm, host_m, state))
-    matcher = MatcherVF2(host, guest, compare)
+    matcher = MatcherVF2(host, guest, compare, guest_to_host_candidate_vtxs)
     for m in matcher.match(graph_pivot):
         # Convert mapping
         name_mapping = {}

+ 69 - 38
transformation/vf2.py

@@ -4,7 +4,7 @@
 
 import itertools
 
-from util.timer import Timer
+from util.timer import Timer, counted
 
 # like finding the 'strongly connected componenets', but edges are navigable in any direction
 def find_connected_components(graph):
@@ -81,6 +81,11 @@ class MatcherState:
         state = MatcherState()
         state.h_unmatched_vtxs = [vtx for vtx in host.vtxs if vtx not in pivot.values()]
         state.g_unmatched_vtxs = [vtx for vtx in guest.vtxs if vtx not in pivot.keys()]
+        # if guest_to_host_candidates != None:
+        #     state.g_unmatched_vtxs.sort(
+        #         # performance thingy:
+        #         # try to match guest vtxs with few candidates first (fail early!):
+        #         key=lambda guest_vtx: guest_to_host_candidates.get(guest_vtx, 0))
         state.mapping_vtxs = pivot
         state.r_mapping_vtxs = { v: k for k,v in state.mapping_vtxs.items() }
         return state
@@ -129,27 +134,42 @@ class MatcherState:
         # return self.make_hashable().__repr__()
         return "VTXS: "+self.mapping_vtxs.__repr__()+"\nEDGES: "+self.mapping_edges.__repr__()
 
+
 class MatcherVF2:
     # Guest is the pattern
-    def __init__(self, host, guest, compare_fn):
+    def __init__(self, host, guest, compare_fn, guest_to_host_candidates=None):
         self.host = host
         self.guest = guest
         self.compare_fn = compare_fn
 
+        # map guest vertex to number of candidate vertices in host graph:
+        if guest_to_host_candidates != None:
+            self.guest_to_host_candidates = guest_to_host_candidates
+        else:
+            # atttempt to match every guest vertex with every host vertex (slow!)
+            self.guest_to_host_candidates = { g_vtx : len(host.vtxs) for g_vtx in guest.vtxs }
+
         # with Timer("find_connected_components - guest"):
         self.guest_vtx_to_component, self.guest_component_to_vtxs = find_connected_components(guest)
 
-        # print("number of guest connected components:", len(self.guest_component_to_vtxs))
+        for component in self.guest_component_to_vtxs:
+            pass
+            # sort vertices in component such that the vertices of the rarest type (with the fewest element) occurs first
+            component.sort(key=lambda guest_vtx: guest_to_host_candidates[guest_vtx])
+        if len(self.guest_component_to_vtxs) > 1:
+            print("warning: pattern has multiple components:", len(self.guest_component_to_vtxs))
 
     def match(self, pivot={}):
         yield from self._match(
             state=MatcherState.make_initial(self.host, self.guest, pivot),
             already_visited=set())
 
-
+    # @counted
     def _match(self, state, already_visited, indent=0):
         # input()
 
+        num_matches = 0
+
         def print_debug(*args):
             pass
             # print("  "*indent, *args) # uncomment to see a trace of the matching process
@@ -161,7 +181,7 @@ class MatcherVF2:
         if hashable in already_visited:
             print_debug("    SKIP - ALREADY VISITED")
             # print_debug("   ", hashable)
-            return
+            return 0
         # print_debug("   ", [hash(a) for a in already_visited])
         # print_debug("    ADD STATE")
         # print_debug("   ", hash(hashable))
@@ -173,7 +193,7 @@ class MatcherVF2:
             print_debug(" ", state.mapping_vtxs)
             print_debug(" ", state.mapping_edges)
             yield state
-            return
+            return 1
 
         def read_edge(edge, direction):
             if direction == "outgoing":
@@ -184,6 +204,7 @@ class MatcherVF2:
                 raise Exception("wtf!")
 
         def attempt_grow(direction, indent):
+            num_matches = 0
             for g_matched_vtx, h_matched_vtx in state.mapping_vtxs.items():
                 print_debug('attempt_grow', direction)
                 for g_candidate_edge in getattr(g_matched_vtx, direction):
@@ -204,69 +225,79 @@ class MatcherVF2:
                         print_debug('grow edge', g_candidate_edge, ':', h_candidate_edge, id(g_candidate_edge), id(h_candidate_edge))
                         new_state = state.grow_edge(h_candidate_edge, g_candidate_edge)
                         h_candidate_vtx = read_edge(h_candidate_edge, direction)
-                        yield from attempt_match_vtxs(
+                        num_matches += yield from attempt_match_vtxs(
                             new_state,
                             g_candidate_vtx,
                             h_candidate_vtx,
                             indent+1)
                         print_debug('backtrack edge', g_candidate_edge, ':', h_candidate_edge, id(g_candidate_edge), id(h_candidate_edge))
+            return num_matches
 
         def attempt_match_vtxs(state, g_candidate_vtx, h_candidate_vtx, indent):
             print_debug('attempt_match_vtxs')
             if g_candidate_vtx in state.mapping_vtxs:
                 if state.mapping_vtxs[g_candidate_vtx] != h_candidate_vtx:
                     print_debug("  nope, guest already mapped (mismatch)")
-                    return # guest vtx is already mapped but doesn't match host vtx
+                    return 0 # guest vtx is already mapped but doesn't match host vtx
             if h_candidate_vtx in state.r_mapping_vtxs:
                 if state.r_mapping_vtxs[h_candidate_vtx] != g_candidate_vtx:
                     print_debug("  nope, host already mapped (mismatch)")
-                    return # host vtx is already mapped but doesn't match guest vtx
+                    return 0 # host vtx is already mapped but doesn't match guest vtx
             g_outdegree = len(g_candidate_vtx.outgoing)
             h_outdegree = len(h_candidate_vtx.outgoing)
             if g_outdegree > h_outdegree:
                 print_debug("  nope, outdegree")
-                return
+                return 0
             g_indegree = len(g_candidate_vtx.incoming)
             h_indegree = len(h_candidate_vtx.incoming)
             if g_indegree > h_indegree:
                 print_debug("  nope, indegree")
-                return
+                return 0
             if not self.compare_fn(g_candidate_vtx, h_candidate_vtx):
                 print_debug("  nope, bad compare")
-                return
+                return 0
             new_state = state.grow_vtx(
                 h_candidate_vtx,
                 g_candidate_vtx)
             print_debug('grow vtx', g_candidate_vtx, ':', h_candidate_vtx, id(g_candidate_vtx), id(h_candidate_vtx))
-            yield from self._match(new_state, already_visited, indent+1)
+            num_matches = yield from self._match(new_state, already_visited, indent+1)
             print_debug('backtrack vtx', g_candidate_vtx, ':', h_candidate_vtx, id(g_candidate_vtx), id(h_candidate_vtx))
+            return num_matches
 
         print_debug('preferred...')
-        yield from attempt_grow('outgoing', indent+1)
-        yield from attempt_grow('incoming', indent+1)
-
-        print_debug('least preferred...')
-        if state.boundary != None:
-            g_boundary_vtx, _ = state.boundary
-            guest_boundary_component = self.guest_vtx_to_component[g_boundary_vtx]
-            # only try guest vertices that are in a different component (all vertices in the same component are already discovered via 'attempt_grow')
-            guest_components_to_try = (c for i,c in enumerate(self.guest_component_to_vtxs) if i != guest_boundary_component)
-            # for the host vertices however, we have to try them from all components, because different connected components of our pattern (=guest) could be mapped onto the same connected component in the host
-        else:
-            guest_components_to_try = self.guest_component_to_vtxs
-
-        for g_component in guest_components_to_try:
-                # we only need to pick ONE vertex from the component
-                # in the future, this can be optimized further by picking the vertex of the type with the fewest instances
-                g_candidate_vtx = g_component[0]
-                if g_candidate_vtx in state.mapping_vtxs:
-                    print_debug("skip (already matched)", g_candidate_vtx)
-                    continue
-                for h_candidate_vtx in state.h_unmatched_vtxs:
-                    yield from attempt_match_vtxs(state, g_candidate_vtx, h_candidate_vtx, indent+1)
-
-        if indent == 0:
-            print_debug('visited', len(already_visited), 'states total')
+        num_matches += yield from attempt_grow('outgoing', indent+1)
+        num_matches += yield from attempt_grow('incoming', indent+1)
+
+        if num_matches == 0:
+            print_debug('least preferred...')
+            if state.boundary != None:
+                g_boundary_vtx, _ = state.boundary
+                guest_boundary_component = self.guest_vtx_to_component[g_boundary_vtx]
+                # only try guest vertices that are in a different component (all vertices in the same component are already discovered via 'attempt_grow')
+                guest_components_to_try = (c for i,c in enumerate(self.guest_component_to_vtxs) if i != guest_boundary_component)
+                # for the host vertices however, we have to try them from all components, because different connected components of our pattern (=guest) could be mapped onto the same connected component in the host
+            else:
+                guest_components_to_try = self.guest_component_to_vtxs
+
+            for g_component in guest_components_to_try:
+                    # we only need to pick ONE vertex from the component
+                    # in the future, this can be optimized further by picking the vertex of the type with the fewest instances
+                    g_candidate_vtx = g_component[0]
+                    g_vtx_matches = 0
+                    g_vtx_max = self.guest_to_host_candidates[g_candidate_vtx]
+                    # print(' guest vtx has', g_vtx_max, ' host candidates')
+                    if g_candidate_vtx in state.mapping_vtxs:
+                        print_debug("skip (already matched)", g_candidate_vtx)
+                        continue
+                    for h_candidate_vtx in state.h_unmatched_vtxs:
+                        N = yield from attempt_match_vtxs(state, g_candidate_vtx, h_candidate_vtx, indent+1)
+                        g_vtx_matches += N > 0
+                        num_matches += N
+                        if g_vtx_matches == g_vtx_max:
+                            print("EARLY STOP")
+                            break # found all matches
+
+        return num_matches
 
 # demo time...
 if __name__ == "__main__":