123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386 |
- # This module contains a VF2-inspired graph matching algorithm
- # It defines its own Graph type, and can be used standalone (no dependencies on the rest of muMLE framework)
- # Author: Joeri Exelmans
- import itertools
- from util.timer import Timer, counted
- # like finding the 'strongly connected componenets', but edges are navigable in any direction
- def find_connected_components(graph):
- next_component = 0
- vtx_to_component = {}
- component_to_vtxs = []
- for vtx in graph.vtxs:
- if vtx in vtx_to_component:
- continue
- vtx_to_component[vtx] = next_component
- vtxs = []
- component_to_vtxs.append(vtxs)
- add_recursively(vtx, vtxs, vtx_to_component, next_component)
- next_component += 1
- return (vtx_to_component, component_to_vtxs)
- def add_recursively(vtx, vtxs: list, d: dict, component: int, already_visited: set = set()):
- if vtx in already_visited:
- return
- already_visited.add(vtx)
- vtxs.append(vtx)
- d[vtx] = component
- for edge in vtx.outgoing:
- add_recursively(edge.tgt, vtxs, d, component, already_visited)
- for edge in vtx.incoming:
- add_recursively(edge.src, vtxs, d, component, already_visited)
- class Graph:
- def __init__(self):
- self.vtxs = []
- self.edges = []
- class Vertex:
- def __init__(self, value):
- self.incoming = []
- self.outgoing = []
- self.value = value
- def __repr__(self):
- return f"V({self.value})"
- class Edge:
- def __init__(self, src: Vertex, tgt: Vertex, label=None):
- self.src = src
- self.tgt = tgt
- self.label = label
- # Add ourselves to src/tgt vertices
- self.src.outgoing.append(self)
- self.tgt.incoming.append(self)
- def __repr__(self):
- if self.label != None:
- return f"({self.src}--{self.label}->{self.tgt})"
- else:
- return f"({self.src}->{self.tgt})"
- class MatcherState:
- def __init__(self):
- self.mapping_vtxs = {} # guest -> host
- self.mapping_edges = {} # guest -> host
- self.r_mapping_vtxs = {} # host -> guest
- self.r_mapping_edges = {} # host -> guest
- self.h_unmatched_vtxs = []
- self.g_unmatched_vtxs = []
- # boundary is the most recently added (to the mapping) pair of (guest -> host) vertices
- self.boundary = None
- @staticmethod
- def make_initial(host, guest, pivot):
- state = MatcherState()
- state.h_unmatched_vtxs = [vtx for vtx in host.vtxs if vtx not in pivot.values()]
- state.g_unmatched_vtxs = [vtx for vtx in guest.vtxs if vtx not in pivot.keys()]
- # if guest_to_host_candidates != None:
- # state.g_unmatched_vtxs.sort(
- # # performance thingy:
- # # try to match guest vtxs with few candidates first (fail early!):
- # key=lambda guest_vtx: guest_to_host_candidates.get(guest_vtx, 0))
- state.mapping_vtxs = pivot
- state.r_mapping_vtxs = { v: k for k,v in state.mapping_vtxs.items() }
- return state
- # Grow the match set (creating a new copy)
- def grow_edge(self, host_edge, guest_edge):
- new_state = MatcherState()
- new_state.mapping_vtxs = self.mapping_vtxs
- new_state.mapping_edges = dict(self.mapping_edges)
- new_state.mapping_edges[guest_edge] = host_edge
- new_state.r_mapping_vtxs = self.r_mapping_vtxs
- new_state.r_mapping_edges = dict(self.r_mapping_edges)
- new_state.r_mapping_edges[host_edge] = guest_edge
- new_state.h_unmatched_vtxs = self.h_unmatched_vtxs
- new_state.g_unmatched_vtxs = self.g_unmatched_vtxs
- return new_state
- # Grow the match set (creating a new copy)
- def grow_vtx(self, host_vtx, guest_vtx):
- new_state = MatcherState()
- new_state.mapping_vtxs = dict(self.mapping_vtxs)
- new_state.mapping_vtxs[guest_vtx] = host_vtx
- new_state.mapping_edges = self.mapping_edges
- new_state.r_mapping_vtxs = dict(self.r_mapping_vtxs)
- new_state.r_mapping_vtxs[host_vtx] = guest_vtx
- new_state.r_mapping_edges = self.r_mapping_edges
- new_state.h_unmatched_vtxs = [h_vtx for h_vtx in self.h_unmatched_vtxs if h_vtx != host_vtx]
- new_state.g_unmatched_vtxs = [g_vtx for g_vtx in self.g_unmatched_vtxs if g_vtx != guest_vtx]
- new_state.boundary = (guest_vtx, host_vtx)
- return new_state
- def make_hashable(self):
- return frozenset(itertools.chain(
- ((gv,hv) for gv,hv in self.mapping_vtxs.items()),
- ((ge,he) for ge,he in self.mapping_edges.items()),
- ))
- def __repr__(self):
- # return self.make_hashable().__repr__()
- return "VTXS: "+self.mapping_vtxs.__repr__()+"\nEDGES: "+self.mapping_edges.__repr__()
- class MatcherVF2:
- # Guest is the pattern
- def __init__(self, host, guest, compare_fn, guest_to_host_candidates=None):
- self.host = host
- self.guest = guest
- self.compare_fn = compare_fn
- # map guest vertex to number of candidate vertices in host graph:
- if guest_to_host_candidates != None:
- self.guest_to_host_candidates = guest_to_host_candidates
- else:
- # atttempt to match every guest vertex with every host vertex (slow!)
- self.guest_to_host_candidates = { g_vtx : len(host.vtxs) for g_vtx in guest.vtxs }
- # with Timer("find_connected_components - guest"):
- self.guest_vtx_to_component, self.guest_component_to_vtxs = find_connected_components(guest)
- for component in self.guest_component_to_vtxs:
- pass
- # sort vertices in component such that the vertices of the rarest type (with the fewest element) occurs first
- component.sort(key=lambda guest_vtx: guest_to_host_candidates[guest_vtx])
- if len(self.guest_component_to_vtxs) > 1:
- print("warning: pattern has multiple components:", len(self.guest_component_to_vtxs))
- def match(self, pivot={}):
- yield from self._match(
- state=MatcherState.make_initial(self.host, self.guest, pivot),
- already_visited=set())
- # @counted
- def _match(self, state, already_visited, indent=0):
- # input()
- num_matches = 0
- def print_debug(*args):
- pass
- # print(" "*indent, *args) # uncomment to see a trace of the matching process
- print_debug("match")
- # Keep track of the states in the search space that we already visited
- hashable = state.make_hashable()
- if hashable in already_visited:
- print_debug(" SKIP - ALREADY VISITED")
- # print_debug(" ", hashable)
- return 0
- # print_debug(" ", [hash(a) for a in already_visited])
- # print_debug(" ADD STATE")
- # print_debug(" ", hash(hashable))
- already_visited.add(hashable)
- if len(state.mapping_vtxs) == len(self.guest.vtxs) and len(state.mapping_edges) == len(self.guest.edges):
- print_debug("GOT MATCH:")
- print_debug(" ", state.mapping_vtxs)
- print_debug(" ", state.mapping_edges)
- yield state
- return 1
- def read_edge(edge, direction):
- if direction == "outgoing":
- return edge.tgt
- elif direction == "incoming":
- return edge.src
- else:
- raise Exception("wtf!")
- def attempt_grow(direction, indent):
- num_matches = 0
- for g_matched_vtx, h_matched_vtx in state.mapping_vtxs.items():
- print_debug('attempt_grow', direction)
- for g_candidate_edge in getattr(g_matched_vtx, direction):
- print_debug('g_candidate_edge:', g_candidate_edge)
- g_candidate_vtx = read_edge(g_candidate_edge, direction)
- # g_to_skip_vtxs.add(g_candidate_vtx)
- if g_candidate_edge in state.mapping_edges:
- print_debug(" skip, guest edge already matched")
- continue # skip already matched guest edge
- for h_candidate_edge in getattr(h_matched_vtx, direction):
- if g_candidate_edge.label != h_candidate_edge.label:
- print_debug(" labels differ")
- continue
- print_debug('h_candidate_edge:', h_candidate_edge)
- if h_candidate_edge in state.r_mapping_edges:
- print_debug(" skip, host edge already matched")
- continue # skip already matched host edge
- print_debug('grow edge', g_candidate_edge, ':', h_candidate_edge, id(g_candidate_edge), id(h_candidate_edge))
- new_state = state.grow_edge(h_candidate_edge, g_candidate_edge)
- h_candidate_vtx = read_edge(h_candidate_edge, direction)
- num_matches += yield from attempt_match_vtxs(
- new_state,
- g_candidate_vtx,
- h_candidate_vtx,
- indent+1)
- print_debug('backtrack edge', g_candidate_edge, ':', h_candidate_edge, id(g_candidate_edge), id(h_candidate_edge))
- return num_matches
- def attempt_match_vtxs(state, g_candidate_vtx, h_candidate_vtx, indent):
- print_debug('attempt_match_vtxs')
- if g_candidate_vtx in state.mapping_vtxs:
- if state.mapping_vtxs[g_candidate_vtx] != h_candidate_vtx:
- print_debug(" nope, guest already mapped (mismatch)")
- return 0 # guest vtx is already mapped but doesn't match host vtx
- if h_candidate_vtx in state.r_mapping_vtxs:
- if state.r_mapping_vtxs[h_candidate_vtx] != g_candidate_vtx:
- print_debug(" nope, host already mapped (mismatch)")
- return 0 # host vtx is already mapped but doesn't match guest vtx
- g_outdegree = len(g_candidate_vtx.outgoing)
- h_outdegree = len(h_candidate_vtx.outgoing)
- if g_outdegree > h_outdegree:
- print_debug(" nope, outdegree")
- return 0
- g_indegree = len(g_candidate_vtx.incoming)
- h_indegree = len(h_candidate_vtx.incoming)
- if g_indegree > h_indegree:
- print_debug(" nope, indegree")
- return 0
- if not self.compare_fn(g_candidate_vtx, h_candidate_vtx):
- print_debug(" nope, bad compare")
- return 0
- new_state = state.grow_vtx(
- h_candidate_vtx,
- g_candidate_vtx)
- print_debug('grow vtx', g_candidate_vtx, ':', h_candidate_vtx, id(g_candidate_vtx), id(h_candidate_vtx))
- num_matches = yield from self._match(new_state, already_visited, indent+1)
- print_debug('backtrack vtx', g_candidate_vtx, ':', h_candidate_vtx, id(g_candidate_vtx), id(h_candidate_vtx))
- return num_matches
- print_debug('preferred...')
- num_matches += yield from attempt_grow('outgoing', indent+1)
- num_matches += yield from attempt_grow('incoming', indent+1)
- if num_matches == 0:
- print_debug('least preferred...')
- if state.boundary != None:
- g_boundary_vtx, _ = state.boundary
- guest_boundary_component = self.guest_vtx_to_component[g_boundary_vtx]
- # only try guest vertices that are in a different component (all vertices in the same component are already discovered via 'attempt_grow')
- guest_components_to_try = (c for i,c in enumerate(self.guest_component_to_vtxs) if i != guest_boundary_component)
- # for the host vertices however, we have to try them from all components, because different connected components of our pattern (=guest) could be mapped onto the same connected component in the host
- else:
- guest_components_to_try = self.guest_component_to_vtxs
- for g_component in guest_components_to_try:
- # we only need to pick ONE vertex from the component
- # in the future, this can be optimized further by picking the vertex of the type with the fewest instances
- g_candidate_vtx = g_component[0]
- g_vtx_matches = 0
- g_vtx_max = self.guest_to_host_candidates[g_candidate_vtx]
- # print(' guest vtx has', g_vtx_max, ' host candidates')
- if g_candidate_vtx in state.mapping_vtxs:
- print_debug("skip (already matched)", g_candidate_vtx)
- continue
- for h_candidate_vtx in state.h_unmatched_vtxs:
- N = yield from attempt_match_vtxs(state, g_candidate_vtx, h_candidate_vtx, indent+1)
- g_vtx_matches += N > 0
- num_matches += N
- if g_vtx_matches == g_vtx_max:
- print("EARLY STOP")
- break # found all matches
- return num_matches
- # demo time...
- if __name__ == "__main__":
- host = Graph()
- host.vtxs = [Vertex(0), Vertex(1), Vertex(2), Vertex(3)]
- host.edges = [
- Edge(host.vtxs[0], host.vtxs[1]),
- Edge(host.vtxs[1], host.vtxs[2]),
- Edge(host.vtxs[2], host.vtxs[0]),
- Edge(host.vtxs[2], host.vtxs[3]),
- Edge(host.vtxs[3], host.vtxs[2]),
- ]
- guest = Graph()
- guest.vtxs = [
- Vertex('v != 3'), # cannot be matched with Vertex(3) - changing this to True, you get 2 morphisms instead of one
- Vertex('True')] # can be matched with any node
- guest.edges = [
- # Look for a simple loop:
- Edge(guest.vtxs[0], guest.vtxs[1]),
- # Edge(guest.vtxs[1], guest.vtxs[0]),
- ]
- m = MatcherVF2(host, guest, lambda g_vtx, h_vtx: eval(g_vtx.value, {}, {'v':h_vtx.value}))
- import time
- durations = 0
- iterations = 1
- print("Patience...")
- for n in range(iterations):
- time_start = time.perf_counter_ns()
- matches = [mm for mm in m.match()]
- time_end = time.perf_counter_ns()
- time_duration = time_end - time_start
- durations += time_duration
- print(f'{iterations} iterations, took {durations/1000000:.3f} ms, {durations/iterations/1000000:.3f} ms per iteration')
- print("found", len(matches), "matches")
- for mm in matches:
- print("match:")
- print(" ", mm.mapping_vtxs)
- print(" ", mm.mapping_edges)
- print("######################")
- host = Graph()
- host.vtxs = [
- Vertex('pony'), # 1
- Vertex('pony'), # 3
- Vertex('bear'),
- Vertex('bear'),
- ]
- host.edges = [
- # match:
- Edge(host.vtxs[0], host.vtxs[1]),
- Edge(host.vtxs[1], host.vtxs[0]),
- ]
- guest = Graph()
- guest.vtxs = [
- Vertex('pony'), # 0
- Vertex('pony'), # 1
- Vertex('bear')]
- guest.edges = [
- Edge(guest.vtxs[0], guest.vtxs[1]),
- Edge(guest.vtxs[1], guest.vtxs[0]),
- ]
- m = MatcherVF2(host, guest, lambda g_vtx, h_vtx: g_vtx.value == h_vtx.value)
- import time
- durations = 0
- iterations = 1
- print("Patience...")
- for n in range(iterations):
- time_start = time.perf_counter_ns()
- matches = [mm for mm in m.match()]
- time_end = time.perf_counter_ns()
- time_duration = time_end - time_start
- durations += time_duration
- print(f'{iterations} iterations, took {durations/1000000:.3f} ms, {durations/iterations/1000000:.3f} ms per iteration')
- print("found", len(matches), "matches")
- for mm in matches:
- print("match:")
- print(" ", mm.mapping_vtxs)
- print(" ", mm.mapping_edges)
|