123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528 |
- # coding: utf-8
- """
- Author: Sten Vercamman
- Univeristy of Antwerp
- Example code for paper: Efficient model transformations for novices
- url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
- The main goal of this code is to give an overview, and an understandable
- implementation, of known techniques for pattern matching and solving the
- sub-graph homomorphism problem. The presented techniques do not include
- performance adaptations/optimizations. It is not optimized to be efficient
- but rather for the ease of understanding the workings of the algorithms.
- The paper does list some possible extensions/optimizations.
- It is intended as a guideline, even for novices, and provides an in-depth look
- at the workings behind various techniques for efficient pattern matching.
- """
- from searchGraph import *
- from enum import *
- # Enum for all primitive operation types
- # note: inc represent primitive operation in (as in is a reserved keyword in python)
- PRIM_OP = Enum(['lkp', 'inc', 'out', 'src', 'tgt'])
- class PlanGraph(object):
- """
- Holds the PlanGraph for a pattern.
- Can create the search plan of the pattern for a given SearchGraph.
- """
- def __init__(self, pattern):
- if not isinstance(pattern, Graph):
- raise TypeError('PlanGraph expects the pattern to be a Graph')
- # member variables:
- self.vertices = [] # will not be searched in
- self.edges = [] # will not be searched in
- # representation map, maps vertex from pattern to element from PlanGraph
- # (no need for edges)
- repr_map = {}
- # 1.1: for every vertex in the pattern graph,
- # create a vertex representing the pattern element
- for str_type, vertices in pattern.vertices.items():
- for vertex in vertices:
- # we only need to know the type of the vertex
- plan_vertex = Vertex(str_type)
- # and we need to know that is was a vertex
- plan_vertex.is_vertex = True
- # for re-linking the edges, we'll need to map the
- # vertex of the pattern to the plan_vertex
- repr_map[vertex] = plan_vertex
- # save created plan_vertex
- self.vertices.append(plan_vertex)
- # 1.2: for every edge in the pattern graph,
- # create a vertex representing the pattern elemen
- for str_type, edges in pattern.edges.items():
- for edge in edges:
- # we only need to know the type of the edge
- plan_vertex = Vertex(edge.type)
- # and we need to know that is was an edge
- plan_vertex.is_vertex = False
- # save created plan_vertex
- self.vertices.append(plan_vertex)
- # 4: for every element x from the PlanGraph
- # that represents an edge e in the pattern:
- # 4.1: create an edge labelled tgt from x to the vertex in the PlanGraph
- # representing the target vertex of e in the pattern graph,
- # and a reverted edge labelled in
- # 4.1.1: tgt:
- plan_edge = Edge(plan_vertex, repr_map[edge.tgt])
- # backup src and tgt (Edmonds might override it)
- plan_edge.orig_src = plan_edge.src
- plan_edge.orig_tgt = plan_edge.tgt
- plan_edge.label = PRIM_OP.tgt
- # link vertices connected to this plan_edge
- plan_edge.src.addOutgoingEdge(plan_edge)
- plan_edge.tgt.addIncomingEdge(plan_edge)
- # tgt and src cost are always 1, we use logaritmic cost,
- # (=> cost = ln(1) = 0.0) so that we do not need to minimaze
- # a product, but can minimize a sum
- # (as ln(c1...ck) = ln(c1) + ... + ln (ck))
- plan_edge.cost = 0.0
- # backup orig cost, as Edmonds changes cost
- plan_edge.orig_cost = plan_edge.cost
- # save created edge
- self.edges.append(plan_edge)
- # 4.1.2: in:
- plan_edge = Edge(repr_map[edge.tgt], plan_vertex)
- # backup src and tgt (Edmonds might override it)
- plan_edge.orig_src = plan_edge.src
- plan_edge.orig_tgt = plan_edge.tgt
- plan_edge.label = PRIM_OP.inc
- # link vertices connected to this plan_edge
- plan_edge.src.addOutgoingEdge(plan_edge)
- plan_edge.tgt.addIncomingEdge(plan_edge)
- # save created edge
- self.edges.append(plan_edge)
- # 4.2: create an edge labelled src from x to the vertex in the PlanGraph
- # representing the source vertex of e in the pattern graph
- # and a reverted edge labelled out
- # 4.2.1: src
- plan_edge = Edge(plan_vertex, repr_map[edge.src])
- # backup src and tgt (Edmonds might override it)
- plan_edge.orig_src = plan_edge.src
- plan_edge.orig_tgt = plan_edge.tgt
- plan_edge.label = PRIM_OP.src
- # link vertices connected to this plan_edge
- plan_edge.src.addOutgoingEdge(plan_edge)
- plan_edge.tgt.addIncomingEdge(plan_edge)
- # tgt and src cost are always 1, we use logaritmic cost,
- # (=> cost = ln(1) = 0.0) so that we do not need to minimaze
- # a product, but can minimize a sum
- # (as ln(c1...ck) = ln(c1) + ... + ln (ck))
- plan_edge.cost = 0.0
- # backup orig cost, as Edmonds changes cost
- plan_edge.orig_cost = plan_edge.cost
- # save created edge
- self.edges.append(plan_edge)
- # 4.2.2: out
- plan_edge = Edge(repr_map[edge.src], plan_vertex)
- # backup src and tgt (Edmonds might override it)
- plan_edge.orig_src = plan_edge.src
- plan_edge.orig_tgt = plan_edge.tgt
- plan_edge.label = PRIM_OP.out
- # link vertices connected to this plan_edge
- plan_edge.src.addOutgoingEdge(plan_edge)
- plan_edge.tgt.addIncomingEdge(plan_edge)
- # save created edge
- self.edges.append(plan_edge)
- # 2: create a root vertex
- self.root = Vertex('root')
- # don't add it to the vertices
- # 3: for each element in the PlanGraph (that is not the root vertex),
- # create an edge from the root to it, and label it lkp
- for vertex in self.vertices:
- plan_edge = Edge(self.root, vertex)
- # backup src and tgt (Edmonds might override it)
- plan_edge.orig_src = plan_edge.src
- plan_edge.orig_tgt = plan_edge.tgt
- plan_edge.label = PRIM_OP.lkp
- # link vertices connected to this plan_edge
- plan_edge.src.addOutgoingEdge(plan_edge)
- plan_edge.tgt.addIncomingEdge(plan_edge)
- # save created edge
- self.edges.append(plan_edge)
- def updatePlanCost(self, graph):
- """
- returns True if sucessfully updated cost,
- returns False if a type in the pattern is not in the graph.
- """
- if not isinstance(graph, SearchGraph):
- raise TypeError('updatePlanCost expects a SearchGraph')
- # update, lkp, in and out (not src and tgt as they are constant)
- for edge in self.edges:
- if edge.label == PRIM_OP.lkp:
- edge.cost = graph.getCostLkp(edge.tgt.type, edge.tgt.is_vertex)
- if edge.cost == None:
- print('failed lkp')
- return False
- elif edge.label == PRIM_OP.inc:
- # in(v, e), binds an incoming edge e from an already bound vertex v,
- # depends on the number of incoming edges of type e for the vertex type
- edge.cost = graph.getCostInc(edge.src.type, edge.tgt.type)
- if edge.cost == None:
- print('failed in')
- return False
- elif edge.label == PRIM_OP.out:
- # (analogue for out(v, e))
- edge.cost = graph.getCostOut(edge.src.type, edge.tgt.type)
- if edge.cost == None:
- print('failed out')
- return False
- # else: ignore src and tgt
- # backup orig cost, as Edmonds changes cost
- edge.orig_cost = edge.cost
- return True
- def Edmonds(self, searchGraph):
- """
- Returns the minimum directed spanning tree (MDST)
- for the pattern and the provided graph.
- Returns None if it is impossible to find the pattern in the Graph
- (vertex type of edge type from pattern not in Graph).
- """
- # update the cost for the PlanGraph
- if not self.updatePlanCost(searchGraph):
- print('type in pattern not found in Graph (in Edmonds)')
- # (returns False if a type in the pattern can not be found in the graph)
- return None
- # Complete Edmonds algorithm has optimization steps:
- # a: remove edges entering the root
- # b: merge parallel edges from same src to same tgt with mim weight
- # we can ignore this as:
- # a: the root does not have incoming edges
- # b: the PlanGraph does not have such paralllel edges
- # 1: for each node v (other than root), find incoming edge with lowest weight
- # insert those
- pi_v = {}
- for plan_vertex in self.vertices:
- min_weight = float('infinity')
- min_edge = None
- for plan_edge in plan_vertex.incoming_edges:
- if plan_edge.cost < min_weight:
- min_weight = plan_edge.cost
- min_edge = plan_edge
- # save plan_vertex and it's minimum incoming edge
- pi_v[plan_vertex] = min_edge
- if min_edge == None:
- raise RuntimeError('baka: no min_edge found')
- def getCycle(vertex, reverse_graph, visited):
- """
- Walk from vertex to root, we walk in a reverse order, as each vertex
- only has one incoming edge, so we walk to the source of that incoming
- edge. We stop when we already visited a vertex we walked on.
- In both cases we return None.
- When we visit a vertex from our current path, we return that cycle,
- by first removing its tail.
- """
- def addToVisited(walked, visited):
- for vertex in walked:
- visited.add(vertex)
- walked = [] # we could only save it once, but we need order
- current_path = set() # and lookup in an array is slower than in set
- # we asume root is in visited (it must be in it)
- while vertex not in visited:
- if vertex in current_path:
- # we found a cycle, the cycle however might look like a: O--,
- # g f e where we first visited a, then b, c, d,...
- # h d c b a k points back to d, completing a cycle,
- # i j k but c b a is the tail that does not belong
- # in the cycle, removing this is "easy" as we know that
- # we first visited the tail, so they are the first elements
- # in our walked path
- for tail_part in walked:
- if tail_part != vertex:
- current_path.remove(tail_part)
- else:
- break
- addToVisited(walked, visited)
- return current_path
- current_path.add(vertex)
- walked.append(vertex)
- # by definition, an MDST only has one incoming edge per vertex
- # so we follow it upwards
- # vertex <--(minimal edge)-- src
- vertex = reverse_graph[vertex].src
- # no cycle found (the current path let to a visited vertex)
- addToVisited(walked, visited) # add walked to visited
- return None
- class VertexGraph(Vertex):
- """
- Acts as a super vertex, holds a subgraph (that is/was once a cyle).
- Uses for Edmonds contractions step.
- The incoming edges are the edges leading to the vertices in the
- VertexGraph (they exclude edges from a vertex in the cycle to
- another vertex in the cycle).
- Analogue for outgoing edges.
- """
- def __init__(self, cycle, reverseGraph):
- # Call parent class constructor
- str_type = ''
- for vertex in cycle:
- str_type += str(vertex.type)
- Vertex.__init__(self, str_type)
- # member variables:
- self.internalMDST = {}
- minIntWeight = self.findMinIntWeight(cycle, reverseGraph)
- self.updateMinExtEdge(minIntWeight, reverseGraph)
- def findMinIntWeight(self, cycle, reverseGraph):
- """
- Find the the smallest cost of the cycle his internal incoming edges.
- (Also save its internalMDST (currently a cycle).)
- (The VertexGraph formed by the cycle will be added to the
- reverseGraph by calling findMinExtEdge.)
- """
- minIntWeight = float('infinity')
- cycleEdges = []
- origTgts = []
- for cyclePart in cycle:
- cycleEdges.append(reverseGraph[cyclePart])
- origTgts.append(reverseGraph[cyclePart].orig_tgt)
- for vertex in cycle:
- # add incoming edges to this VertexGraph
- for inc_edge in vertex.incoming_edges:
- # edge from within the cycle
- if inc_edge.src in cycle:
- minIntWeight = min(minIntWeight, inc_edge.cost)
- else:
- # edge from outside the cycle
- self.addIncomingEdge(inc_edge)
- # add outgoing edges to this VertexGraph
- for out_edge in vertex.outgoing_edges:
- if out_edge.tgt not in cycle:
- # edge leaves the cycle
- self.addOutgoingEdge(out_edge)
- # update src to this VertexGraph
- out_edge.src = self
- # save internal MDST
- min_edge = reverseGraph[vertex]
- if min_edge.src in cycle:
- self.internalMDST[vertex] = min_edge
- else:
- raise TypeError('how is this a cycle')
- return minIntWeight
- def updateMinExtEdge(self, minIntWeight, reverseGraph):
- """
- Modifies all external incoming edges their cost and finds the
- minimum external incoming edge with this modified weight.
- This found edge will break the cycle, update the internalMDST
- from a cycle to an MDST, updates the reverseGraph to include
- the vertexGraph.
- """
- minExt = None
- minModWeight = -float('infinity')
- # Find incoming edge from outside of the circle with minimal
- # modified cost. This edge will break the cycle.
- for inc_edge in self.incoming_edges:
- # An incoming edge (with src from within the cycle), can be
- # from a contracted part of the graph. Assume bc is a
- # contracted part (VertexGraph) a, bc is a newly formed
- # cycle (due to the breaking of the previous cycle bc). bc
- # has at least lkp incoming edges to b and c, but we should
- # not consider the lkp of c to break the cycle.
- # If we want to break a, bc, select plausable edges,
- # /<--\
- # a bc bc's MDST b <-- c
- # \-->/
- # by looking at their original targets.
- # (if cycle inc_edge.orig_tgt == external inc_edge.orig_tgt)
- if reverseGraph[inc_edge.tgt].orig_tgt == inc_edge.orig_tgt:
- # modify costL cost of inc_edge -
- # (cost of previously choosen minimum edge to cycle vertex - minIntWeight)
- inc_edge.cost -= (reverseGraph[inc_edge.tgt].cost - minIntWeight)
- if minExt is None or minModWeight > inc_edge.cost:
- # save better edge from outside of the cycle
- minExt = inc_edge
- minModWeight = inc_edge.cost
- # Example: a, b is a cycle (we know that there are no other
- # incoming edges to a and/or b, as there is on;y exactly one
- # incoming edge per vertex), and the arow from c to b represents
- # the minExt edge. We will remove the bottem arrow (from a to b)
- # /<--\ and save the minExt edge in the reverseGraph.
- # a b <-- c This breaks the cycle. As the internalMDST
- # \-->/ saves the intenal MDST, and currently still
- # holds a cycle, we have to remove it from the internalMDST.
- # We have to remove all vertex bindings of the cycle from the
- # reverseGraph (as it is contracted into a single VertexGraph),
- # and store the minExt edge to this VertexGraph in it.
- for int_vertex, _ in self.internalMDST.items():
- del reverseGraph[int_vertex] # remove cycle from reverseGraph
- del self.internalMDST[minExt.tgt] # remove/break cycle
- for inc_edge in self.incoming_edges:
- # update inc_edge's target to this VertexGraph
- inc_edge.tgt = self
- # save minExt edge to this VertexGraph in the reverseGraph
- reverseGraph[self] = minExt
- while True:
- # 2: find all cycles:
- cycles = []
- visited = set([self.root]) # root does not have incoming edges,
- for vertex in list(pi_v.keys()): # it can not be part of a cycle
- if vertex not in visited: # getCycle depends on root being in visited
- cycle = getCycle(vertex, pi_v, visited)
- if cycle != None:
- cycles.append(cycle)
- # 2: if the set of edges {pi(v), v} does not contain any cycles,
- # Then we found our minimum directed spanning tree
- # otherwise, we'll have to resolve the cycles
- if len(cycles) == 0:
- break
- # 3: For each formed cycle:
- # 3a: find internal incoming edge with the smallest cost
- # 3b: modify the cost of each arc which enters the cycle
- # 3c: replace smallert internal edge with the modified edge which has the smallest cost
- for cycle in cycles:
- # Breaks a cycle by:
- # - contracting cycle into VertexGraph
- # - finding the internal incoming edge with the smallest cost
- # - modify the cost of each arc which enters the cycle
- # - replacing the smallest internal edge with the modified edge which has the smallest cost
- # - changing reverseGraph accordingly (removes elements from cycle, ads vertexGraph)
- # (This will find a solution as the graph keeps shrinking with every cycle,
- # in the worst case the same amount as there are vertices, until
- # onlty the root and one vertexGraph remains)
- vertexGraph = VertexGraph(cycle, pi_v)
- class SortedContainer(object):
- """
- A container that keeps elemets sorted based on a given sortValue.
- Elements with the same value, will be returned in the order they got inserted.
- """
- def __init__(self):
- # member variables:
- self.keys = [] # stores key in sorted order (sorted when pop gets called)
- self.sorted = {} # {key, [elems with same key]}
- def add(self, sortValue, element):
- """
- Adds element with sortValue to the SortedContainer.
- """
- elems = self.sorted.get(sortValue)
- if elems == None:
- self.sorted[sortValue] = [element]
- self.keys.append(sortValue)
- else:
- elems.append(element)
- def pop(self):
- """
- Sorts the SortedContainer, returns element with smallest sortValue.
- """
- self.keys.sort()
- elems = self.sorted[self.keys[0]]
- elem = elems.pop()
- if len(elems) == 0:
- del self.sorted[self.keys[0]]
- del self.keys[0]
- return elem
- def empty(self):
- """
- Returns whether or not the sorted container is empty.
- """
- return (len(self.keys) == 0)
- def createPRIM_OP(edge, inc_cost=True):
- """
- Helper function to keep argument list short,
- return contracted data for a PRIM_OP.
- """
- if edge.label == PRIM_OP.inc or edge.label == PRIM_OP.out:
- if inc_cost: # op # vertex type # actual edge type
- return (edge.label, edge.orig_src.type, edge.orig_tgt.type, edge.cost)
- else:
- return (edge.label, edge.orig_src.type, edge.orig_tgt.type)
- elif edge.label == PRIM_OP.lkp:
- if inc_cost: # op # vertex/edge type # is vertex or edge
- return (edge.label, edge.orig_tgt.type, edge.orig_tgt.is_vertex, edge.cost)
- else:
- return (edge.label, edge.orig_tgt.type, edge.orig_tgt.is_vertex)
- else: # src, tgt operation
- if inc_cost: # op # actual edge type
- return (edge.label, edge.orig_src.type, edge.cost)
- else:
- return (edge.label, edge.orig_src.type)
- def flattenReverseGraph(vertex, inc_edge, reverseGraph):
- """
- Flattens the reverseGraph, so that the vertexGraph node can get
- processed to create a forwardGraph.
- """
- if not isinstance(vertex, VertexGraph):
- reverseGraph[vertex] = inc_edge
- else:
- reverseGraph[inc_edge.orig_tgt] = inc_edge
- for vg, eg in inc_edge.tgt.internalMDST.items():
- flattenReverseGraph(vg, eg, reverseGraph)
- if isinstance(inc_edge.src, VertexGraph):
- for vg, eg in inc_edge.src.internalMDST.items():
- flattenReverseGraph(vg, eg, reverseGraph)
- def createForwardGraph(vertex, inc_edge, forwardGraph):
- """
- Create a forwardGraph, keeping in mind that their can be vertexGraph
- in the reverseGraph.
- """
- if not isinstance(vertex, VertexGraph):
- forwardGraph.setdefault(inc_edge.orig_src, []).append(inc_edge)
- else:
- forwardGraph.setdefault(inc_edge.orig_src, []).append(inc_edge)
- for vg, eg in vertex.internalMDST.items():
- createForwardGraph(vg, eg, forwardGraph)
- MDST = []
- # pi_v contains {vertex, incoming_edge}
- # we want to start from root and follow the outgoing edges
- # so we have to build the forwardGraph graph for pi_v
- # (Except for the root (has 0), each vertex has exactly one incoming edge,
- # but might have multiple outgoing edges)
- forwardGraph = {} # {vertex, [outgoing edge 1, ... ] }
- reverseGraph = {}
- # flatten reverseGraph (for the vertexGraph elements)
- for v, e in pi_v.items():
- flattenReverseGraph(v, e, reverseGraph)
- # create the forwardGraph
- for vertex, edge in reverseGraph.items():
- createForwardGraph(vertex, edge, forwardGraph)
- # create the MDST in a best first manner (lowest value first)
- current = SortedContainer() # allows easy walking true tree
- for edge in forwardGraph[self.root]:
- current.add(edge.orig_cost, edge) # use orig cost, not modified
- while current.empty() != True:
- p_op = current.pop() # p_op contains an outgoing edge
- MDST.append(createPRIM_OP(p_op))
- for edge in forwardGraph.get(p_op.orig_tgt, []):
- current.add(edge.orig_cost, edge)
- return MDST
|