planGraph.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528
  1. # coding: utf-8
  2. """
  3. Author: Sten Vercamman
  4. Univeristy of Antwerp
  5. Example code for paper: Efficient model transformations for novices
  6. url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
  7. The main goal of this code is to give an overview, and an understandable
  8. implementation, of known techniques for pattern matching and solving the
  9. sub-graph homomorphism problem. The presented techniques do not include
  10. performance adaptations/optimizations. It is not optimized to be efficient
  11. but rather for the ease of understanding the workings of the algorithms.
  12. The paper does list some possible extensions/optimizations.
  13. It is intended as a guideline, even for novices, and provides an in-depth look
  14. at the workings behind various techniques for efficient pattern matching.
  15. """
  16. from searchGraph import *
  17. from enum import *
  18. # Enum for all primitive operation types
  19. # note: inc represent primitive operation in (as in is a reserved keyword in python)
  20. PRIM_OP = Enum(['lkp', 'inc', 'out', 'src', 'tgt'])
  21. class PlanGraph(object):
  22. """
  23. Holds the PlanGraph for a pattern.
  24. Can create the search plan of the pattern for a given SearchGraph.
  25. """
  26. def __init__(self, pattern):
  27. if not isinstance(pattern, Graph):
  28. raise TypeError('PlanGraph expects the pattern to be a Graph')
  29. # member variables:
  30. self.vertices = [] # will not be searched in
  31. self.edges = [] # will not be searched in
  32. # representation map, maps vertex from pattern to element from PlanGraph
  33. # (no need for edges)
  34. repr_map = {}
  35. # 1.1: for every vertex in the pattern graph,
  36. # create a vertex representing the pattern element
  37. for str_type, vertices in pattern.vertices.items():
  38. for vertex in vertices:
  39. # we only need to know the type of the vertex
  40. plan_vertex = Vertex(str_type)
  41. # and we need to know that is was a vertex
  42. plan_vertex.is_vertex = True
  43. # for re-linking the edges, we'll need to map the
  44. # vertex of the pattern to the plan_vertex
  45. repr_map[vertex] = plan_vertex
  46. # save created plan_vertex
  47. self.vertices.append(plan_vertex)
  48. # 1.2: for every edge in the pattern graph,
  49. # create a vertex representing the pattern elemen
  50. for str_type, edges in pattern.edges.items():
  51. for edge in edges:
  52. # we only need to know the type of the edge
  53. plan_vertex = Vertex(edge.type)
  54. # and we need to know that is was an edge
  55. plan_vertex.is_vertex = False
  56. # save created plan_vertex
  57. self.vertices.append(plan_vertex)
  58. # 4: for every element x from the PlanGraph
  59. # that represents an edge e in the pattern:
  60. # 4.1: create an edge labelled tgt from x to the vertex in the PlanGraph
  61. # representing the target vertex of e in the pattern graph,
  62. # and a reverted edge labelled in
  63. # 4.1.1: tgt:
  64. plan_edge = Edge(plan_vertex, repr_map[edge.tgt])
  65. # backup src and tgt (Edmonds might override it)
  66. plan_edge.orig_src = plan_edge.src
  67. plan_edge.orig_tgt = plan_edge.tgt
  68. plan_edge.label = PRIM_OP.tgt
  69. # link vertices connected to this plan_edge
  70. plan_edge.src.addOutgoingEdge(plan_edge)
  71. plan_edge.tgt.addIncomingEdge(plan_edge)
  72. # tgt and src cost are always 1, we use logaritmic cost,
  73. # (=> cost = ln(1) = 0.0) so that we do not need to minimaze
  74. # a product, but can minimize a sum
  75. # (as ln(c1...ck) = ln(c1) + ... + ln (ck))
  76. plan_edge.cost = 0.0
  77. # backup orig cost, as Edmonds changes cost
  78. plan_edge.orig_cost = plan_edge.cost
  79. # save created edge
  80. self.edges.append(plan_edge)
  81. # 4.1.2: in:
  82. plan_edge = Edge(repr_map[edge.tgt], plan_vertex)
  83. # backup src and tgt (Edmonds might override it)
  84. plan_edge.orig_src = plan_edge.src
  85. plan_edge.orig_tgt = plan_edge.tgt
  86. plan_edge.label = PRIM_OP.inc
  87. # link vertices connected to this plan_edge
  88. plan_edge.src.addOutgoingEdge(plan_edge)
  89. plan_edge.tgt.addIncomingEdge(plan_edge)
  90. # save created edge
  91. self.edges.append(plan_edge)
  92. # 4.2: create an edge labelled src from x to the vertex in the PlanGraph
  93. # representing the source vertex of e in the pattern graph
  94. # and a reverted edge labelled out
  95. # 4.2.1: src
  96. plan_edge = Edge(plan_vertex, repr_map[edge.src])
  97. # backup src and tgt (Edmonds might override it)
  98. plan_edge.orig_src = plan_edge.src
  99. plan_edge.orig_tgt = plan_edge.tgt
  100. plan_edge.label = PRIM_OP.src
  101. # link vertices connected to this plan_edge
  102. plan_edge.src.addOutgoingEdge(plan_edge)
  103. plan_edge.tgt.addIncomingEdge(plan_edge)
  104. # tgt and src cost are always 1, we use logaritmic cost,
  105. # (=> cost = ln(1) = 0.0) so that we do not need to minimaze
  106. # a product, but can minimize a sum
  107. # (as ln(c1...ck) = ln(c1) + ... + ln (ck))
  108. plan_edge.cost = 0.0
  109. # backup orig cost, as Edmonds changes cost
  110. plan_edge.orig_cost = plan_edge.cost
  111. # save created edge
  112. self.edges.append(plan_edge)
  113. # 4.2.2: out
  114. plan_edge = Edge(repr_map[edge.src], plan_vertex)
  115. # backup src and tgt (Edmonds might override it)
  116. plan_edge.orig_src = plan_edge.src
  117. plan_edge.orig_tgt = plan_edge.tgt
  118. plan_edge.label = PRIM_OP.out
  119. # link vertices connected to this plan_edge
  120. plan_edge.src.addOutgoingEdge(plan_edge)
  121. plan_edge.tgt.addIncomingEdge(plan_edge)
  122. # save created edge
  123. self.edges.append(plan_edge)
  124. # 2: create a root vertex
  125. self.root = Vertex('root')
  126. # don't add it to the vertices
  127. # 3: for each element in the PlanGraph (that is not the root vertex),
  128. # create an edge from the root to it, and label it lkp
  129. for vertex in self.vertices:
  130. plan_edge = Edge(self.root, vertex)
  131. # backup src and tgt (Edmonds might override it)
  132. plan_edge.orig_src = plan_edge.src
  133. plan_edge.orig_tgt = plan_edge.tgt
  134. plan_edge.label = PRIM_OP.lkp
  135. # link vertices connected to this plan_edge
  136. plan_edge.src.addOutgoingEdge(plan_edge)
  137. plan_edge.tgt.addIncomingEdge(plan_edge)
  138. # save created edge
  139. self.edges.append(plan_edge)
  140. def updatePlanCost(self, graph):
  141. """
  142. returns True if sucessfully updated cost,
  143. returns False if a type in the pattern is not in the graph.
  144. """
  145. if not isinstance(graph, SearchGraph):
  146. raise TypeError('updatePlanCost expects a SearchGraph')
  147. # update, lkp, in and out (not src and tgt as they are constant)
  148. for edge in self.edges:
  149. if edge.label == PRIM_OP.lkp:
  150. edge.cost = graph.getCostLkp(edge.tgt.type, edge.tgt.is_vertex)
  151. if edge.cost == None:
  152. print('failed lkp')
  153. return False
  154. elif edge.label == PRIM_OP.inc:
  155. # in(v, e), binds an incoming edge e from an already bound vertex v,
  156. # depends on the number of incoming edges of type e for the vertex type
  157. edge.cost = graph.getCostInc(edge.src.type, edge.tgt.type)
  158. if edge.cost == None:
  159. print('failed in')
  160. return False
  161. elif edge.label == PRIM_OP.out:
  162. # (analogue for out(v, e))
  163. edge.cost = graph.getCostOut(edge.src.type, edge.tgt.type)
  164. if edge.cost == None:
  165. print('failed out')
  166. return False
  167. # else: ignore src and tgt
  168. # backup orig cost, as Edmonds changes cost
  169. edge.orig_cost = edge.cost
  170. return True
  171. def Edmonds(self, searchGraph):
  172. """
  173. Returns the minimum directed spanning tree (MDST)
  174. for the pattern and the provided graph.
  175. Returns None if it is impossible to find the pattern in the Graph
  176. (vertex type of edge type from pattern not in Graph).
  177. """
  178. # update the cost for the PlanGraph
  179. if not self.updatePlanCost(searchGraph):
  180. print('type in pattern not found in Graph (in Edmonds)')
  181. # (returns False if a type in the pattern can not be found in the graph)
  182. return None
  183. # Complete Edmonds algorithm has optimization steps:
  184. # a: remove edges entering the root
  185. # b: merge parallel edges from same src to same tgt with mim weight
  186. # we can ignore this as:
  187. # a: the root does not have incoming edges
  188. # b: the PlanGraph does not have such paralllel edges
  189. # 1: for each node v (other than root), find incoming edge with lowest weight
  190. # insert those
  191. pi_v = {}
  192. for plan_vertex in self.vertices:
  193. min_weight = float('infinity')
  194. min_edge = None
  195. for plan_edge in plan_vertex.incoming_edges:
  196. if plan_edge.cost < min_weight:
  197. min_weight = plan_edge.cost
  198. min_edge = plan_edge
  199. # save plan_vertex and it's minimum incoming edge
  200. pi_v[plan_vertex] = min_edge
  201. if min_edge == None:
  202. raise RuntimeError('baka: no min_edge found')
  203. def getCycle(vertex, reverse_graph, visited):
  204. """
  205. Walk from vertex to root, we walk in a reverse order, as each vertex
  206. only has one incoming edge, so we walk to the source of that incoming
  207. edge. We stop when we already visited a vertex we walked on.
  208. In both cases we return None.
  209. When we visit a vertex from our current path, we return that cycle,
  210. by first removing its tail.
  211. """
  212. def addToVisited(walked, visited):
  213. for vertex in walked:
  214. visited.add(vertex)
  215. walked = [] # we could only save it once, but we need order
  216. current_path = set() # and lookup in an array is slower than in set
  217. # we asume root is in visited (it must be in it)
  218. while vertex not in visited:
  219. if vertex in current_path:
  220. # we found a cycle, the cycle however might look like a: O--,
  221. # g f e where we first visited a, then b, c, d,...
  222. # h d c b a k points back to d, completing a cycle,
  223. # i j k but c b a is the tail that does not belong
  224. # in the cycle, removing this is "easy" as we know that
  225. # we first visited the tail, so they are the first elements
  226. # in our walked path
  227. for tail_part in walked:
  228. if tail_part != vertex:
  229. current_path.remove(tail_part)
  230. else:
  231. break
  232. addToVisited(walked, visited)
  233. return current_path
  234. current_path.add(vertex)
  235. walked.append(vertex)
  236. # by definition, an MDST only has one incoming edge per vertex
  237. # so we follow it upwards
  238. # vertex <--(minimal edge)-- src
  239. vertex = reverse_graph[vertex].src
  240. # no cycle found (the current path let to a visited vertex)
  241. addToVisited(walked, visited) # add walked to visited
  242. return None
  243. class VertexGraph(Vertex):
  244. """
  245. Acts as a super vertex, holds a subgraph (that is/was once a cyle).
  246. Uses for Edmonds contractions step.
  247. The incoming edges are the edges leading to the vertices in the
  248. VertexGraph (they exclude edges from a vertex in the cycle to
  249. another vertex in the cycle).
  250. Analogue for outgoing edges.
  251. """
  252. def __init__(self, cycle, reverseGraph):
  253. # Call parent class constructor
  254. str_type = ''
  255. for vertex in cycle:
  256. str_type += str(vertex.type)
  257. Vertex.__init__(self, str_type)
  258. # member variables:
  259. self.internalMDST = {}
  260. minIntWeight = self.findMinIntWeight(cycle, reverseGraph)
  261. self.updateMinExtEdge(minIntWeight, reverseGraph)
  262. def findMinIntWeight(self, cycle, reverseGraph):
  263. """
  264. Find the the smallest cost of the cycle his internal incoming edges.
  265. (Also save its internalMDST (currently a cycle).)
  266. (The VertexGraph formed by the cycle will be added to the
  267. reverseGraph by calling findMinExtEdge.)
  268. """
  269. minIntWeight = float('infinity')
  270. cycleEdges = []
  271. origTgts = []
  272. for cyclePart in cycle:
  273. cycleEdges.append(reverseGraph[cyclePart])
  274. origTgts.append(reverseGraph[cyclePart].orig_tgt)
  275. for vertex in cycle:
  276. # add incoming edges to this VertexGraph
  277. for inc_edge in vertex.incoming_edges:
  278. # edge from within the cycle
  279. if inc_edge.src in cycle:
  280. minIntWeight = min(minIntWeight, inc_edge.cost)
  281. else:
  282. # edge from outside the cycle
  283. self.addIncomingEdge(inc_edge)
  284. # add outgoing edges to this VertexGraph
  285. for out_edge in vertex.outgoing_edges:
  286. if out_edge.tgt not in cycle:
  287. # edge leaves the cycle
  288. self.addOutgoingEdge(out_edge)
  289. # update src to this VertexGraph
  290. out_edge.src = self
  291. # save internal MDST
  292. min_edge = reverseGraph[vertex]
  293. if min_edge.src in cycle:
  294. self.internalMDST[vertex] = min_edge
  295. else:
  296. raise TypeError('how is this a cycle')
  297. return minIntWeight
  298. def updateMinExtEdge(self, minIntWeight, reverseGraph):
  299. """
  300. Modifies all external incoming edges their cost and finds the
  301. minimum external incoming edge with this modified weight.
  302. This found edge will break the cycle, update the internalMDST
  303. from a cycle to an MDST, updates the reverseGraph to include
  304. the vertexGraph.
  305. """
  306. minExt = None
  307. minModWeight = -float('infinity')
  308. # Find incoming edge from outside of the circle with minimal
  309. # modified cost. This edge will break the cycle.
  310. for inc_edge in self.incoming_edges:
  311. # An incoming edge (with src from within the cycle), can be
  312. # from a contracted part of the graph. Assume bc is a
  313. # contracted part (VertexGraph) a, bc is a newly formed
  314. # cycle (due to the breaking of the previous cycle bc). bc
  315. # has at least lkp incoming edges to b and c, but we should
  316. # not consider the lkp of c to break the cycle.
  317. # If we want to break a, bc, select plausable edges,
  318. # /<--\
  319. # a bc bc's MDST b <-- c
  320. # \-->/
  321. # by looking at their original targets.
  322. # (if cycle inc_edge.orig_tgt == external inc_edge.orig_tgt)
  323. if reverseGraph[inc_edge.tgt].orig_tgt == inc_edge.orig_tgt:
  324. # modify costL cost of inc_edge -
  325. # (cost of previously choosen minimum edge to cycle vertex - minIntWeight)
  326. inc_edge.cost -= (reverseGraph[inc_edge.tgt].cost - minIntWeight)
  327. if minExt is None or minModWeight > inc_edge.cost:
  328. # save better edge from outside of the cycle
  329. minExt = inc_edge
  330. minModWeight = inc_edge.cost
  331. # Example: a, b is a cycle (we know that there are no other
  332. # incoming edges to a and/or b, as there is on;y exactly one
  333. # incoming edge per vertex), and the arow from c to b represents
  334. # the minExt edge. We will remove the bottem arrow (from a to b)
  335. # /<--\ and save the minExt edge in the reverseGraph.
  336. # a b <-- c This breaks the cycle. As the internalMDST
  337. # \-->/ saves the intenal MDST, and currently still
  338. # holds a cycle, we have to remove it from the internalMDST.
  339. # We have to remove all vertex bindings of the cycle from the
  340. # reverseGraph (as it is contracted into a single VertexGraph),
  341. # and store the minExt edge to this VertexGraph in it.
  342. for int_vertex, _ in self.internalMDST.items():
  343. del reverseGraph[int_vertex] # remove cycle from reverseGraph
  344. del self.internalMDST[minExt.tgt] # remove/break cycle
  345. for inc_edge in self.incoming_edges:
  346. # update inc_edge's target to this VertexGraph
  347. inc_edge.tgt = self
  348. # save minExt edge to this VertexGraph in the reverseGraph
  349. reverseGraph[self] = minExt
  350. while True:
  351. # 2: find all cycles:
  352. cycles = []
  353. visited = set([self.root]) # root does not have incoming edges,
  354. for vertex in list(pi_v.keys()): # it can not be part of a cycle
  355. if vertex not in visited: # getCycle depends on root being in visited
  356. cycle = getCycle(vertex, pi_v, visited)
  357. if cycle != None:
  358. cycles.append(cycle)
  359. # 2: if the set of edges {pi(v), v} does not contain any cycles,
  360. # Then we found our minimum directed spanning tree
  361. # otherwise, we'll have to resolve the cycles
  362. if len(cycles) == 0:
  363. break
  364. # 3: For each formed cycle:
  365. # 3a: find internal incoming edge with the smallest cost
  366. # 3b: modify the cost of each arc which enters the cycle
  367. # 3c: replace smallert internal edge with the modified edge which has the smallest cost
  368. for cycle in cycles:
  369. # Breaks a cycle by:
  370. # - contracting cycle into VertexGraph
  371. # - finding the internal incoming edge with the smallest cost
  372. # - modify the cost of each arc which enters the cycle
  373. # - replacing the smallest internal edge with the modified edge which has the smallest cost
  374. # - changing reverseGraph accordingly (removes elements from cycle, ads vertexGraph)
  375. # (This will find a solution as the graph keeps shrinking with every cycle,
  376. # in the worst case the same amount as there are vertices, until
  377. # onlty the root and one vertexGraph remains)
  378. vertexGraph = VertexGraph(cycle, pi_v)
  379. class SortedContainer(object):
  380. """
  381. A container that keeps elemets sorted based on a given sortValue.
  382. Elements with the same value, will be returned in the order they got inserted.
  383. """
  384. def __init__(self):
  385. # member variables:
  386. self.keys = [] # stores key in sorted order (sorted when pop gets called)
  387. self.sorted = {} # {key, [elems with same key]}
  388. def add(self, sortValue, element):
  389. """
  390. Adds element with sortValue to the SortedContainer.
  391. """
  392. elems = self.sorted.get(sortValue)
  393. if elems == None:
  394. self.sorted[sortValue] = [element]
  395. self.keys.append(sortValue)
  396. else:
  397. elems.append(element)
  398. def pop(self):
  399. """
  400. Sorts the SortedContainer, returns element with smallest sortValue.
  401. """
  402. self.keys.sort()
  403. elems = self.sorted[self.keys[0]]
  404. elem = elems.pop()
  405. if len(elems) == 0:
  406. del self.sorted[self.keys[0]]
  407. del self.keys[0]
  408. return elem
  409. def empty(self):
  410. """
  411. Returns whether or not the sorted container is empty.
  412. """
  413. return (len(self.keys) == 0)
  414. def createPRIM_OP(edge, inc_cost=True):
  415. """
  416. Helper function to keep argument list short,
  417. return contracted data for a PRIM_OP.
  418. """
  419. if edge.label == PRIM_OP.inc or edge.label == PRIM_OP.out:
  420. if inc_cost: # op # vertex type # actual edge type
  421. return (edge.label, edge.orig_src.type, edge.orig_tgt.type, edge.cost)
  422. else:
  423. return (edge.label, edge.orig_src.type, edge.orig_tgt.type)
  424. elif edge.label == PRIM_OP.lkp:
  425. if inc_cost: # op # vertex/edge type # is vertex or edge
  426. return (edge.label, edge.orig_tgt.type, edge.orig_tgt.is_vertex, edge.cost)
  427. else:
  428. return (edge.label, edge.orig_tgt.type, edge.orig_tgt.is_vertex)
  429. else: # src, tgt operation
  430. if inc_cost: # op # actual edge type
  431. return (edge.label, edge.orig_src.type, edge.cost)
  432. else:
  433. return (edge.label, edge.orig_src.type)
  434. def flattenReverseGraph(vertex, inc_edge, reverseGraph):
  435. """
  436. Flattens the reverseGraph, so that the vertexGraph node can get
  437. processed to create a forwardGraph.
  438. """
  439. if not isinstance(vertex, VertexGraph):
  440. reverseGraph[vertex] = inc_edge
  441. else:
  442. reverseGraph[inc_edge.orig_tgt] = inc_edge
  443. for vg, eg in inc_edge.tgt.internalMDST.items():
  444. flattenReverseGraph(vg, eg, reverseGraph)
  445. if isinstance(inc_edge.src, VertexGraph):
  446. for vg, eg in inc_edge.src.internalMDST.items():
  447. flattenReverseGraph(vg, eg, reverseGraph)
  448. def createForwardGraph(vertex, inc_edge, forwardGraph):
  449. """
  450. Create a forwardGraph, keeping in mind that their can be vertexGraph
  451. in the reverseGraph.
  452. """
  453. if not isinstance(vertex, VertexGraph):
  454. forwardGraph.setdefault(inc_edge.orig_src, []).append(inc_edge)
  455. else:
  456. forwardGraph.setdefault(inc_edge.orig_src, []).append(inc_edge)
  457. for vg, eg in vertex.internalMDST.items():
  458. createForwardGraph(vg, eg, forwardGraph)
  459. MDST = []
  460. # pi_v contains {vertex, incoming_edge}
  461. # we want to start from root and follow the outgoing edges
  462. # so we have to build the forwardGraph graph for pi_v
  463. # (Except for the root (has 0), each vertex has exactly one incoming edge,
  464. # but might have multiple outgoing edges)
  465. forwardGraph = {} # {vertex, [outgoing edge 1, ... ] }
  466. reverseGraph = {}
  467. # flatten reverseGraph (for the vertexGraph elements)
  468. for v, e in pi_v.items():
  469. flattenReverseGraph(v, e, reverseGraph)
  470. # create the forwardGraph
  471. for vertex, edge in reverseGraph.items():
  472. createForwardGraph(vertex, edge, forwardGraph)
  473. # create the MDST in a best first manner (lowest value first)
  474. current = SortedContainer() # allows easy walking true tree
  475. for edge in forwardGraph[self.root]:
  476. current.add(edge.orig_cost, edge) # use orig cost, not modified
  477. while current.empty() != True:
  478. p_op = current.pop() # p_op contains an outgoing edge
  479. MDST.append(createPRIM_OP(p_op))
  480. for edge in forwardGraph.get(p_op.orig_tgt, []):
  481. current.add(edge.orig_cost, edge)
  482. return MDST