generator.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. # coding: utf-8
  2. """
  3. Author: Sten Vercamman
  4. Univeristy of Antwerp
  5. Example code for paper: Efficient model transformations for novices
  6. url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
  7. The main goal of this code is to give an overview, and an understandable
  8. implementation, of known techniques for pattern matching and solving the
  9. sub-graph homomorphism problem. The presented techniques do not include
  10. performance adaptations/optimizations. It is not optimized to be efficient
  11. but rather for the ease of understanding the workings of the algorithms.
  12. The paper does list some possible extensions/optimizations.
  13. It is intended as a guideline, even for novices, and provides an in-depth look
  14. at the workings behind various techniques for efficient pattern matching.
  15. """
  16. import graph
  17. # import numpy as np
  18. import math
  19. import collections
  20. import random
  21. class GraphGenerator(object):
  22. """
  23. Generates a random Graph with dv an array containing all vertices (there type),
  24. de an array containing all edges (their type) and dc_inc an array representing
  25. the incoming edges (analogue for dc_out)
  26. """
  27. def __init__(self, dv, de, dc_inc, dc_out, debug=False):
  28. if len(de) != len(dc_inc):
  29. raise ValueError('de and dc_inc should be the same length.')
  30. if len(de) != len(dc_out):
  31. raise ValueError('de and dc_out should be the same length.')
  32. self.dv = dv
  33. self.de = de
  34. self.dc_inc = dc_inc
  35. self.dc_out = dc_out
  36. # print for debugging, so you know the used values
  37. if debug:
  38. print('dv')
  39. print('[',','.join(map(str,dv)),']')
  40. print('_____')
  41. print('de')
  42. print('[',','.join(map(str,de)),']')
  43. print('_____')
  44. print('dc_inc')
  45. print('[',','.join(map(str,dc_inc)),']')
  46. print('_____')
  47. print('dc_out')
  48. print('[',','.join(map(str,dc_out)),']')
  49. print('_____')
  50. self.graph = graph.Graph()
  51. self.vertices = []
  52. # create all the vertices:
  53. for v_type in self.dv:
  54. # v_type represents the type of the vertex
  55. self.vertices.append(self.graph.addCreateVertex('v' + str(v_type)))
  56. index = 0
  57. # create all edges
  58. for e_type in self.de:
  59. # e_type represents the type of the edge
  60. src = self.vertices[self.dc_out[index]] # get src vertex
  61. tgt = self.vertices[self.dc_inc[index]] # get tgt vertex
  62. self.graph.addCreateEdge(src, tgt, 'e' + str(e_type)) # create edge
  63. index += 1
  64. def getRandomGraph(self):
  65. return self.graph
  66. def getRandomPattern(self, max_nr_of_v, max_nr_of_e, start=0, debug=False):
  67. # create pattern
  68. pattern = graph.Graph()
  69. # map from graph to new pattern
  70. graph_to_pattern = {}
  71. # map of possible edges
  72. # we don't need a dict, but python v2.7 does not have an OrderedSet
  73. possible_edges = collections.OrderedDict()
  74. # set of chosen edges
  75. chosen_edges = set()
  76. # start node from graph
  77. g_node = self.vertices[start]
  78. p_node = pattern.addCreateVertex(g_node.type)
  79. # for debuging, print the order in which the pattern gets created and
  80. # connects it edges
  81. if debug:
  82. print('v'+str(id(p_node))+'=pattern.addCreateVertex('+"'"+str(g_node.type)+"'"+')')
  83. # save corrolation
  84. graph_to_pattern[g_node] = p_node
  85. def insertAllEdges(edges, possible_edges, chosen_edges):
  86. for edge in edges:
  87. # if we did not chose the edge
  88. if edge not in chosen_edges:
  89. # if inc_edge not in possible edges, add it with value 1
  90. possible_edges[edge] = None
  91. def insertEdges(g_vertex, possible_edges, chosen_edges):
  92. insertAllEdges(g_vertex.incoming_edges, possible_edges, chosen_edges)
  93. insertAllEdges(g_vertex.outgoing_edges, possible_edges, chosen_edges)
  94. insertEdges(g_node, possible_edges, chosen_edges)
  95. while max_nr_of_v > len(graph_to_pattern) and max_nr_of_e > len(chosen_edges):
  96. candidate = None
  97. if len(possible_edges) == 0:
  98. break
  99. # get a random number between 0 and len(possible_edges)
  100. # We us a triangular distribution to approximate the fact that
  101. # the first element is the longest in the possible_edges and
  102. # already had the post chance of beeing choosen.
  103. # (The approximation is because the first few ellements where
  104. # added in the same itteration, but doing this exact is
  105. # computationally expensive.)
  106. if len(possible_edges) == 1:
  107. randie = 0
  108. else:
  109. randie = int(round(random.triangular(1, len(possible_edges), len(possible_edges)))) - 1
  110. candidate = list(possible_edges.keys())[randie]
  111. del possible_edges[candidate]
  112. chosen_edges.add(candidate)
  113. src = graph_to_pattern.get(candidate.src)
  114. tgt = graph_to_pattern.get(candidate.tgt)
  115. src_is_new = True
  116. if src != None and tgt != None:
  117. # create edge between source and target
  118. pattern.addCreateEdge(src, tgt, candidate.type)
  119. if debug:
  120. print('pattern.addCreateEdge('+'v'+str(id(src))+', '+'v'+str(id(tgt))+', '+"'"+str(candidate.type)+"'"+')')
  121. # skip adding new edges
  122. continue
  123. elif src == None:
  124. # create pattern vertex
  125. src = pattern.addCreateVertex(candidate.src.type)
  126. if debug:
  127. print('v'+str(id(src))+'=pattern.addCreateVertex('+"'"+str(candidate.src.type)+"'"+')')
  128. # map newly created pattern vertex
  129. graph_to_pattern[candidate.src] = src
  130. # create edge between source and target
  131. pattern.addCreateEdge(src, tgt, candidate.type)
  132. if debug:
  133. print('pattern.addCreateEdge('+'v'+str(id(src))+', '+'v'+str(id(tgt))+', '+"'"+str(candidate.type)+"'"+')')
  134. elif tgt == None:
  135. src_is_new = False
  136. # create pattern vertex
  137. tgt = pattern.addCreateVertex(candidate.tgt.type)
  138. if debug:
  139. print('v'+str(id(tgt))+'=pattern.addCreateVertex('+"'"+str(candidate.tgt.type)+"'"+')')
  140. # map newly created pattern vertex
  141. graph_to_pattern[candidate.tgt] = tgt
  142. # create edge between source and target
  143. pattern.addCreateEdge(src, tgt, candidate.type)
  144. if debug:
  145. print('pattern.addCreateEdge('+'v'+str(id(src))+', '+'v'+str(id(tgt))+', '+"'"+str(candidate.type)+"'"+')')
  146. else:
  147. raise RuntimeError('Bug: src or tgt of edge should be in out pattern')
  148. # select the vertex from the chosen edge that was not yet part of the pattern
  149. if src_is_new:
  150. new_vertex = candidate.src
  151. else:
  152. new_vertex = candidate.tgt
  153. # insert all edges from the new vertex
  154. insertEdges(new_vertex, possible_edges, chosen_edges)
  155. return pattern
  156. def createConstantPattern():
  157. """
  158. Use this to create the same pattern over and over again.
  159. """
  160. # create pattern
  161. pattern = graph.Graph()
  162. # copy and paste printed pattern from debug output or create a pattern
  163. # below the following line:
  164. # ----------------------------------------------------------------------
  165. v4447242448=pattern.addCreateVertex('v4')
  166. v4457323088=pattern.addCreateVertex('v6')
  167. pattern.addCreateEdge(v4447242448, v4457323088, 'e4')
  168. v4457323216=pattern.addCreateVertex('v8')
  169. pattern.addCreateEdge(v4457323216, v4447242448, 'e4')
  170. v4457323344=pattern.addCreateVertex('v7')
  171. pattern.addCreateEdge(v4457323216, v4457323344, 'e3')
  172. v4457323472=pattern.addCreateVertex('v7')
  173. pattern.addCreateEdge(v4457323344, v4457323472, 'e1')
  174. # ----------------------------------------------------------------------
  175. return pattern