Просмотр исходного кода

Add Sten Vercammen's pattern matching library (ported to Python 3, numpy dependency replaced by standard library)

Joeri Exelmans 11 месяцев назад
Родитель
Сommit
95a8076a17

+ 31 - 0
pattern_matching/enum.py

@@ -0,0 +1,31 @@
+# coding: utf-8
+
+"""
+Author:		Sten Vercammen
+			University of Antwerp
+
+Example code for paper: Efficient model transformations for novices
+url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
+
+The main goal of this code is to give an overview, and an understandable
+implementation, of known techniques for pattern matching and solving the
+sub-graph homomorphism problem. The presented techniques do not include
+performance adaptations/optimizations. It is not optimized to be efficient
+but rather for the ease of understanding the workings of the algorithms.
+The paper does list some possible extensions/optimizations.
+
+It is intended as a guideline, even for novices, and provides an in-depth look
+at the workings behind various techniques for efficient pattern matching.
+"""
+
+class Enum(object):
+	"""
+	Custom Enum object for compatibility (enum is introduced in python 3.4)
+	Usage create	: a = Enum(['e0', 'e1', ...])
+	Usage call		: a.e0
+	"""
+	def __init__(self, args):
+		next	= 0
+		for arg in args:
+			self.__dict__[arg] = next
+			next	+= 1

+ 202 - 0
pattern_matching/generator.py

@@ -0,0 +1,202 @@
+# coding: utf-8
+
+"""
+Author:		Sten Vercammen
+			University of Antwerp
+
+Example code for paper: Efficient model transformations for novices
+url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
+
+The main goal of this code is to give an overview, and an understandable
+implementation, of known techniques for pattern matching and solving the
+sub-graph homomorphism problem. The presented techniques do not include
+performance adaptations/optimizations. It is not optimized to be efficient
+but rather for the ease of understanding the workings of the algorithms.
+The paper does list some possible extensions/optimizations.
+
+It is intended as a guideline, even for novices, and provides an in-depth look
+at the workings behind various techniques for efficient pattern matching.
+"""
+
+import graph
+# import numpy as np
+import math
+import collections
+import random
+
+class GraphGenerator(object):
+	"""
+	Generates a random Graph with dv an array containing all vertices (there type),
+	de an array containing all edges (their type) and dc_inc an array representing
+	the incoming edges (analogue for dc_out)
+	"""
+	def __init__(self, dv, de, dc_inc, dc_out, debug=False):
+		if len(de) != len(dc_inc):
+			raise ValueError('de and dc_inc should be the same length.')
+		if len(de) != len(dc_out):
+			raise ValueError('de and dc_out should be the same length.')
+
+		self.dv = dv
+		self.de = de
+		self.dc_inc = dc_inc
+		self.dc_out = dc_out
+
+		# print for debugging, so you know the used values
+		if debug:
+			print('dv')
+			print('[',','.join(map(str,dv)),']')
+			print('_____')
+			print('de')
+			print('[',','.join(map(str,de)),']')
+			print('_____')
+			print('dc_inc')
+			print('[',','.join(map(str,dc_inc)),']')
+			print('_____')
+			print('dc_out')
+			print('[',','.join(map(str,dc_out)),']')
+			print('_____')
+
+		self.graph	= graph.Graph()
+		self.vertices	= []
+		# create all the vertices:
+		for v_type in self.dv:
+			# v_type represents the type of the vertex
+			self.vertices.append(self.graph.addCreateVertex('v' + str(v_type)))
+		
+		index	= 0
+		# create all edges
+		for e_type in self.de:
+			# e_type represents the type of the edge
+			src	= self.vertices[self.dc_out[index]]		# get src vertex
+			tgt	= self.vertices[self.dc_inc[index]]		# get tgt vertex
+			self.graph.addCreateEdge(src, tgt, 'e' + str(e_type))	# create edge
+			index	+= 1
+
+	def getRandomGraph(self):
+		return self.graph
+
+	def getRandomPattern(self, max_nr_of_v, max_nr_of_e, start=0, debug=False):
+		# create pattern
+		pattern	= graph.Graph()
+
+		# map from graph to new pattern
+		graph_to_pattern	= {}
+
+		# map of possible edges
+		# we don't need a dict, but python v2.7 does not have an OrderedSet
+		possible_edges	= collections.OrderedDict()
+
+		# set of chosen edges
+		chosen_edges	= set()
+
+		# start node from graph
+		g_node	= self.vertices[start]
+		p_node	= pattern.addCreateVertex(g_node.type)
+		# for debuging, print the order in which the pattern gets created and
+		# connects it edges
+		if debug:
+			print('v'+str(id(p_node))+'=pattern.addCreateVertex('+"'"+str(g_node.type)+"'"+')')
+		# save corrolation
+		graph_to_pattern[g_node]	= p_node
+
+		def insertAllEdges(edges, possible_edges, chosen_edges):
+			for edge in edges:
+				# if we did not chose the edge
+				if edge not in chosen_edges:
+					# if inc_edge not in possible edges, add it with value 1
+					possible_edges[edge]	= None
+
+		def insertEdges(g_vertex, possible_edges, chosen_edges):
+			insertAllEdges(g_vertex.incoming_edges, possible_edges, chosen_edges)
+			insertAllEdges(g_vertex.outgoing_edges, possible_edges, chosen_edges)
+
+		insertEdges(g_node, possible_edges, chosen_edges)
+
+		while max_nr_of_v > len(graph_to_pattern) and max_nr_of_e > len(chosen_edges):
+			candidate	= None
+			if len(possible_edges) == 0:
+				break
+			# get a random number between 0 and len(possible_edges)
+			# We us a triangular distribution to approximate the fact that
+			# the first element is the longest in the possible_edges and
+			# already had the post chance of beeing choosen.
+			# (The approximation is because the first few ellements where
+			# added in the same itteration, but doing this exact is
+			# computationally expensive.)
+			if len(possible_edges) == 1:
+				randie	= 0
+			else:
+				randie	= int(round(random.triangular(1, len(possible_edges), len(possible_edges)))) - 1
+			candidate	= list(possible_edges.keys())[randie]
+			del possible_edges[candidate]
+			chosen_edges.add(candidate)
+
+			src	= graph_to_pattern.get(candidate.src)
+			tgt	= graph_to_pattern.get(candidate.tgt)
+			src_is_new	= True
+			if src != None and tgt != None:
+				# create edge between source and target
+				pattern.addCreateEdge(src, tgt, candidate.type)
+				if debug:
+					print('pattern.addCreateEdge('+'v'+str(id(src))+', '+'v'+str(id(tgt))+', '+"'"+str(candidate.type)+"'"+')')
+				# skip adding new edges
+				continue
+			elif src == None:
+				# create pattern vertex
+				src	= pattern.addCreateVertex(candidate.src.type)
+				if debug:
+					print('v'+str(id(src))+'=pattern.addCreateVertex('+"'"+str(candidate.src.type)+"'"+')')
+				# map newly created pattern vertex
+				graph_to_pattern[candidate.src]	= src
+				# create edge between source and target
+				pattern.addCreateEdge(src, tgt, candidate.type)
+				if debug:
+					print('pattern.addCreateEdge('+'v'+str(id(src))+', '+'v'+str(id(tgt))+', '+"'"+str(candidate.type)+"'"+')')
+			elif tgt == None:
+				src_is_new	= False
+				# create pattern vertex
+				tgt	= pattern.addCreateVertex(candidate.tgt.type)
+				if debug:
+					print('v'+str(id(tgt))+'=pattern.addCreateVertex('+"'"+str(candidate.tgt.type)+"'"+')')
+				# map newly created pattern vertex
+				graph_to_pattern[candidate.tgt]	= tgt
+				# create edge between source and target
+				pattern.addCreateEdge(src, tgt, candidate.type)
+				if debug:
+					print('pattern.addCreateEdge('+'v'+str(id(src))+', '+'v'+str(id(tgt))+', '+"'"+str(candidate.type)+"'"+')')
+			else:
+				raise RuntimeError('Bug: src or tgt of edge should be in out pattern')
+
+			# select the vertex from the chosen edge that was not yet part of the pattern
+			if src_is_new:
+				new_vertex	= candidate.src
+			else:
+				new_vertex	= candidate.tgt
+			# insert all edges from the new vertex
+			insertEdges(new_vertex, possible_edges, chosen_edges)
+
+		return pattern
+
+	def createConstantPattern():
+		"""
+		Use this to create the same pattern over and over again.
+		"""
+		# create pattern
+		pattern	= graph.Graph()
+
+
+		# copy and paste printed pattern from debug output or create a pattern
+		# below the following line:
+		# ----------------------------------------------------------------------
+		v4447242448=pattern.addCreateVertex('v4')
+		v4457323088=pattern.addCreateVertex('v6')
+		pattern.addCreateEdge(v4447242448, v4457323088, 'e4')
+		v4457323216=pattern.addCreateVertex('v8')
+		pattern.addCreateEdge(v4457323216, v4447242448, 'e4')
+		v4457323344=pattern.addCreateVertex('v7')
+		pattern.addCreateEdge(v4457323216, v4457323344, 'e3')
+		v4457323472=pattern.addCreateVertex('v7')
+		pattern.addCreateEdge(v4457323344, v4457323472, 'e1')
+
+		# ----------------------------------------------------------------------
+		return pattern

+ 157 - 0
pattern_matching/graph.py

@@ -0,0 +1,157 @@
+# coding: utf-8
+
+"""
+Author:		Sten Vercammen
+			University of Antwerp
+
+Example code for paper: Efficient model transformations for novices
+url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
+
+The main goal of this code is to give an overview, and an understandable
+implementation, of known techniques for pattern matching and solving the
+sub-graph homomorphism problem. The presented techniques do not include
+performance adaptations/optimizations. It is not optimized to be efficient
+but rather for the ease of understanding the workings of the algorithms.
+The paper does list some possible extensions/optimizations.
+
+It is intended as a guideline, even for novices, and provides an in-depth look
+at the workings behind various techniques for efficient pattern matching.
+"""
+
+class Properties(object):
+	"""
+	Holds all Properties.
+	"""
+	def __init__(self):
+		# member variables:
+		self.properties	= {}
+
+	def addProperty(self, name, value):
+		"""
+		Adds property (overrides if name already exists).
+		"""
+		self.properties[name]	= value
+
+	def getProperty(self, name):
+		"""
+		Returns property with given name or None if not found.
+		"""
+		return self.properties.get(name)
+
+class Edge(Properties):
+	"""
+	Describes an Edge with source and target Node.
+	The Edge can have several properties, like a name, a weight, etc...
+	"""
+	def __init__(self, src, tgt, str_type=None):
+		# Call parent class constructor
+		Properties.__init__(self)
+		# member variables:
+		self.src	= src
+		self.tgt	= tgt
+		self.type	= str_type
+
+class Vertex(Properties):
+	"""
+	Describes a Vertex with incoming, outgoing and undirected (both ways) edges.
+	The vertex can have several properties, like a name, a weight, etc...
+	"""
+	def __init__(self, str_type):
+		# Call parent class constructor
+		Properties.__init__(self)
+		# member variables:
+		self.incoming_edges	= set()	# undirected edges should be stored both in
+		self.outgoing_edges	= set()	# incoming and outgoing edges
+		self.type			= str_type
+
+	def addIncomingEdge(self, edge):
+		"""
+		Adds an incoming Edge.
+		"""
+		if not isinstance(edge, Edge):
+			raise TypeError('addIncomingEdge without it being an edge')
+		self.incoming_edges.add(edge)
+
+	def addOutgoingEdge(self, edge):
+		"""
+		Adds an outgoing Edge.
+		"""
+		if not isinstance(edge, Edge):
+			raise TypeError('addOutgoingEdge without it being an edge')
+		self.outgoing_edges.add(edge)
+
+	def addUndirectedEdge(self, edge):
+		"""
+		Adds an undirected (or bi-directed) Edge.
+		"""
+		self.addIncomingEdge(edge)
+		self.addOutgoingEdge(edge)
+
+class Graph(object):
+	"""
+	Holds a Graph.
+	"""
+	def __init__(self):
+		# member variables:
+		# redundant type keeping, "needed" for fast iterating over specific type
+		self.vertices	= {}	# {type, set(v1, v2, ...)}
+		self.edges		= {}	# {type, set(e1, e2, ...)}
+
+	def addCreateVertex(self, str_type):
+		"""
+		Creates a Vertex of str_type, stores it and returs it
+		(so that properties can be added to it).
+		"""
+		vertex	= Vertex(str_type)
+		self.addVertex(vertex)
+		return vertex
+
+	def addVertex(self, vertex):
+		"""
+		Stores a Vertex into the Graph.
+		"""
+		if not isinstance(vertex, Vertex):
+			raise TypeError('addVertex expects a Vertex')
+		# add vertex, but it first creates a new set for the vertex type
+		# if the type does not exist in the dictionary
+		self.vertices.setdefault(vertex.type, set()).add(vertex)
+
+	def getVerticesOfType(self, str_type):
+		"""
+		Returns all vertices of a specific type,
+		Return [] if there are no vertices with the given type
+		"""
+		return self.vertices.get(str_type, [])
+
+	def getEdgesOfType(self, str_type):
+		"""
+		Returns all edges of a specific type,
+		Return [] if there are no edges with the given type
+		"""
+		return self.edges.get(str_type, [])
+
+	def addCreateEdge(self, src, tgt, str_type):
+		"""
+		Creates edge of str_type from src to tgt, and returns it,
+		so that properties can be added to the edge.
+		"""
+		if not isinstance(src, Vertex):
+			raise TypeError('addCreateEdge: src is not a Vertex')
+		if not isinstance(tgt, Vertex):
+			raise TypeError('addCreateEdge: tgt is not a Vertex')
+		edge	= Edge(src, tgt, str_type)
+		# link vertices connected to this edge
+		edge.src.addOutgoingEdge(edge)
+		edge.tgt.addIncomingEdge(edge)
+		self.addEdge(edge)
+		return edge
+
+	def addEdge(self, edge):
+		"""
+		Stores an Edge into the Graph.
+		"""
+		if not isinstance(edge, Edge):
+			raise TypeError('addEdge expects an Edge')
+		# add edge, but it first creates a new set for the edge type
+		# if the type does not exist in the dictionary
+		self.edges.setdefault(edge.type, set()).add(edge)

+ 44 - 0
pattern_matching/graphToDot.py

@@ -0,0 +1,44 @@
+# coding: utf-8
+
+"""
+Author:		Sten Vercammen
+			University of Antwerp
+
+Example code for paper: Efficient model transformations for novices
+url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
+
+The main goal of this code is to give an overview, and an understandable
+implementation, of known techniques for pattern matching and solving the
+sub-graph homomorphism problem. The presented techniques do not include
+performance adaptations/optimizations. It is not optimized to be efficient
+but rather for the ease of understanding the workings of the algorithms.
+The paper does list some possible extensions/optimizations.
+
+It is intended as a guideline, even for novices, and provides an in-depth look
+at the workings behind various techniques for efficient pattern matching.
+"""
+
+import graph as mg
+
+def printGraph(fileName, graph, matched_v={}, matched_e={}):
+	if not isinstance(graph, mg.Graph):
+		raise TypeError('Can only print Graph Graphs')
+
+	with open(fileName, 'w') as f:
+		f.write('digraph randomGraph {\n\n')
+		for str_type, plan_vertices in graph.vertices.items():
+			for plan_vertex in plan_vertices:
+				vertex_str	= str(id(plan_vertex)) + ' [label="'+str(str_type)+'"'
+				if plan_vertex in list(matched_v.values()):
+					vertex_str	+= ', style=dashed, style=filled]\n'
+				else:
+					vertex_str	+= ']\n'
+				f.write(vertex_str)
+				for out_edge in plan_vertex.outgoing_edges:
+					edge_str	= str(id(plan_vertex)) + ' -> ' + str(id(out_edge.tgt)) + ' [label="'+str(out_edge.type)+'"'
+					if out_edge in list(matched_e.values()):
+						edge_str	+= ', style=dashed, penwidth = 4]\n'
+					else:
+						edge_str	+= ']\n'
+					f.write(edge_str)
+		f.write('\n}')

+ 88 - 0
pattern_matching/main.py

@@ -0,0 +1,88 @@
+# coding: utf-8
+
+"""
+Author:		Sten Vercammen
+			University of Antwerp
+
+Example code for paper: Efficient model transformations for novices
+url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
+
+The main goal of this code is to give an overview, and an understandable
+implementation, of known techniques for pattern matching and solving the
+sub-graph homomorphism problem. The presented techniques do not include
+performance adaptations/optimizations. It is not optimized to be efficient
+but rather for the ease of understanding the workings of the algorithms.
+The paper does list some possible extensions/optimizations.
+
+It is intended as a guideline, even for novices, and provides an in-depth look
+at the workings behind various techniques for efficient pattern matching.
+"""
+
+from generator			import *
+from patternMatching	import *
+
+import graphToDot
+
+import random
+
+debug = False
+
+if __name__ == '__main__':
+	"""
+	The main function called when running from the command line.
+	"""
+	nr_of_vertices		= 50
+	nr_of_diff_types_v	= 10
+	nr_of_edges			= 150
+	nr_of_diff_types_e	= 10
+
+	dv      = [random.randint(0, nr_of_diff_types_v) for _ in range(nr_of_vertices)]
+	# dv		= np.random.random_integers(0, nr_of_diff_types_v, nr_of_vertices)
+	de      = [random.randint(0, nr_of_diff_types_e) for _ in range(nr_of_edges)]
+	# de		= np.random.random_integers(0, nr_of_diff_types_e, nr_of_edges)
+	dc_inc	= [random.randint(0, nr_of_vertices-1) for _ in range(nr_of_edges)]
+	# dc_inc	= np.random.random_integers(0, nr_of_vertices-1, nr_of_edges)
+	dc_out	= [random.randint(0, nr_of_vertices-1) for _ in range(nr_of_edges)]
+	# dc_out	= np.random.random_integers(0, nr_of_vertices-1, nr_of_edges)
+
+	# override random graph by copy pasting output from terminal
+	dv		= [ 10,5,4,0,8,6,8,0,4,8,5,5,7,0,10,0,5,6,10,4,0,3,0,8,2,7,5,8,1,0,2,10,0,0,1,6,8,4,7,6,4,2,10,10,6,4,6,0,2,7 ]
+	de		= [ 8,10,8,1,6,7,4,3,5,2,0,0,9,6,0,3,8,3,2,7,2,3,10,8,10,8,10,2,5,5,10,6,7,5,1,2,1,2,2,3,7,7,2,1,7,2,9,10,8,1,9,4,1,3,1,1,8,2,2,9,10,9,1,9,4,10,10,10,9,3,5,3,6,6,9,1,2,6,3,2,4,10,9,6,5,6,2,4,3,2,4,10,6,2,8,8,0,5,1,7,3,4,3,8,7,3,0,8,3,3,8,5,10,5,9,3,1,10,3,2,6,3,10,0,5,10,9,10,0,1,4,7,10,3,1,9,1,2,3,7,4,3,7,8,8,4,5,10,1,4 ]
+	dc_inc	= [ 0,25,18,47,22,25,16,45,38,25,5,45,15,44,17,46,6,17,35,8,16,29,48,47,25,34,4,20,24,1,47,44,8,25,32,3,16,6,33,21,6,13,41,10,17,25,21,33,31,30,5,4,45,26,16,42,12,25,29,3,32,30,14,26,11,13,7,13,3,43,43,22,48,37,20,28,15,40,19,33,43,16,49,36,11,25,9,42,3,22,16,40,42,44,27,30,1,18,10,35,19,6,9,43,37,38,45,19,41,14,37,45,0,31,29,31,24,20,44,46,8,45,43,3,38,38,35,12,19,45,7,34,20,28,12,17,45,17,35,49,20,21,49,1,35,38,38,36,33,30 ]
+	dc_out	= [ 9,2,49,49,37,33,16,21,5,46,4,15,9,6,14,22,16,33,23,21,15,31,37,23,47,3,30,26,35,9,29,21,39,32,22,43,5,9,41,30,31,30,37,33,31,34,23,22,34,26,44,36,38,33,48,5,9,34,13,7,48,41,43,26,26,7,12,6,12,28,22,8,29,22,24,27,16,4,31,41,32,15,19,20,38,0,26,18,43,46,40,17,29,14,34,14,32,17,32,47,16,45,7,4,35,22,42,11,38,2,0,29,4,38,17,44,9,23,5,10,31,17,1,11,16,5,37,27,35,32,45,16,18,1,14,4,42,24,43,31,21,38,6,34,39,46,20,1,38,47 ]
+	
+	gg	= GraphGenerator(dv, de, dc_inc, dc_out, debug)
+
+	graph	= gg.getRandomGraph()
+	pattern	= gg.getRandomPattern(5, 15, debug=debug)
+
+
+	# override random pattern by copy pasting output from terminal to create
+	# pattern, paste it in the createConstantPattern function in the generator.py
+	# pattern	= gg.createConstantPattern()
+
+	# generate here to know pattern and graph before searching it
+	graphToDot.printGraph('randomPattern.dot', pattern)
+	graphToDot.printGraph('randomGraph.dot', graph)
+
+	
+	#PM	= PatternMatching('naive')
+	#PM	= PatternMatching('SP')
+	# PM	= PatternMatching('Ullmann')
+	PM	= PatternMatching('VF2')
+	v,e = PM.match(pattern, graph)
+
+	# regenerate graph, to show matched pattern
+	graphToDot.printGraph('randomGraph.dot', graph, v, e)
+
+	if debug:
+		print(len(v))
+		print('___')
+		print(v)
+		for key, value in v.items():
+			print(value.type)
+		print(len(e))
+		print(e)
+		print('___')
+		for key, value in e.items():
+			print(value.type)

+ 947 - 0
pattern_matching/patternMatching.py

@@ -0,0 +1,947 @@
+# coding: utf-8
+
+"""
+Author:		Sten Vercammen
+			University of Antwerp
+
+Example code for paper: Efficient model transformations for novices
+url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
+
+The main goal of this code is to give an overview, and an understandable
+implementation, of known techniques for pattern matching and solving the
+sub-graph homomorphism problem. The presented techniques do not include
+performance adaptations/optimizations. It is not optimized to be efficient
+but rather for the ease of understanding the workings of the algorithms.
+The paper does list some possible extensions/optimizations.
+
+It is intended as a guideline, even for novices, and provides an in-depth look
+at the workings behind various techniques for efficient pattern matching.
+"""
+
+from planGraph import *
+
+import collections
+import itertools
+# import numpy as np
+
+class PatternMatching(object):
+	"""
+	Returns an occurrence of a given pattern from the given Graph
+	"""
+	def __init__(self, matching_type='SP', optimize=True):
+		# store the type of matching we want to use
+		self.type			= matching_type
+		self.bound_vertices	= {}	# saves the currently bound vertices
+		self.bound_edges	= {}	# saves the currently bound edges
+		self.result			= None
+		self.previous		= []
+		self.optimize		= optimize
+
+	def match(self, pattern, graph):
+		"""
+		Call this function to find an occurrence of the pattern in the (host) graph.
+		Setting the type of matching (naive, SP, Ullmann, VF2) is done by
+		setting self.matching_type to its name.
+		"""
+		if not (isinstance(pattern, SearchGraph) or isinstance(pattern, Graph)):
+			raise TypeError('pattern must be a SearchGraph or Graph')
+		if not (isinstance(graph, SearchGraph) or isinstance(graph, Graph)):
+			raise TypeError('graph must be a SearchGraph or Graph')
+
+		self.pattern	= pattern
+		self.graph		= graph
+
+		if self.type	== 'naive':
+			result	= self.matchNaive(vertices=graph.vertices, edges=graph.edges)
+		elif self.type	== 'SP':
+			result	= self.matchSP()
+		elif self.type	== 'Ullmann':
+			result	= self.matchUllmann()
+		elif self.type	== 'VF2':
+			result	= self.matchVF2()
+		else:
+			raise ValueError('Unknown type for matching')
+
+		# cleanup
+		self.pattern		= None
+		self.graph			= None
+		self.bound_vertices	= {}
+		self.bound_edges	= {}
+		self.result			= None
+
+		return result
+
+	def matchNaive(self, pattern_vertices=None, vertices=None, edges=None):
+		"""
+		Try to find an occurrence of the pattern in the Graph naively
+		(depth-first binding of pattern elements to graph elements, with
+		backtracking).
+
+		pattern_vertices: {type: pattern vertices} to match,
+		                  defaults to self.pattern.vertices
+		vertices:         {type: graph vertices} that may be used,
+		                  defaults to self.bound_vertices
+		edges:            {type: graph edges} that may be used,
+		                  defaults to self.bound_edges
+
+		Returns a tuple (pattern vertex -> graph vertex,
+		pattern edge -> graph edge) or None if no occurrence is found.
+		"""
+		# allow call with specific arguments
+		if pattern_vertices		== None:
+			pattern_vertices	= self.pattern.vertices
+		if vertices		== None:
+			vertices	= self.bound_vertices
+		if edges		== None:
+			edges		= self.bound_edges
+
+		def visitEdge(pattern_vertices, p_edge, inc, g_edges, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
+			"""
+			Visit a pattern edge, and try to bind it to a graph edge.
+			(If the first fails, try the second, and so on...)
+			inc tells whether p_edge is an incoming edge of the pattern
+			vertex being visited; it decides which end is visited next.
+			"""
+			for g_edge in g_edges:
+				# only reckon the edge if it is in edges and not visited
+				# (as the graph might be a subgraph of a more complex graph)
+				if g_edge not in edges.get(g_edge.type, []) or g_edge in visited_g_edges:
+					continue
+				if g_edge.type == p_edge.type and g_edge not in visited_g_edges:
+					visited_p_edges[p_edge]	= g_edge
+					visited_g_edges.add(g_edge)
+					# continue the search at the other end of the pattern edge
+					if inc:
+						p_vertex	= p_edge.src
+					else:
+						p_vertex	= p_edge.tgt
+					if visitVertices(pattern_vertices, p_vertex, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
+						return True
+					# remove added edges if they lead to no match, retry with others
+					del visited_p_edges[p_edge]
+					visited_g_edges.remove(g_edge)
+			# no edge leads to a positive match
+			return False
+
+		def visitEdges(pattern_vertices, p_edges, inc, g_edges, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
+			"""
+			Visit all edges of the pattern vertex (edges given as argument).
+			We need to try visiting them for all its permutations, as matching
+			v -e1-> first and v -e2-> second and v -e3-> third, might not result
+			in matching an occurrence of the pattern, but matching v -e2->
+			first and v -e3-> second and v -e1-> third might.
+			"""
+			def removePrevEdge(visitedEdges, visited_p_edges, visited_g_edges):
+				"""
+				Undo the binding of the previous edges (the current bindings
+				do not lead to an occurrence of the pattern in the graph).
+				"""
+				for wrong_edge in visitedEdges:
+					# remove binding (pattern edge to graph edge)
+					wrong_g_edge	= visited_p_edges.get(wrong_edge)
+					del visited_p_edges[wrong_edge]
+					# remove visited graph edge
+					visited_g_edges.remove(wrong_g_edge)
+
+			for it in itertools.permutations(p_edges):
+				# pattern edges bound during this permutation
+				visitedEdges	= []
+				foundallEdges	= True
+				for edge in it:
+					if visited_p_edges.get(edge) == None:
+						if not visitEdge(pattern_vertices, edge, inc, g_edges, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
+							# this did not work, so we have to undo all added edges
+							# (the current edge is not added, as it failed)
+							# we then can try a different permutation
+							removePrevEdge(visitedEdges, visited_p_edges, visited_g_edges)
+							foundallEdges	= False
+							break	# try other order
+						# add good visited (we know it succeeded)
+						visitedEdges.append(edge)
+					else:
+						# we visited this pattern edge, and have the corresponding graph edge
+						# if it is an incoming pattern edge, we need to make sure that
+						# the graph target that is mapped from the pattern target
+						# (of this incoming pattern edge, which has to be bound at this point)
+						# has the graph edge as an incoming edge,
+						# otherwise the graph is not properly connected
+						if inc:
+							if not visited_p_edges[edge] in visited_p_vertices[edge.tgt].incoming_edges:
+								# did not work
+								removePrevEdge(visitedEdges, visited_p_edges, visited_g_edges)
+								foundallEdges	= False
+								break	# try other order
+						else:
+							# analogous for an outgoing edge
+							if not visited_p_edges[edge] in visited_p_vertices[edge.src].outgoing_edges:
+								# did not work
+								removePrevEdge(visitedEdges, visited_p_edges, visited_g_edges)
+								foundallEdges	= False
+								break	# try other order
+
+				# all edges are good, look no further
+				if foundallEdges:
+					break
+			return foundallEdges
+
+		def visitVertex(pattern_vertices, p_vertex, g_vertex, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
+			"""
+			Visit a pattern vertex, and try to bind it to the graph vertex
+			(both are given as argument). A binding is successful if all
+			incoming and outgoing edges of the pattern vertex can be bound
+			(to edges of the graph vertex).
+			"""
+			if g_vertex in visited_g_vertices:
+				return False
+			# save visited graph vertex
+			visited_g_vertices.add(g_vertex)
+			# map pattern vertex to visited graph vertex
+			visited_p_vertices[p_vertex]	= g_vertex
+
+			if visitEdges(pattern_vertices, p_vertex.incoming_edges, True, g_vertex.incoming_edges, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
+				if visitEdges(pattern_vertices, p_vertex.outgoing_edges, False, g_vertex.outgoing_edges, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
+					return True
+			# cleanup, remove from visited as this does not lead to
+			# an occurrence of the pattern in the graph
+			visited_g_vertices.remove(g_vertex)
+			del visited_p_vertices[p_vertex]
+			return False
+
+		def visitVertices(pattern_vertices, p_vertex, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
+			"""
+			Visit a pattern vertex and try to bind a graph vertex to it.
+			"""
+			# if already matched or if it is a vertex not in the pattern_vertices
+			# (second is for when you want to match the pattern partially)
+			if visited_p_vertices.get(p_vertex) != None or p_vertex not in pattern_vertices.get(p_vertex.type, set()):
+				return True
+
+			# try visiting graph vertices of same type as pattern vertex
+			for g_vertex in vertices.get(p_vertex.type, []):
+				if g_vertex not in visited_g_vertices:
+					if visitVertex(pattern_vertices, p_vertex, g_vertex, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
+						return True
+
+			return False
+
+		# bindings made so far: pattern element -> graph element
+		visited_p_vertices	= {}
+		visited_p_edges		= {}
+		# graph elements that are already used by a binding
+		visited_g_vertices	= set()
+		visited_g_edges		= set()
+
+		# the for loop is needed for when the pattern consists of multiple
+		# structures that are not connected to each other
+		allVertices	= []
+		for _, p_vertices in pattern_vertices.items():
+			allVertices.extend(p_vertices)
+		foundIt = False
+		# try every order in which the pattern vertices can be visited
+		for it_p_vertices in itertools.permutations(allVertices):
+			foundIt = True
+			for p_vertex in it_p_vertices:
+				if not visitVertices(pattern_vertices, p_vertex, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
+					foundIt = False
+					# reset visited
+					visited_p_vertices	= {}
+					visited_p_edges		= {}
+					visited_g_vertices	= set()
+					visited_g_edges		= set()
+					break
+			if foundIt:
+				break
+		if foundIt:
+			return (visited_p_vertices, visited_p_edges)
+		else:
+			return None
+
+	def matchSP(self):
+		"""
+		Find an occurrence of the pattern in the Graph
+		by using the generated SearchPlan.
+		"""
+		if isinstance(self.graph, Graph):
+			sg	= SearchGraph(self.graph)
+		elif isinstance(self.graph, SearchGraph):
+			sg = self.graph
+		else:
+			raise TypeError('Pattern matching with a SearchPlan must be given a Graph or SearchGraph')
+
+		pg	= PlanGraph(self.pattern)
+		SP	= pg.Edmonds(sg)
+
+		self.fileIndex = 0
+
+		def propConnected():
+			"""
+			Checks if the found vertices and edges can be uniquely matched
+			onto the pattern graph.
+			"""
+			self.result = self.matchNaive()
+			return self.result != None
+
+		def matchOP(elem, bound, ops, index):
+			"""
+			Execute a primitive operation, return whether ot not it succeeded.
+			"""
+			type_bound	= bound.setdefault(elem.type, set())
+			# if elem not yet bound, bind it, and try matching the next operations
+			if elem not in type_bound:
+				type_bound.add(elem)
+				# if matching of next operation failed, try with a different elem
+				if matchAllOP(ops, index+1):
+					return True
+				else:
+					type_bound.remove(elem)
+			return False
+
+		def matchAllOP(ops, index=0):
+			"""
+			Try to match an occurrence of the pattern in the graph,
+			by recursivly ,atching elements that adhere to the SearchPlan
+			"""
+			# if we matched all elements,
+			# check if the bound elements are properly connected
+			if index == len(ops):
+				return propConnected()
+
+			op = ops[index]
+
+			if op[0] == PRIM_OP.lkp:	# lkp(elem)
+				if op[2]:	# lookup a vertex
+					# If the graph does not have a vertex of the same vertex
+					# type, we'll have to return False, happens if elems == [].
+					elems	= self.graph.vertices.get(op[1], [])
+					bound	= self.bound_vertices
+				else:		# loopup an edge
+					# If the graph does not have an edge of the same edge
+					# type, we'll have to return False, happens if elems == [].
+					elems	= self.graph.edges.get(op[1], [])
+					bound	= self.bound_edges
+				
+				# if elems == [], we'll skip the loop and return False
+				for elem in elems:
+					if matchOP(elem, bound, ops, index):
+						return True
+				# if all not bound elems fails, backtrack
+				return False
+
+			elif op[0] == PRIM_OP.src:	# src(e): bind src of a bound edge e
+				# Should always succeed, as the edge must be already bound
+				# (there should be at least one elem in self.bound_edges[op[1]]).
+				for edge in self.bound_edges[op[1]]:
+					if matchOP(edge.src, self.bound_vertices, ops, index):
+						return True
+				# if all not bound elems fails, backtrack
+				return False
+
+			elif op[0] == PRIM_OP.tgt:	# tgt(e): bind tgt of a bound edge e
+				# Should always succeed, as the edge must be already bound
+				# (there should be at least one elem in self.bound_edges[op[1]]).
+				for edge in self.bound_edges[op[1]]:
+					if matchOP(edge.tgt, self.bound_vertices, ops, index):
+						return True
+				# if all not bound elems fails, backtrack
+				return False
+
+			elif op[0] == PRIM_OP.inc:	# in(v, e):  bind incoming edge e of a bound vertex v
+				# It's possible we will try to find a vertex of a certain type
+				# in the bound_vertices which should be bound implicitly
+				# (by a src/tgt op), that is not bound. Happens when implicit
+				# binding bounded a "wrong" vertex. We then need to return False
+				# (happens by skiping for loop by looping over [])
+				for vertex in self.bound_vertices.get(op[1], []):
+					for edge in vertex.incoming_edges:
+						if edge.type == op[2]:
+							if matchOP(edge, self.bound_edges, ops, index):
+								return True
+				# if all not bound elems fails, backtrack
+				return False
+
+			elif op[0] == PRIM_OP.out:	# out(v, e): bind outgoing edge e of a bound vertex v
+				# Return False if we expect an element to be bound that is not
+				# bound (for the same reason as the inc op).
+				for vertex in self.bound_vertices.get(op[1], []):
+					for edge in vertex.outgoing_edges:
+						if edge.type == op[2]:
+							if matchOP(edge, self.bound_edges, ops, index):
+								return True
+				# if all not bound elems fails, backtrack
+				return False
+			else:
+				raise TypeError('Unknown PRIM_OP type')
+
+		# try and match all (primitive) operations from the SearchPlan
+		matchAllOP(SP)
+
+		# Either nothing is found, or we found an occurrence,
+		# it is impossble to have a partionally matched occurrence
+		for key, bound_elems in self.bound_vertices.items():
+			if len(bound_elems) == 0:
+				# The pattern does not exist in the Graph
+				return None
+			else:
+				# We found a pattern
+				return self.result
+		
+
+	def createAdjacencyMatrixMap(self, graph):
+		"""
+		Return adjacency matrix and the order of the vertices.
+		"""
+		matrix		= collections.OrderedDict()	# { vertex, (index, [has edge from index to pos?]) }
+
+		# contains all vertices we'll use for the AdjacencyMatrix
+		allVertices	= []
+
+		if self.optimize:
+			# insert only the vertices from the graph which have a type
+			# that is present in the pattern
+			for vertex_type, _ in self.pattern.vertices.items():
+				graph_vertices	= graph.vertices.get(vertex_type)
+				if graph_vertices	!= None:
+					allVertices.extend(graph_vertices)
+				else:
+					# we will not be able to find the pattern
+					# as the pattern contains a vertex of a certain type
+					# that is not present in the host graph
+					return False
+		else:
+			# insert all vertices from the graph
+			for _, vertices in graph.vertices.items():
+				allVertices.extend(vertices)
+
+		# create squared zero matrix
+		index	= 0
+		for vertex in allVertices:
+			matrix[vertex]	= (index, [False] * len(allVertices))
+			index	+= 1
+
+		for _, edges in graph.edges.items():
+			for edge in edges:
+				if self.optimize:
+					if edge.tgt not in matrix or edge.src not in matrix:
+						# skip adding edge if  the target or source type
+						# is not present in the pattern
+						# (and therefor not added to the matrix)
+						continue
+				index	= matrix[edge.tgt][0]
+				matrix[edge.src][1][index]	= True
+
+		AM				= []
+		vertices_order	= []
+		for vertex, row in matrix.items():
+			AM.append(row[1])
+			vertices_order.append(vertex)
+
+		return AM, vertices_order
+
+	def matchUllmann(self):
+		"""
+		Find an occurrence of the pattern in the Graph
+		by using Ullmann for solving the Constraint Satisfaction Problem (CSP).
+		"""
+
+		def createM_star(h, p):
+			"""
+			Create M*[v, w]	= 1 if deg(v) <= deg(w), for v in V_P, w in V_H
+							= 0 otherwise
+
+			M and P are given to ensure corect order.
+			"""
+			m		= []	# [[..], ...]
+			for p_vertex in p:
+				row	= []
+				for g_vertex in h:
+					# for the degree function, we choose to look at the
+					# outgoing edges AND the incoming edges
+					# (one might prefer to use only one of them)
+					if self.optimize:
+						# also check if type matches
+						if p_vertex.type != g_vertex.type:
+							row.append(False)
+							continue
+					row.append(	len(p_vertex.incoming_edges) <=
+								len(g_vertex.incoming_edges) and
+								len(p_vertex.outgoing_edges) <=
+								len(g_vertex.outgoing_edges))
+				m.append(row)
+
+			return m
+
+		def createDecreasingOrder(h):
+			"""
+			It turns out that the more edges a vertex has, the sooner it will
+			fail in matching the pattern. For efficiency reasons, we want it
+			to fail as fast as possible.
+			"""
+			order	= []	# [(value, index), ...]
+			index	= 0
+			for g_vertex in h:
+				order.append((	len(g_vertex.outgoing_edges) +
+								len(g_vertex.outgoing_edges), index))
+				index	+= 1
+
+			order.sort(key = lambda elem: elem[0])
+			# sort and only return the indices (which specify the order)
+			return [index for (_, index) in order]
+
+		def propConnected(M, H, P, h, p):
+			"""
+			Checks if the vertices represented in M are isomorphic to P and if
+			they can be matched onto the pattern graph.
+			"""
+			print(M, H, P, h, p)
+			# P_candi	= np.dot(M, np.transpose(np.dot(M, H)))
+
+
+			"""
+			# If we do not aply the refineM function, we will want to check if
+			# this succeeds, as it checks for isomorphism.
+			# If we apply the refineM function, it is garanteed to be isomorphic.
+
+			index_column	= 0
+			for row in P_candi:
+				index_row	= 0
+				for item in row:
+					# for all i,j: P[i, j] = 1 : M(MH)^T [j, i] = 1
+					# (not the other way around)
+					# (return False when item is 0 and P[i,j] is 1)
+					if item < P[index_row][index_column]:
+						return False
+					index_row	+= 1
+				index_column	+= 1
+			"""
+
+			vertices	= {}
+			index_column	= 0
+			for row in M:
+				index_row	= 0
+				for item in row:
+					# there should only be one item per row
+					if item:
+						vertex	= h[index_row]
+						vertices.setdefault(vertex.type, set()).add(vertex)
+						break
+					index_row	+= 1
+				index_column	+= 1
+
+			self.result = self.matchNaive(vertices=vertices, edges=self.graph.edges)
+			return self.result != None
+
+		def refineM(M, H, P, h, pp):
+			"""
+			Refine M, for every vertex from the pattern, check if each possible
+			matching (candidate) his neighbours can also be matched. (M's column
+			represents vertices from P, and the row represents its candidate.)
+			If this is not possible set M[i,j] to false, refining/reducing the
+			search space.
+			"""
+			any_changes=True
+			while any_changes:
+				any_changes = False
+				# for all vertices from the pattern
+				for i in range(0, len(P)):	# P is a nxn-matrix
+					# for all its possible assignments
+					for j in range(0, len(H[0])):
+						# if bound vertex of P, check if all neigbours are matchable
+						if M[i][j]:
+							# for all the pattern his neighbours
+							for k in range(0, len(P)):
+								# if it is a neighbour (from outgoing edges)
+								if P[i][k]:
+									match	= False
+									for p in range(0, len(H[0])):
+										# check if we can match a candidate neighbour
+										# (from M* to to the graph (H))
+										if M[k][p] and H[j][p]:
+											if self.optimize:
+												# also check correct type
+												if pp[k].type != h[p].type:
+													continue
+											match	= True
+											break
+									if not match:
+										M[i][j]	= False
+										any_changes	= True
+								
+								# if it is a neighbour (from incoming edges)
+								if P[k][i]:
+									match	= False
+									for p in range(0, len(H[0])):
+										# check if we can match a candidate neighbour
+										# (from M* to to the graph (H))
+										if M[k][p] and H[p][j]:
+											if self.optimize:
+												# also check correct type
+												if pp[i].type != h[j].type:
+													continue
+											match	= True
+											break
+									if not match:
+										M[i][j]	= False
+										any_changes	= True
+
+		def findM(M_star, M, order, H, P, h, p, index_M=0):
+			"""
+			Find an isomorphic mapping for the vertices of P to H.
+			This mapping is represented by a matrix M if,
+			and only if M(MH)^T = P^T.
+
+			Backtracking search over the rows of M: this call handles row
+			index_M. It tries, in the order given by order, every position
+			that is True in the corresponding row of M_star, refines M and
+			recurses into the next row.
+
+			M_star is only read, M is modified in place. H and P are the
+			adjacency matrices and h and p the vertex orders of the graph and
+			of the pattern; they are passed on to refineM and propConnected.
+
+			Returns True when propConnected accepted a complete assignment,
+			False otherwise.
+			"""
+			# index_M == len(M) means all rows got an assignment, we have a
+			# candidate. Remember that, but first check (below) if one of the
+			# earlier rows contains only False; if so there is no need to
+			# check if the candidate is properly connected.
+			check_prop	= False
+			if index_M == len(M):
+				check_prop	= True
+				# step back to the last row, so that old_row exists
+				index_M		-= 1
+
+			# we need to refer to this row (the original candidates)
+			old_row	= M_star[index_M]
+			# previous rows (these are sparse, 1 per row, save only its position)
+			prev_pos	= []
+			for i in range(0, index_M):
+				row	= M[i]
+				only_false	= True
+				for j in range(0, len(old_row)):
+					if row[j]:
+						only_false	= False
+						prev_pos.append(j)
+						break
+				if only_false:
+					# a row with only False occurs,
+					# so we will not find an occurrence this way
+					return False
+
+			# We are at the end, we found a candidate.
+			if check_prop:
+				# (index_M is a local name and is not used after this point)
+				index_M	+= 1
+				return propConnected(M, H, P, h, p)
+
+			# start with an empty row and try the positions one by one
+			M[index_M]	= [False] * len(old_row)
+			index_order	= 0
+			for index_order in range(0, len(order)):
+				index_row	= order[index_order]
+				# put the True of the previous try back on False
+				if index_order > 0:
+					M[index_M][order[index_order - 1]]	= False
+
+				# only positions that are a candidate in M_star are tried
+				if old_row[index_row]:
+					M[index_M][index_row]	= True
+
+					findMPart	= True
+					# 1 0 0 	Assume the 3rd round, in which we select x.
+					# 0 1 0 	None of the rows above may have a True at
+					# 0 x 0 	the same position. In the example one has,
+					# so we try selecting another position.
+					for index_column in range(0, index_M):
+						if M[index_column][index_row]:
+							findMPart	= False
+							break
+
+					if not findMPart:
+						continue
+
+					# prune the candidates that became impossible
+					refineM(M, H, P, h, p)
+
+					if findM(M_star, M, order, H, P, h, p, index_M + 1):
+						return True
+
+					# no success: refineM (and the recursion) changed M,
+					# put back the True of each previous row at the position
+					# that was saved in prev_pos
+					prev_row	= 0
+					for pos in prev_pos:
+						M[prev_row][pos]	= True
+						prev_row	+= 1
+					# reset the rows below the current row to M_star
+					for index_column in range(index_M + 1, len(M)):
+						# copy the row, we do not want M and M_star to share
+						# the same list
+						M[index_column]	= M_star[index_column][:]
+
+			# reset current row (the rest is already reset)
+			M[index_M]	= M_star[index_M][:]
+
+			return False
+
+		# create adjecency matrix of the graph
+		H, h	= self.createAdjacencyMatrixMap(self.graph)
+		# create adjecency matrix of the pattern
+		P, p	= self.createAdjacencyMatrixMap(self.pattern)
+		# create M* binary matrix
+		M_star	= createM_star(h, p)
+
+		# create the order we will use later on
+		order	= createDecreasingOrder(h)
+		# deepcopy M_s into M
+		M		= [row[:] for row in M_star]
+
+		if self.optimize:
+			refineM(M, H, P, h, p)
+
+		findM(M_star, M, order, H, P, h, p)
+
+		return self.result
+
+
+	def matchVF2(self):
+
+		class VF2_Obj(object):
+			"""
+			Structor for keeping the VF2 data.
+			"""
+			def __init__(self, len_graph_vertices, len_pattern_vertices):
+				# represents if n-the element (h[n] or p[n]) matched
+				self.core_graph		= [False]*len_graph_vertices
+				self.core_pattern	= [False]*len_pattern_vertices
+
+				# save mapping from pattern to graph
+				self.mapping		= {}
+
+				# preference lvl 1
+				# ordered set of vertices adjecent to M_graph connected via an outgoing edge
+				self.N_out_graph	= [-1]*len_graph_vertices
+				# ordered set of vertices adjecent to M_pattern connected via an outgoing edge
+				self.N_out_pattern	= [-1]*len_pattern_vertices
+				
+				# preference lvl 2
+				# ordered set of vertices adjecent to M_graph connected via an incoming edge
+				self.N_inc_graph	= [-1]*len_graph_vertices
+				# ordered set of vertices adjecent to M_pattern connected via an incoming edge
+				self.N_inc_pattern	= [-1]*len_pattern_vertices
+
+				# preference lvl 3
+				# not in the above
+
+		def findM(H, P, h, p, VF2_obj, index_M=0):
+			"""
+			Find an isomorphic mapping for the vertices of P to H.
+			This mapping is represented by a matrix M if,
+			and only if M(MH)^T = P^T.
+
+			This operates in a simular way as Ullmann. Ullmann has a predefind
+			order for  matching (sorted on most edges first). VF2's order is to
+			first try to match the adjacency vertices connected via outgoing
+			edges, then thos connected via incoming edges and then those that
+			not connected to the currently mathed vertices.
+			"""
+			def addOutNeighbours(neighbours, N, index_M):
+				"""
+				Given outgoing neighbours (a row from an adjacency matrix), 
+				label them as added by saving when they got added (index_M
+				represents this, otherwise it is -1)
+				"""
+				for neighbour_index in range(0, len(neighbours)):
+					if neighbours[neighbour_index]:
+						if N[neighbour_index]	== -1:
+							N[neighbour_index]	= index_M
+
+			def addIncNeighbours(G, j, N, index_M):
+				"""
+				Given the adjacency matrix, and the colum j, representing that 
+				we want to add the incoming edges to vertex j,
+				label them as added by saving when they got added (index_M
+				represents this, otherwise it is -1)
+				"""
+				for i in range(0, len(G)):
+					if G[i][j]:
+						if N[i] == -1:
+							N[i]	= index_M
+
+			def delNeighbours(N, index_M):
+				"""
+				Remove neighbours that where added at index_M.
+				If we call this function, we are backtracking and we want to
+				remove the added neighbours from the just tried matching (n, m)
+				pair (whiched failed). 
+				"""
+				for n in range(0, len(N)):
+					if N[n] == index_M:
+						N[n]	= -1
+
+			def feasibilityTest(H, P, h, p, VF2_obj, n, m):
+				"""
+				Decide if graph vertex n and pattern vertex m may be added
+				as a pair to the current partial mapping.
+
+				Examine all the nodes connected to n and m; if such nodes are
+				in the current partial mapping, check if each branch from or to
+				n has a corresponding branch from or to m and vice versa.
+
+				If the nodes and the branches of the graphs being matched also
+				carry semantic attributes, another condition must also hold for
+				F(s, n, m) to be true; namely the attributes of the nodes and of
+				the branches being paired must be compatible.
+
+				Another pruning step is to check if the nr of ext_edges between
+				the matched_vertices from the pattern and its adjacent vertices
+				are less than or equal to the nr of ext_edges between
+				matched_vertices from the graph and its adjacent vertices.
+
+				And if the nr of ext_edges between those adjacent vertices from
+				the pattern and the not connected vertices are less than or
+				equal to the nr of ext_edges between those adjacent vertices from
+				the graph and its adjacent vertices.
+
+				n and m are indices in h and p. H and P are the adjacency
+				matrices of the graph and of the pattern.
+				Returns True when the pair passes all checks, else False.
+				"""
+				# Get all neighbours from graph node n and pattern node m
+				# (including n and m), grouped per vertex type
+				neighbours_graph				= {}
+				neighbours_graph[h[n].type]		= set([h[n]])
+
+				neighbours_pattern				= {}
+				neighbours_pattern[p[m].type]	= set([p[m]])
+
+				# add all already matched neighbours of pattern vertex m
+				for i in range(0, len(P)):	# P is a nxn-matrix
+					if (P[m][i] or P[i][m])  and VF2_obj.core_pattern[i]:
+						neighbours_pattern.setdefault(p[i].type, set()).add(p[i])
+
+				# add all already matched neighbours of graph vertex n
+				for i in range(0, len(H)):	# H is a nxn-matrix
+					if (H[n][i] or H[i][n])  and VF2_obj.core_graph[i]:
+						neighbours_graph.setdefault(h[i].type, set()).add(h[i])
+
+				# take a coding shortcut,
+				# use self.matchNaive function to see if it is feasible.
+				# this way, we immediately test the semantic attributes
+				# (a falsy result of matchNaive means: not feasible)
+				if not self.matchNaive(pattern_vertices=neighbours_pattern, vertices=neighbours_graph, edges=self.graph.edges):
+					return False
+
+				# count ext_edges from core_graph to adjacent vertices (_ca)
+				# and count ext_edges between adjacent vertices and not
+				# matched vertices (_an)
+				# NOTE(review): core_graph and core_pattern hold booleans, so
+				# the "!= -1" tests below are always True and the _an
+				# counters are never incremented. The inner conditions also
+				# require the vertex y to be matched, while the original
+				# comment said "not yet matched". Neither count depends on
+				# n or m. Confirm the intended counting before changing it.
+				ext_edges_graph_ca	= 0
+				ext_edges_graph_an	= 0
+				# for all vertices x of the graph
+				for x in range(0, len(VF2_obj.core_graph)):
+					# for all its neighbours (via an outgoing edge)
+					for y in range(0, len(H)):
+						if H[x][y]:
+							# y is labelled as adjacent and is matched
+							if (VF2_obj.N_out_graph[y] != -1 or VF2_obj.N_inc_graph[y] != -1) and VF2_obj.core_graph[y]:
+								# see the note above: always True
+								if VF2_obj.core_graph[x] != -1:
+									ext_edges_graph_ca	+= 1
+								else:
+									ext_edges_graph_an	+= 1
+
+				# the same counting for the pattern
+				ext_edges_pattern_ca	= 0
+				ext_edges_pattern_an	= 0
+				# for all vertices x of the pattern
+				for x in range(0, len(VF2_obj.core_pattern)):
+					# for all its neighbours (via an outgoing edge)
+					for y in range(0, len(P)):
+						if P[x][y]:
+							# y is labelled as adjacent and is matched
+							if (VF2_obj.N_out_pattern[y] != -1 or VF2_obj.N_inc_pattern[y] != -1) and VF2_obj.core_pattern[y]:
+								# see the note above: always True
+								if VF2_obj.core_pattern[x] != -1:
+									ext_edges_pattern_ca	+= 1
+								else:
+									ext_edges_pattern_an	+= 1
+
+				# The nr of ext_edges between matched_vertices from the pattern
+				# and its adjacent vertices must be less than or equal to the nr
+				# of ext_edges between matched_vertices from the graph and its
+				# adjacent vertices, otherwise we won't find an occurrence
+				if ext_edges_pattern_ca > ext_edges_graph_ca:
+					return False
+
+				# The nr of ext_edges between those adjacent vertices from the
+				# pattern and its not connected vertices must be less than or
+				# equal to the nr of ext_edges between those adjacent vertices
+				# from the graph and its not connected vertices,
+				# otherwise we won't find an occurrence
+				if ext_edges_pattern_an > ext_edges_graph_an:
+					return False
+
+				return True
+
+			def matchPhase(H, P, h, p, index_M, VF2_obj, n, m):
+				"""
+				The matching fase of the VF2 algorithm. If the chosen n, m pair
+				passes the feasibilityTest, the pair gets added and we start
+				to search for the next matching pair.
+				"""
+				# all candidate pair (n, m) represent graph x pattern
+
+				if feasibilityTest(H, P, h, p, VF2_obj, n, m):
+					# adapt VF2_obj
+					VF2_obj.core_graph[n]	= True
+					VF2_obj.core_pattern[m]	= True
+					VF2_obj.mapping[h[n]]	= p[m]
+					addOutNeighbours(H[n], VF2_obj.N_out_graph, index_M)
+					addIncNeighbours(H, n, VF2_obj.N_inc_graph, index_M)
+					addOutNeighbours(P[m], VF2_obj.N_out_pattern, index_M)
+					addIncNeighbours(P, m, VF2_obj.N_inc_pattern, index_M)
+
+					if findM(H, P, h, p, VF2_obj, index_M + 1):
+						return True
+
+					# else, cleanup, adapt VF2_obj
+					VF2_obj.core_graph[n]	= False
+					VF2_obj.core_pattern[m]	= False
+					del VF2_obj.mapping[h[n]]
+					delNeighbours(VF2_obj.N_out_graph, index_M)
+					delNeighbours(VF2_obj.N_inc_graph, index_M)
+					delNeighbours(VF2_obj.N_out_pattern, index_M)
+					delNeighbours(VF2_obj.N_inc_pattern, index_M)
+
+				return False
+
+			def preferred(H, P, h, p, index_M, VF2_obj, N_graph, N_pattern):
+				"""
+				Try to match the adjacency vertices connected via outgoing
+				or incoming edges. (Depending on what is given for N_graph and
+				N_pattern.)
+				"""
+				for n in range(0, len(N_graph)):
+					# skip graph vertices that are not in VF2_obj.N_out_graph
+					# (or already matched)
+					if N_graph[n] == -1 or VF2_obj.core_graph[n]:
+						continue
+					for m in range(0, len(N_pattern)):
+						# skip graph vertices that are not in VF2_obj.N_out_pattern
+						# (or already matched)
+						if N_pattern[m] == -1 or VF2_obj.core_pattern[m]:
+							continue
+						if matchPhase(H, P, h, p, index_M, VF2_obj, n, m):
+							return True
+
+				return False
+
+			def leastPreferred(H, P, h, p, index_M, VF2_obj):
+				"""
+				Try to match the vertices that are not connected to the curretly
+				matched vertices.
+				"""
+				for n in range(0, len(VF2_obj.N_out_graph)):
+					# skip vertices that are connected to the graph 
+					# (or already matched)
+					if not (VF2_obj.N_out_graph[n] == -1 and VF2_obj.N_inc_graph[n] == -1) or VF2_obj.core_graph[n]:
+						continue
+					for m in range(0, len(VF2_obj.N_out_pattern)):
+						# skip vertices that are connected to the graph 
+						# (or already matched)
+						if not (VF2_obj.N_out_pattern[m] == -1 and VF2_obj.N_inc_pattern[m] == -1) or VF2_obj.core_pattern[m]:
+							continue
+						if matchPhase(H, P, h, p, index_M, VF2_obj, n, m):
+							return True
+
+				return False
+
+			# We are at the end, we found an candidate.
+			if index_M == len(p):
+				bound_graph_vertices	= {}
+				for vertex_bound, _ in VF2_obj.mapping.items():
+					bound_graph_vertices.setdefault(vertex_bound.type, set()).add(vertex_bound)
+
+				self.result	= self.matchNaive(vertices=bound_graph_vertices, edges=self.graph.edges)
+				return self.result != None
+
+			# try the candidates is the preffered order
+			# first try the adjacent vertices connected via the outgoing edges.
+			if preferred(H, P, h, p, index_M, VF2_obj, VF2_obj.N_out_graph, VF2_obj.N_out_pattern):
+				return True
+
+			# then try the adjacent vertices connected via the incoming edges.
+			if preferred(H, P, h, p, index_M, VF2_obj, VF2_obj.N_inc_graph, VF2_obj.N_inc_pattern):
+				return True
+
+			# and lastly, try the vertices not connected to the currently matched vertices
+			if leastPreferred(H, P, h, p, index_M, VF2_obj):
+				return True
+
+			return False
+
+
+		# create adjecency matrix of the graph
+		H, h	= self.createAdjacencyMatrixMap(self.graph)
+		# create adjecency matrix of the pattern
+		P, p	= self.createAdjacencyMatrixMap(self.pattern)
+
+		VF2_obj	= VF2_Obj(len(h), len(p))
+
+		findM(H, P, h, p, VF2_obj)
+
+		return self.result

+ 528 - 0
pattern_matching/planGraph.py

@@ -0,0 +1,528 @@
+# coding: utf-8
+
+"""
+Author:		Sten Vercammen
+			University of Antwerp
+
+Example code for paper: Efficient model transformations for novices
+url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
+
+The main goal of this code is to give an overview, and an understandable
+implementation, of known techniques for pattern matching and solving the
+sub-graph homomorphism problem. The presented techniques do not include
+performance adaptations/optimizations. It is not optimized to be efficient
+but rather for the ease of understanding the workings of the algorithms.
+The paper does list some possible extensions/optimizations.
+
+It is intended as a guideline, even for novices, and provides an in-depth look
+at the workings behind various techniques for efficient pattern matching.
+"""
+
+from searchGraph import *
+from enum import *
+
+# Enum for all primitive operation types
+# note: inc represent primitive operation in (as in is a reserved keyword in python)
+PRIM_OP	= Enum(['lkp', 'inc', 'out', 'src', 'tgt'])
+
+class PlanGraph(object):
+	"""
+	Holds the PlanGraph for a pattern.
+	Can create the search plan of the pattern for a given SearchGraph.
+	"""
+	def __init__(self, pattern):
+		if not isinstance(pattern, Graph):
+			raise TypeError('PlanGraph expects the pattern to be a Graph')
+		# member variables:
+		self.vertices	= []	# will not be searched in
+		self.edges		= []	# will not be searched in
+
+		# representation map, maps vertex from pattern to element from PlanGraph
+		# (no need for edges)
+		repr_map		= {}
+
+		# 1.1: for every vertex in the pattern graph,
+		# create a vertex representing the pattern element
+		for str_type, vertices in pattern.vertices.items():
+			for vertex in vertices:
+				# we only need to know the type of the vertex
+				plan_vertex				= Vertex(str_type)
+				# and we need to know that is was a vertex
+				plan_vertex.is_vertex	= True
+				# for re-linking the edges, we'll need to map the
+				# vertex of the pattern to the plan_vertex
+				repr_map[vertex]		= plan_vertex
+				# save created plan_vertex
+				self.vertices.append(plan_vertex)
+		# 1.2: for every edge in the pattern graph,
+		# create a vertex representing the pattern elemen
+		for str_type, edges in pattern.edges.items():
+			for edge in edges:
+				# we only need to know the type of the edge
+				plan_vertex	= Vertex(edge.type)	
+				# and we need to know that is was an edge
+				plan_vertex.is_vertex	= False
+				# save created plan_vertex
+				self.vertices.append(plan_vertex)
+				# 4: for every element x from the PlanGraph
+				# that represents an edge e in the pattern:
+				# 4.1: create an edge labelled tgt from x to the vertex in the PlanGraph
+				# representing the target vertex of e in the pattern graph,
+				# and a reverted edge labelled in
+				# 4.1.1: tgt:
+				plan_edge			= Edge(plan_vertex, repr_map[edge.tgt])
+				# backup src and tgt (Edmonds might override it)
+				plan_edge.orig_src	= plan_edge.src
+				plan_edge.orig_tgt	= plan_edge.tgt
+				plan_edge.label		= PRIM_OP.tgt
+				# link vertices connected to this plan_edge
+				plan_edge.src.addOutgoingEdge(plan_edge)
+				plan_edge.tgt.addIncomingEdge(plan_edge)
+				# tgt and src cost are always 1, we use logaritmic cost,
+				# (=> cost = ln(1) = 0.0) so that we do not need to minimaze
+				# a product, but can minimize a sum
+				# (as ln(c1...ck) = ln(c1) + ... + ln (ck))
+				plan_edge.cost		= 0.0
+				# backup orig cost, as Edmonds changes cost
+				plan_edge.orig_cost	= plan_edge.cost
+				# save created edge
+				self.edges.append(plan_edge)
+				# 4.1.2: in:
+				plan_edge			= Edge(repr_map[edge.tgt], plan_vertex)
+				# backup src and tgt (Edmonds might override it)
+				plan_edge.orig_src	= plan_edge.src
+				plan_edge.orig_tgt	= plan_edge.tgt
+				plan_edge.label		= PRIM_OP.inc
+				# link vertices connected to this plan_edge
+				plan_edge.src.addOutgoingEdge(plan_edge)
+				plan_edge.tgt.addIncomingEdge(plan_edge)
+				# save created edge
+				self.edges.append(plan_edge)
+
+				# 4.2: create an edge labelled src from x to the vertex in the PlanGraph
+				# representing the source vertex of e in the pattern graph
+				# and a reverted edge labelled out
+				# 4.2.1: src
+				plan_edge			= Edge(plan_vertex, repr_map[edge.src])
+				# backup src and tgt (Edmonds might override it)
+				plan_edge.orig_src	= plan_edge.src
+				plan_edge.orig_tgt	= plan_edge.tgt
+				plan_edge.label		= PRIM_OP.src
+				# link vertices connected to this plan_edge
+				plan_edge.src.addOutgoingEdge(plan_edge)
+				plan_edge.tgt.addIncomingEdge(plan_edge)
+				# tgt and src cost are always 1, we use logaritmic cost,
+				# (=> cost = ln(1) = 0.0) so that we do not need to minimaze
+				# a product, but can minimize a sum
+				# (as ln(c1...ck) = ln(c1) + ... + ln (ck))
+				plan_edge.cost		= 0.0
+				# backup orig cost, as Edmonds changes cost
+				plan_edge.orig_cost	= plan_edge.cost
+				# save created edge
+				self.edges.append(plan_edge)				
+				# 4.2.2: out
+				plan_edge			= Edge(repr_map[edge.src], plan_vertex)
+				# backup src and tgt (Edmonds might override it)
+				plan_edge.orig_src	= plan_edge.src
+				plan_edge.orig_tgt	= plan_edge.tgt
+				plan_edge.label		= PRIM_OP.out
+				# link vertices connected to this plan_edge
+				plan_edge.src.addOutgoingEdge(plan_edge)
+				plan_edge.tgt.addIncomingEdge(plan_edge)
+				# save created edge
+				self.edges.append(plan_edge)
+		# 2: create a root vertex
+		self.root	= Vertex('root')
+		# don't add it to the vertices
+
+		# 3: for each element in the PlanGraph (that is not the root vertex),
+		# create an edge from the root to it, and label it lkp
+		for vertex in self.vertices:
+			plan_edge			= Edge(self.root, vertex)
+			# backup src and tgt (Edmonds might override it)
+			plan_edge.orig_src	= plan_edge.src
+			plan_edge.orig_tgt	= plan_edge.tgt
+			plan_edge.label		= PRIM_OP.lkp
+			# link vertices connected to this plan_edge
+			plan_edge.src.addOutgoingEdge(plan_edge)
+			plan_edge.tgt.addIncomingEdge(plan_edge)
+			# save created edge
+			self.edges.append(plan_edge)
+
+	def updatePlanCost(self, graph):
+		"""
+		returns True if sucessfully updated cost,
+		returns False if a type in the pattern is not in the graph.
+		"""
+		if not isinstance(graph, SearchGraph):
+			raise TypeError('updatePlanCost expects a SearchGraph')
+		# update, lkp, in and out (not src and tgt as they are constant)
+
+		for edge in self.edges:
+			if edge.label == PRIM_OP.lkp:
+				edge.cost	= graph.getCostLkp(edge.tgt.type, edge.tgt.is_vertex)
+				if edge.cost == None:
+					print('failed lkp')
+					return False
+			elif edge.label == PRIM_OP.inc:
+				# in(v, e), binds an incoming edge e from an already bound vertex v,
+				# depends on the number of incoming edges of type e for the vertex type
+				edge.cost	= graph.getCostInc(edge.src.type, edge.tgt.type)
+				if edge.cost == None:
+					print('failed in')
+					return False
+			elif edge.label == PRIM_OP.out:
+				# (analogue for out(v, e))
+				edge.cost	= graph.getCostOut(edge.src.type, edge.tgt.type)
+				if edge.cost == None:
+					print('failed out')
+					return False
+			# else: ignore src and tgt
+			# backup orig cost, as Edmonds changes cost
+			edge.orig_cost	= edge.cost
+		return True
+
+	def Edmonds(self, searchGraph):
+		"""
+		Returns the minimum directed spanning tree (MDST)
+		for the pattern and the provided graph.
+		Returns None if it is impossible to find the pattern in the Graph
+		(vertex type of edge type from pattern not in Graph).
+		"""
+		# update the cost for the PlanGraph
+		if not self.updatePlanCost(searchGraph):
+			print('type in pattern not found in Graph (in Edmonds)')
+			# (returns False if a type in the pattern can not be found in the graph)
+			return None
+		# Complete Edmonds algorithm has optimization steps:
+		# a: remove edges entering the root
+		# b: merge parallel edges from same src to same tgt with mim weight
+		# we can ignore this as:
+		# a: the root does not have incoming edges
+		# b: the PlanGraph does not have such paralllel edges
+
+		# 1: for each node v (other than root), find incoming edge with lowest weight
+		# insert those 
+		pi_v		= {}
+		for plan_vertex in self.vertices:
+			min_weight	= float('infinity')
+			min_edge	= None
+			for plan_edge in plan_vertex.incoming_edges:
+				if plan_edge.cost < min_weight:
+					min_weight	= plan_edge.cost
+					min_edge	= plan_edge
+			# save plan_vertex and it's minimum incoming edge
+			pi_v[plan_vertex]	= min_edge
+			if min_edge == None:
+				raise RuntimeError('baka: no min_edge found')
+
+		def getCycle(vertex, reverse_graph, visited):
+			"""
+			Climbs from vertex towards the root by repeatedly stepping to the
+			source of the (single) incoming edge stored in reverse_graph.
+
+			vertex			: vertex to start climbing from
+			reverse_graph	: {vertex, incoming edge}, the edge needs a src
+			visited			: set of vertices handled by earlier calls, every
+							  vertex walked on by this call is added to it
+							  (the root is assumed to be in it already)
+
+			Returns None when the climb reaches a vertex from visited (no new
+			cycle). Returns the set of vertices forming the cycle when the
+			climb runs into a vertex of its own path; the tail that led
+			into the cycle is not part of the returned set.
+			"""
+			path_order	= []	# vertices of this climb, in walking order
+			on_path		= set()	# same vertices, for fast membership tests
+			node		= vertex
+			while node not in visited:
+				if node in on_path:
+					# The climb closed a cycle at node. Everything walked on
+					# before node was reached for the first time is the tail
+					# leading into the cycle:
+					# g f e			walking a, b, c, d, ..., k and then d
+					# h   d c b a	again gives the cycle d .. k, with
+					# i j k			a, b, c as its tail.
+					# The tail is the front of path_order, so drop vertices
+					# from the front until node itself shows up.
+					for walked_vertex in path_order:
+						if walked_vertex != node:
+							on_path.remove(walked_vertex)
+							continue
+						break
+					visited.update(path_order)
+					return on_path
+				on_path.add(node)
+				path_order.append(node)
+				# each vertex has exactly one chosen incoming edge:
+				# node <--(minimal edge)-- src, follow it upwards
+				node	= reverse_graph[node].src
+
+			# the climb ended on an already visited vertex: no cycle
+			visited.update(path_order)
+			return None
+
+		class VertexGraph(Vertex):
+			"""
+			Acts as a super vertex, holds a subgraph (that is/was once a cycle).
+			Used for the contraction step of Edmonds' algorithm.
+			The incoming edges are the edges leading to the vertices in the
+			VertexGraph (they exclude edges from a vertex in the cycle to
+			another vertex in the cycle).
+			Analogous for outgoing edges.
+			"""
+			def __init__(self, cycle, reverseGraph):
+				"""
+				Contracts cycle into this VertexGraph.
+				cycle			: the vertices that form the cycle
+				reverseGraph	: {vertex, chosen incoming edge}; it gets
+								  modified: the vertices of the cycle are
+								  removed and this VertexGraph is added.
+				"""
+				# Call parent class constructor, the type of the super vertex
+				# is the concatenation of the types of the cycle's vertices
+				str_type	= ''.join(str(vertex.type) for vertex in cycle)
+				Vertex.__init__(self, str_type)
+				# member variables:
+				self.internalMDST		= {}	# {vertex of the cycle, its chosen incoming edge}
+
+				minIntWeight	= self.findMinIntWeight(cycle, reverseGraph)
+				self.updateMinExtEdge(minIntWeight, reverseGraph)
+
+
+			def findMinIntWeight(self, cycle, reverseGraph):
+				"""
+				Returns the smallest cost found among the incoming edges of
+				the cycle's vertices whose source lies within the cycle.
+				As a side effect:
+				- edges entering the cycle from outside get added as incoming
+				  edges of this VertexGraph,
+				- edges leaving the cycle get added as outgoing edges of this
+				  VertexGraph and get this VertexGraph as their src,
+				- the internalMDST is filled (at this point it is still a cycle).
+				(The VertexGraph formed by the cycle will be added to the
+				reverseGraph by calling updateMinExtEdge.)
+				Raises TypeError if the chosen incoming edge of a vertex does
+				not start within the cycle.
+				"""
+				minIntWeight	= float('infinity')
+
+				for vertex in cycle:
+					# add incoming edges to this VertexGraph
+					for inc_edge in vertex.incoming_edges:
+						# edge from within the cycle
+						if inc_edge.src in cycle:
+							minIntWeight	= min(minIntWeight, inc_edge.cost)
+						else:
+							# edge from outside the cycle
+							self.addIncomingEdge(inc_edge)
+					# add outgoing edges to this VertexGraph
+					for out_edge in vertex.outgoing_edges:
+						if out_edge.tgt not in cycle:
+							# edge leaves the cycle
+							self.addOutgoingEdge(out_edge)
+							# update src to this VertexGraph
+							out_edge.src	= self
+					# save internal MDST
+					min_edge	= reverseGraph[vertex]
+					if min_edge.src in cycle:
+						self.internalMDST[vertex]	= min_edge
+					else:
+						raise TypeError('how is this a cycle')
+
+				return minIntWeight
+
+			def updateMinExtEdge(self, minIntWeight, reverseGraph):
+				"""
+				Modifies the cost of the external incoming edges and finds the
+				minimum external incoming edge with this modified weight.
+				This found edge will break the cycle: it updates the
+				internalMDST from a cycle to an MDST and updates the
+				reverseGraph to include this VertexGraph.
+				Raises RuntimeError if no external incoming edge qualifies.
+				"""
+				minExt			= None
+				minModWeight	= float('infinity')
+
+				# Find incoming edge from outside of the cycle with minimal
+				# modified cost. This edge will break the cycle.
+				for inc_edge in self.incoming_edges:
+					# An incoming edge (with src from within the cycle), can be
+					# from a contracted part of the graph. Assume bc is a
+					# contracted part (VertexGraph) a, bc is a newly formed
+					# cycle (due to the breaking of the previous cycle bc). bc
+					# has at least lkp incoming edges to b and c, but we should
+					# not consider the lkp of c to break the cycle.
+					# If we want to break a, bc, select plausible edges,
+					#  /<--\
+					# a     bc   bc's MDST b <-- c
+					#  \-->/
+					# by looking at their original targets.
+					# (if cycle inc_edge.orig_tgt == external inc_edge.orig_tgt)
+					cycle_edge	= reverseGraph[inc_edge.tgt]
+					if cycle_edge.orig_tgt == inc_edge.orig_tgt:
+						# modified cost: cost of inc_edge -
+						# (cost of previously chosen minimum edge to cycle vertex - minIntWeight)
+						inc_edge.cost	-= (cycle_edge.cost - minIntWeight)
+						if minExt is None or minModWeight > inc_edge.cost:
+							# save better edge from outside of the cycle
+							minExt			= inc_edge
+							minModWeight	= inc_edge.cost
+
+				# without an external edge the cycle can not be broken, fail
+				# with a clear message instead of an AttributeError on None
+				if minExt is None:
+					raise RuntimeError('baka: no external edge found to break the cycle')
+
+				# Example: a, b is a cycle (we know that there are no other
+				# chosen incoming edges to a and/or b, as there is exactly one
+				# chosen incoming edge per vertex), and the arrow from c to b
+				# represents the minExt edge. We will remove the bottom arrow
+				#  /<--\			(from a to b) and save the minExt edge in
+				# a     b <-- c		the reverseGraph. This breaks the cycle.
+				#  \-->/			As the internalMDST currently still holds
+				# a cycle, we have to remove that arrow from the internalMDST.
+				# We have to remove all vertex bindings of the cycle from the
+				# reverseGraph (as it is contracted into a single VertexGraph),
+				# and store the minExt edge to this VertexGraph in it.
+				for int_vertex in self.internalMDST:
+					del reverseGraph[int_vertex]	# remove cycle from reverseGraph
+
+				# minExt.tgt still is the vertex of the cycle here, it only
+				# gets redirected to this VertexGraph in the loop below
+				del self.internalMDST[minExt.tgt]	# remove/break cycle
+
+				for inc_edge in self.incoming_edges:
+					# update inc_edge's target to this VertexGraph
+					inc_edge.tgt	= self
+
+				# save minExt edge to this VertexGraph in the reverseGraph
+				reverseGraph[self]	= minExt
+
+		# repeat until the chosen edges in pi_v no longer contain a cycle
+		while True:
+			# 2: find all cycles:
+			cycles	= []
+			visited	= set([self.root])		# root does not have incoming edges,
+			for vertex in list(pi_v.keys()):		# it can not be part of a cycle
+				if vertex not in visited:	# getCycle depends on root being in visited
+					cycle	= getCycle(vertex, pi_v, visited)
+					if cycle != None:
+						cycles.append(cycle)
+
+			# 2: if the set of edges {pi(v), v} does not contain any cycles,
+			# then we found our minimum directed spanning tree,
+			# otherwise, we'll have to resolve the cycles
+			if len(cycles) == 0:
+				break
+
+			# 3: For each formed cycle:
+			# 3a: find internal incoming edge with the smallest cost
+			# 3b: modify the cost of each arc which enters the cycle
+			# 3c: replace smallest internal edge with the modified edge which has the smallest cost
+			for cycle in cycles:
+				# Breaks a cycle by:
+				# - contracting cycle into VertexGraph
+				# - finding the internal incoming edge with the smallest cost
+				# - modify the cost of each arc which enters the cycle
+				# - replacing the smallest internal edge with the modified edge which has the smallest cost
+				# - changing reverseGraph accordingly (removes elements from cycle, adds vertexGraph)
+				# (This will find a solution as the graph keeps shrinking with every cycle,
+				# in the worst case the same amount as there are vertices, until
+				# only the root and one vertexGraph remains)
+				# The constructor is called for its side effect on pi_v, the
+				# created object itself is reachable through pi_v afterwards.
+				vertexGraph	= VertexGraph(cycle, pi_v)
+
+		class SortedContainer(object):
+			"""
+			A container that keeps elements sorted based on a given sortValue.
+			Elements with the same value, will be returned in the order they got inserted.
+			"""
+			def __init__(self):
+				# member variables:
+				self.keys	= []	# stores key in sorted order (sorted when pop gets called)
+				self.sorted	= {}	# {key, [elems with same key, in insertion order]}
+
+			def add(self, sortValue, element):
+				"""
+				Adds element with sortValue to the SortedContainer.
+				"""
+				elems	= self.sorted.get(sortValue)
+				if elems is None:
+					# first element with this sortValue
+					self.sorted[sortValue]	= [element]
+					self.keys.append(sortValue)
+				else:
+					elems.append(element)
+
+			def pop(self):
+				"""
+				Sorts the SortedContainer, removes and returns the element with
+				the smallest sortValue. Of the elements sharing that sortValue,
+				the one that got inserted first is returned.
+				Raises IndexError when the SortedContainer is empty.
+				"""
+				self.keys.sort()
+				smallest	= self.keys[0]
+				elems		= self.sorted[smallest]
+				# take from the front: first in, first out for equal sortValues
+				elem		= elems.pop(0)
+				if not elems:
+					# that was the last element with this sortValue
+					del self.sorted[smallest]
+					del self.keys[0]
+				return elem
+
+			def empty(self):
+				"""
+				Returns whether or not the sorted container is empty.
+				"""
+				return not self.keys
+
+		def createPRIM_OP(edge, inc_cost=True):
+			"""
+			Packs the data of a primitive operation (described by edge) into
+			a tuple, so callers do not need long argument lists.
+			The layout of the tuple depends on edge.label:
+			- inc / out	: (label, type of orig_src, type of orig_tgt)
+			- lkp		: (label, type of orig_tgt, orig_tgt.is_vertex)
+			- otherwise	: (label, type of orig_src)		(src, tgt operation)
+			When inc_cost is set, edge.cost is appended as the last item.
+			"""
+			label	= edge.label
+			if label in (PRIM_OP.inc, PRIM_OP.out):
+				# op, vertex type, actual edge type
+				data	= (label, edge.orig_src.type, edge.orig_tgt.type)
+			elif label == PRIM_OP.lkp:
+				# op, vertex/edge type, is vertex or edge
+				data	= (label, edge.orig_tgt.type, edge.orig_tgt.is_vertex)
+			else:
+				# src, tgt operation: op, actual edge type
+				data	= (label, edge.orig_src.type)
+			if inc_cost:
+				data	+= (edge.cost,)
+			return data
+
+		def flattenReverseGraph(vertex, inc_edge, reverseGraph):
+			"""
+			Writes the binding (vertex, inc_edge) into reverseGraph, unfolding
+			every VertexGraph that is met on the way, so that afterwards a
+			forwardGraph can be created from reverseGraph.
+			"""
+			if isinstance(vertex, VertexGraph):
+				# inc_edge enters a contracted cycle: bind it to the original
+				# vertex it points at and unfold the cycle's internal MDST
+				reverseGraph[inc_edge.orig_tgt]	= inc_edge
+				for inner_vertex, inner_edge in inc_edge.tgt.internalMDST.items():
+					flattenReverseGraph(inner_vertex, inner_edge, reverseGraph)
+			else:
+				reverseGraph[vertex]	= inc_edge
+			# the edge can also start in a contracted cycle, unfold that as well
+			source	= inc_edge.src
+			if isinstance(source, VertexGraph):
+				for inner_vertex, inner_edge in source.internalMDST.items():
+					flattenReverseGraph(inner_vertex, inner_edge, reverseGraph)
+
+		def createForwardGraph(vertex, inc_edge, forwardGraph):
+			"""
+			Registers inc_edge as an outgoing edge of its original source in
+			forwardGraph ({vertex, [outgoing edges]}). When vertex is a
+			VertexGraph, the edges of its internalMDST get registered too.
+			"""
+			outgoing	= forwardGraph.setdefault(inc_edge.orig_src, [])
+			outgoing.append(inc_edge)
+			if isinstance(vertex, VertexGraph):
+				for inner_vertex, inner_edge in vertex.internalMDST.items():
+					createForwardGraph(inner_vertex, inner_edge, forwardGraph)
+
+		MDST	= []				# resulting list of primitive operations
+		# pi_v contains {vertex, incoming_edge}
+		# we want to start from root and follow the outgoing edges
+		# so we have to build the forwardGraph graph for pi_v
+		# (Except for the root (has 0), each vertex has exactly one incoming edge,
+		# but might have multiple outgoing edges)
+		forwardGraph	= {}	# {vertex, [outgoing edge 1, ... ] }
+		reverseGraph	= {}	# flattened version of pi_v, filled by flattenReverseGraph
+
+		# flatten reverseGraph (for the vertexGraph elements)
+		for v, e in pi_v.items():
+			flattenReverseGraph(v, e, reverseGraph)
+
+		# create the forwardGraph
+		for vertex, edge in reverseGraph.items():
+			createForwardGraph(vertex, edge, forwardGraph)
+
+		# create the MDST in a best first manner (lowest value first)
+		current		= SortedContainer()		# allows easy walking through the tree
+		for edge in forwardGraph[self.root]:
+			current.add(edge.orig_cost, edge)	# use orig cost, not modified
+		while current.empty() != True:
+			p_op	= current.pop()				# p_op contains an outgoing edge
+			MDST.append(createPRIM_OP(p_op))
+			# the target of p_op is reached now, so its outgoing edges
+			# become candidates (a leaf has no entry in the forwardGraph)
+			for edge in forwardGraph.get(p_op.orig_tgt, []):
+				current.add(edge.orig_cost, edge)
+		return MDST

+ 8 - 0
pattern_matching/run.sh

@@ -0,0 +1,8 @@
+#!/bin/sh
+
+# Run the example program.
+# NOTE(review): main.py presumably writes randomGraph.dot and
+# randomPattern.dot into the current directory - confirm.
+python main.py
+# Render both dot files to SVG with Graphviz.
+dot randomGraph.dot -Tsvg > randomGraph.svg
+dot randomPattern.dot -Tsvg > randomPattern.svg
+
+# Show the rendered graphs in firefox.
+firefox randomGraph.svg
+firefox randomPattern.svg

+ 115 - 0
pattern_matching/searchGraph.py

@@ -0,0 +1,115 @@
+# coding: utf-8
+
+"""
+Author:		Sten Vercammen
+			University of Antwerp
+
+Example code for paper: Efficient model transformations for novices
+url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
+
+The main goal of this code is to give an overview, and an understandable
+implementation, of known techniques for pattern matching and solving the
+sub-graph homomorphism problem. The presented techniques do not include
+performance adaptations/optimizations. It is not optimized to be efficient
+but rather for the ease of understanding the workings of the algorithms.
+The paper does list some possible extensions/optimizations.
+
+It is intended as a guideline, even for novices, and provides an in-depth look
+at the workings behind various techniques for efficient pattern matching.
+"""
+
+from graph import *
+
+import math
+
+class SearchGraph(Graph):
+	"""
+	A SearchGraph is an extended Graph, it keeps track of statistics
+	for creating the cost model when generating a search plan.
+	It stores the amount of edges for each edge.type per vertex.type.
+	"""
+	def __init__(self, orig=None, deepCopy=False):
+		"""
+		Creates an empty SearchGraph, or (when orig is given) a SearchGraph
+		that shares the vertices and edges of orig.
+		orig		: Graph or SearchGraph to take the elements from
+		deepCopy	: not implemented, when set nothing gets copied from orig
+		Raises TypeError if orig is not a Graph (or SearchGraph).
+		"""
+		Graph.__init__(self)
+		# member variables:
+		self.nr_of_inc_edges	= {}	# {vertex_type, {edge_type, nr of incoming edges of edge_type for vertex_type } }
+		self.nr_of_out_edges	= {}	# {vertex_type, {edge_type, nr of outgoing edges of edge_type for vertex_type } }
+
+		if orig is None:
+			return
+		# SearchGraph is a Graph, so one check covers both
+		if not isinstance(orig, Graph):
+			raise TypeError('Can only create SearchGraph from Graph and SearchGraph types')
+		if deepCopy:
+			# TODO: deepcopy (not really needed)
+			return
+		# copy all member elements:
+		self.vertices	= orig.vertices	# this is a reference
+		self.edges		= orig.edges	# this is a reference
+		# update the edge counters for each edge
+		for edges in self.edges.values():
+			for edge in edges:
+				self.addToEdgeCounters(edge)
+
+	def addCreateEdge(self, src, tgt, str_type):
+		"""
+		Creates edge of str_type from src to tgt, and returns it,
+		so that properties can be added to the edge.
+		This also adds the Edge to the Edge counters.
+		"""
+		# call parent function, this function is an extension
+		edge	= Graph.addCreateEdge(self, src, tgt, str_type)
+		self.addToEdgeCounters(edge)
+		return edge
+
+	def addToEdgeCounters(self, edge):
+		"""
+		Add the Edge to the Edge counters.
+		"""
+		# get {edge.type, counter} for tgt vertex of edge (or create it)
+		edge_counters				= self.nr_of_inc_edges.setdefault(edge.tgt.type, {})
+		# increase counter of edge.type by 1
+		edge_counters[edge.type]	= edge_counters.get(edge.type, 0) + 1
+		# get {edge.type, counter} for src vertex of edge (or create it)
+		edge_counters				= self.nr_of_out_edges.setdefault(edge.src.type, {})
+		# increase counter of edge.type by 1
+		edge_counters[edge.type]	= edge_counters.get(edge.type, 0) + 1
+
+	def getCostLkp(self, type, is_vertex):
+		"""
+		Returns the cost of a lkp primitive operation (of a vertex or edge).
+		Returns None if vertex type or edge type not present in Host Graph
+		"""
+		if is_vertex:
+			cost	= len(self.getVerticesOfType(type))
+		else:
+			cost	= len(self.getEdgesOfType(type))
+		if cost == 0:
+			return None
+		# we use a logarithmic cost
+		return math.log(cost)
+
+	def _getCostEdge(self, edge_counters, vertex_type, edge_type):
+		"""
+		Shared implementation of getCostInc and getCostOut.
+		edge_counters is either nr_of_inc_edges or nr_of_out_edges.
+		Returns None if no edge of edge_type was counted for vertex_type.
+		"""
+		nr_of_edges	= edge_counters.get(vertex_type, {}).get(edge_type)
+		if nr_of_edges is None:
+			# float(None) would raise a TypeError, the contract is None
+			return None
+		cost	= float(nr_of_edges)
+		nr_of_vertices_with_type	= len(self.getVerticesOfType(vertex_type))
+		if nr_of_vertices_with_type != 0:
+			# average amount of edges per vertex of vertex_type,
+			# we use a logarithmic cost
+			cost	= math.log(cost / nr_of_vertices_with_type)
+		return cost
+
+	def getCostInc(self, vertex_type, edge_type):
+		"""
+		Returns the cost of an in primitive operation.
+		Returns None if vertex_type or edge_type not present in Host Graph
+		"""
+		return self._getCostEdge(self.nr_of_inc_edges, vertex_type, edge_type)
+
+	def getCostOut(self, vertex_type, edge_type):
+		"""
+		Returns the cost of an out primitive operation.
+		Returns None if vertex_type or edge_type not present in Host Graph
+		"""
+		return self._getCostEdge(self.nr_of_out_edges, vertex_type, edge_type)