瀏覽代碼

Turn VF2 matcher into a generator that yields all matches

Joeri Exelmans 11 月之前
父節點
當前提交
d2c996f4f7
共有 2 個文件被更改,包括 40 次插入和 459 次删除
  1. 9 9
      pattern_matching/main.py
  2. 31 450
      pattern_matching/patternMatching.py

+ 9 - 9
pattern_matching/main.py

@@ -31,10 +31,10 @@ if __name__ == '__main__':
 	"""
 	The main function called when running from the command line.
 	"""
-	nr_of_vertices		= 10
-	nr_of_diff_types_v	= 0
-	nr_of_edges			= 20
-	nr_of_diff_types_e	= 0
+	nr_of_vertices		= 50
+	nr_of_diff_types_v	= 2
+	nr_of_edges			= 150
+	nr_of_diff_types_e	= 2
 
 	dv      = [random.randint(0, nr_of_diff_types_v) for _ in range(nr_of_vertices)]
 	de      = [random.randint(0, nr_of_diff_types_e) for _ in range(nr_of_edges)]
@@ -47,10 +47,10 @@ if __name__ == '__main__':
 	# dc_inc	= [ 0,25,18,47,22,25,16,45,38,25,5,45,15,44,17,46,6,17,35,8,16,29,48,47,25,34,4,20,24,1,47,44,8,25,32,3,16,6,33,21,6,13,41,10,17,25,21,33,31,30,5,4,45,26,16,42,12,25,29,3,32,30,14,26,11,13,7,13,3,43,43,22,48,37,20,28,15,40,19,33,43,16,49,36,11,25,9,42,3,22,16,40,42,44,27,30,1,18,10,35,19,6,9,43,37,38,45,19,41,14,37,45,0,31,29,31,24,20,44,46,8,45,43,3,38,38,35,12,19,45,7,34,20,28,12,17,45,17,35,49,20,21,49,1,35,38,38,36,33,30 ]
 	# dc_out	= [ 9,2,49,49,37,33,16,21,5,46,4,15,9,6,14,22,16,33,23,21,15,31,37,23,47,3,30,26,35,9,29,21,39,32,22,43,5,9,41,30,31,30,37,33,31,34,23,22,34,26,44,36,38,33,48,5,9,34,13,7,48,41,43,26,26,7,12,6,12,28,22,8,29,22,24,27,16,4,31,41,32,15,19,20,38,0,26,18,43,46,40,17,29,14,34,14,32,17,32,47,16,45,7,4,35,22,42,11,38,2,0,29,4,38,17,44,9,23,5,10,31,17,1,11,16,5,37,27,35,32,45,16,18,1,14,4,42,24,43,31,21,38,6,34,39,46,20,1,38,47 ]
 
-	dv = [0, 1, 0, 1, 0]
-	de = [0, 0, 0]
-	dc_inc = [0, 2, 4]
-	dc_out = [1, 3, 3]
+	# dv = [0, 1, 0, 1, 0]
+	# de = [0, 0, 0]
+	# dc_inc = [0, 2, 4]
+	# dc_out = [1, 3, 3]
 	
 	gg	= GraphGenerator(dv, de, dc_inc, dc_out, debug)
 
@@ -73,7 +73,7 @@ if __name__ == '__main__':
 	#PM	= PatternMatching('SP')
 	# PM	= PatternMatching('Ullmann')
 	PM	= PatternMatching('VF2')
-	matches = PM.match(pattern, graph)
+	matches = [m for m in PM.matchVF2(pattern, graph)]
 	print("found", len(matches), "matches:", matches)
 
 	# regenerate graph, to show matched pattern

+ 31 - 450
pattern_matching/patternMatching.py

@@ -28,62 +28,17 @@ class PatternMatching(object):
 	"""
 	Returns an occurrence of a given pattern from the given Graph
 	"""
-	def __init__(self, matching_type='SP', optimize=True):
+	def __init__(self, optimize=True):
 		# store the type of matching we want to use
-		self.type			= matching_type
-		self.bound_vertices	= {}	# saves the currently bound vertices
-		self.bound_edges	= {}	# saves the currently bound edges
-		self.result			= None
-		self.previous		= []
 		self.optimize		= optimize
-		self.results        = []
 
-	def match(self, pattern, graph):
-		"""
-		Call this function to find an occurrence of the pattern in the (host) graph.
-		Setting the type of matching (naive, SP, Ullmann, VF2) is done by
-		setting self.matching_type to its name.
-		"""
-		if not (isinstance(pattern, SearchGraph) or isinstance(pattern, Graph)):
-			raise TypeError('pattern must be a SearchGraph or Graph')
-		if not (isinstance(graph, SearchGraph) or isinstance(graph, Graph)):
-			raise TypeError('graph must be a SearchGraph or Graph')
-
-		self.pattern	= pattern
-		self.graph		= graph
-
-		if self.type	== 'naive':
-			result	= self.matchNaive(vertices=graph.vertices, edges=graph.edges)
-		elif self.type	== 'SP':
-			result	= self.matchSP()
-		elif self.type	== 'Ullmann':
-			result	= self.matchUllmann()
-		elif self.type	== 'VF2':
-			result	= self.matchVF2()
-		else:
-			raise ValueError('Unknown type for matching')
-
-		# cleanup
-		self.pattern		= None
-		self.graph			= None
-		self.bound_vertices	= {}
-		self.bound_edges	= {}
-		self.result			= None
-		self.results        = []
-
-		return result
-
-	def matchNaive(self, pattern_vertices=None, vertices=None, edges=None):
+	def matchNaive(self, pattern, vertices, edges, pattern_vertices=None):
 		"""
 		Try to find an occurrence of the pattern in the Graph naively. 
 		"""
 		# allow call with specific arguments
 		if pattern_vertices		== None:
-			pattern_vertices	= self.pattern.vertices
-		if vertices		== None:
-			vertices	= self.bound_vertices
-		if edges		== None:
-			edges		= self.bound_edges
+			pattern_vertices	= pattern.vertices
 
 		def visitEdge(pattern_vertices, p_edge, inc, g_edges, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
 			"""
@@ -238,137 +193,7 @@ class PatternMatching(object):
 		else:
 			return None
 
-	def matchSP(self):
-		"""
-		Find an occurrence of the pattern in the Graph
-		by using the generated SearchPlan.
-		"""
-		if isinstance(self.graph, Graph):
-			sg	= SearchGraph(self.graph)
-		elif isinstance(self.graph, SearchGraph):
-			sg = self.graph
-		else:
-			raise TypeError('Pattern matching with a SearchPlan must be given a Graph or SearchGraph')
-
-		pg	= PlanGraph(self.pattern)
-		SP	= pg.Edmonds(sg)
-
-		self.fileIndex = 0
-
-		def propConnected():
-			"""
-			Checks if the found vertices and edges can be uniquely matched
-			onto the pattern graph.
-			"""
-			self.result = self.matchNaive()
-			return self.result != None
-
-		def matchOP(elem, bound, ops, index):
-			"""
-			Execute a primitive operation, return whether ot not it succeeded.
-			"""
-			type_bound	= bound.setdefault(elem.type, set())
-			# if elem not yet bound, bind it, and try matching the next operations
-			if elem not in type_bound:
-				type_bound.add(elem)
-				# if matching of next operation failed, try with a different elem
-				if matchAllOP(ops, index+1):
-					return True
-				else:
-					type_bound.remove(elem)
-			return False
-
-		def matchAllOP(ops, index=0):
-			"""
-			Try to match an occurrence of the pattern in the graph,
-			by recursivly ,atching elements that adhere to the SearchPlan
-			"""
-			# if we matched all elements,
-			# check if the bound elements are properly connected
-			if index == len(ops):
-				return propConnected()
-
-			op = ops[index]
-
-			if op[0] == PRIM_OP.lkp:	# lkp(elem)
-				if op[2]:	# lookup a vertex
-					# If the graph does not have a vertex of the same vertex
-					# type, we'll have to return False, happens if elems == [].
-					elems	= self.graph.vertices.get(op[1], [])
-					bound	= self.bound_vertices
-				else:		# loopup an edge
-					# If the graph does not have an edge of the same edge
-					# type, we'll have to return False, happens if elems == [].
-					elems	= self.graph.edges.get(op[1], [])
-					bound	= self.bound_edges
-				
-				# if elems == [], we'll skip the loop and return False
-				for elem in elems:
-					if matchOP(elem, bound, ops, index):
-						return True
-				# if all not bound elems fails, backtrack
-				return False
-
-			elif op[0] == PRIM_OP.src:	# src(e): bind src of a bound edge e
-				# Should always succeed, as the edge must be already bound
-				# (there should be at least one elem in self.bound_edges[op[1]]).
-				for edge in self.bound_edges[op[1]]:
-					if matchOP(edge.src, self.bound_vertices, ops, index):
-						return True
-				# if all not bound elems fails, backtrack
-				return False
-
-			elif op[0] == PRIM_OP.tgt:	# tgt(e): bind tgt of a bound edge e
-				# Should always succeed, as the edge must be already bound
-				# (there should be at least one elem in self.bound_edges[op[1]]).
-				for edge in self.bound_edges[op[1]]:
-					if matchOP(edge.tgt, self.bound_vertices, ops, index):
-						return True
-				# if all not bound elems fails, backtrack
-				return False
-
-			elif op[0] == PRIM_OP.inc:	# in(v, e):  bind incoming edge e of a bound vertex v
-				# It's possible we will try to find a vertex of a certain type
-				# in the bound_vertices which should be bound implicitly
-				# (by a src/tgt op), that is not bound. Happens when implicit
-				# binding bounded a "wrong" vertex. We then need to return False
-				# (happens by skiping for loop by looping over [])
-				for vertex in self.bound_vertices.get(op[1], []):
-					for edge in vertex.incoming_edges:
-						if edge.type == op[2]:
-							if matchOP(edge, self.bound_edges, ops, index):
-								return True
-				# if all not bound elems fails, backtrack
-				return False
-
-			elif op[0] == PRIM_OP.out:	# out(v, e): bind outgoing edge e of a bound vertex v
-				# Return False if we expect an element to be bound that is not
-				# bound (for the same reason as the inc op).
-				for vertex in self.bound_vertices.get(op[1], []):
-					for edge in vertex.outgoing_edges:
-						if edge.type == op[2]:
-							if matchOP(edge, self.bound_edges, ops, index):
-								return True
-				# if all not bound elems fails, backtrack
-				return False
-			else:
-				raise TypeError('Unknown PRIM_OP type')
-
-		# try and match all (primitive) operations from the SearchPlan
-		matchAllOP(SP)
-
-		# Either nothing is found, or we found an occurrence,
-		# it is impossble to have a partionally matched occurrence
-		for key, bound_elems in self.bound_vertices.items():
-			if len(bound_elems) == 0:
-				# The pattern does not exist in the Graph
-				return None
-			else:
-				# We found a pattern
-				return self.result
-		
-
-	def createAdjacencyMatrixMap(self, graph):
+	def createAdjacencyMatrixMap(self, graph, pattern):
 		"""
 		Return adjacency matrix and the order of the vertices.
 		"""
@@ -380,7 +205,7 @@ class PatternMatching(object):
 		if self.optimize:
 			# insert only the vertices from the graph which have a type
 			# that is present in the pattern
-			for vertex_type, _ in self.pattern.vertices.items():
+			for vertex_type, _ in pattern.vertices.items():
 				graph_vertices	= graph.vertices.get(vertex_type)
 				if graph_vertices	!= None:
 					allVertices.extend(graph_vertices)
@@ -419,255 +244,7 @@ class PatternMatching(object):
 
 		return AM, vertices_order
 
-	def matchUllmann(self):
-		"""
-		Find an occurrence of the pattern in the Graph
-		by using Ullmann for solving the Constraint Satisfaction Problem (CSP).
-		"""
-
-		def createM_star(h, p):
-			"""
-			Create M*[v, w]	= 1 if deg(v) <= deg(w), for v in V_P, w in V_H
-							= 0 otherwise
-
-			M and P are given to ensure corect order.
-			"""
-			m		= []	# [[..], ...]
-			for p_vertex in p:
-				row	= []
-				for g_vertex in h:
-					# for the degree function, we choose to look at the
-					# outgoing edges AND the incoming edges
-					# (one might prefer to use only one of them)
-					if self.optimize:
-						# also check if type matches
-						if p_vertex.type != g_vertex.type:
-							row.append(False)
-							continue
-					row.append(	len(p_vertex.incoming_edges) <=
-								len(g_vertex.incoming_edges) and
-								len(p_vertex.outgoing_edges) <=
-								len(g_vertex.outgoing_edges))
-				m.append(row)
-
-			return m
-
-		def createDecreasingOrder(h):
-			"""
-			It turns out that the more edges a vertex has, the sooner it will
-			fail in matching the pattern. For efficiency reasons, we want it
-			to fail as fast as possible.
-			"""
-			order	= []	# [(value, index), ...]
-			index	= 0
-			for g_vertex in h:
-				order.append((	len(g_vertex.outgoing_edges) +
-								len(g_vertex.outgoing_edges), index))
-				index	+= 1
-
-			order.sort(key = lambda elem: elem[0])
-			# sort and only return the indices (which specify the order)
-			return [index for (_, index) in order]
-
-		def propConnected(M, H, P, h, p):
-			"""
-			Checks if the vertices represented in M are isomorphic to P and if
-			they can be matched onto the pattern graph.
-			"""
-			print(M, H, P, h, p)
-			# P_candi	= np.dot(M, np.transpose(np.dot(M, H)))
-
-
-			"""
-			# If we do not aply the refineM function, we will want to check if
-			# this succeeds, as it checks for isomorphism.
-			# If we apply the refineM function, it is garanteed to be isomorphic.
-
-			index_column	= 0
-			for row in P_candi:
-				index_row	= 0
-				for item in row:
-					# for all i,j: P[i, j] = 1 : M(MH)^T [j, i] = 1
-					# (not the other way around)
-					# (return False when item is 0 and P[i,j] is 1)
-					if item < P[index_row][index_column]:
-						return False
-					index_row	+= 1
-				index_column	+= 1
-			"""
-
-			vertices	= {}
-			index_column	= 0
-			for row in M:
-				index_row	= 0
-				for item in row:
-					# there should only be one item per row
-					if item:
-						vertex	= h[index_row]
-						vertices.setdefault(vertex.type, set()).add(vertex)
-						break
-					index_row	+= 1
-				index_column	+= 1
-
-			self.result = self.matchNaive(vertices=vertices, edges=self.graph.edges)
-			return self.result != None
-
-		def refineM(M, H, P, h, pp):
-			"""
-			Refine M, for every vertex from the pattern, check if each possible
-			matching (candidate) his neighbours can also be matched. (M's column
-			represents vertices from P, and the row represents its candidate.)
-			If this is not possible set M[i,j] to false, refining/reducing the
-			search space.
-			"""
-			any_changes=True
-			while any_changes:
-				any_changes = False
-				# for all vertices from the pattern
-				for i in range(0, len(P)):	# P is a nxn-matrix
-					# for all its possible assignments
-					for j in range(0, len(H[0])):
-						# if bound vertex of P, check if all neigbours are matchable
-						if M[i][j]:
-							# for all the pattern his neighbours
-							for k in range(0, len(P)):
-								# if it is a neighbour (from outgoing edges)
-								if P[i][k]:
-									match	= False
-									for p in range(0, len(H[0])):
-										# check if we can match a candidate neighbour
-										# (from M* to to the graph (H))
-										if M[k][p] and H[j][p]:
-											if self.optimize:
-												# also check correct type
-												if pp[k].type != h[p].type:
-													continue
-											match	= True
-											break
-									if not match:
-										M[i][j]	= False
-										any_changes	= True
-								
-								# if it is a neighbour (from incoming edges)
-								if P[k][i]:
-									match	= False
-									for p in range(0, len(H[0])):
-										# check if we can match a candidate neighbour
-										# (from M* to to the graph (H))
-										if M[k][p] and H[p][j]:
-											if self.optimize:
-												# also check correct type
-												if pp[i].type != h[j].type:
-													continue
-											match	= True
-											break
-									if not match:
-										M[i][j]	= False
-										any_changes	= True
-
-		def findM(M_star, M, order, H, P, h, p, index_M=0):
-			"""
-			Find an isomorphic mapping for the vertices of P to H.
-			This mapping is represented by a matrix M if,
-			and only if M(MH)^T = P^T.
-			"""
-			# We are at the end, we found an candidate.
-			# Remember that we are at the end, bu first check if there is
-			# a row with ony False, if so, we do not need to check if it is
-			# properly connected.
-			check_prop	= False
-			if index_M == len(M):
-				check_prop	= True
-				index_M		-= 1
-
-			# we need to refer to this row
-			old_row	= M_star[index_M]
-			# previous rows (these are sparse, 1 per row, save only its position)
-			prev_pos	= []
-			for i in range(0, index_M):
-				row	= M[i]
-				only_false	= True
-				for j in range(0, len(old_row)):
-					if row[j]:
-						only_false	= False
-						prev_pos.append(j)
-						break
-				if only_false:
-					# check if a row with only False occurs,
-					# if so, we will not find an occurence
-					return False
-
-			# We are at the end, we found an candidate.
-			if check_prop:
-				index_M	+= 1
-				return propConnected(M, H, P, h, p)
-
-			M[index_M]	= [False] * len(old_row)
-			index_order	= 0
-			for index_order in range(0, len(order)):
-				index_row	= order[index_order]
-				# put previous True back on False
-				if index_order > 0:
-					M[index_M][order[index_order - 1]]	= False
-
-				if old_row[index_row]:
-					M[index_M][index_row]	= True
-
-					findMPart	= True
-					# 1 0 0 	Assume 3th round, and we select x,
-					# 0 1 0		no element at the same possition in the row,
-					# 0 x 0 	of the elements above itselve in the same
-					# column may be 1. In the example it is, then try
-					# selecting an other element.
-					for index_column in range(0, index_M):
-						if M[index_column][index_row]:
-							findMPart	= False
-							break
-
-					if not findMPart:
-						continue
-
-					refineM(M, H, P, h, p)
-
-					if findM(M_star, M, order, H, P, h, p, index_M + 1):
-						return True
-
-					# reset previous rows their True's
-					prev_row	= 0
-					for pos in prev_pos:
-						M[prev_row][pos]	= True
-						prev_row	+= 1
-					# reset rows below current row
-					for index_column in range(index_M + 1, len(M)):
-						# deep copy, we do not want to just copy pointer to array/list
-						M[index_column]	= M_star[index_column][:]
-
-			# reset current row (the rest is already reset)
-			M[index_M]	= M_star[index_M][:]
-
-			return False
-
-		# create adjecency matrix of the graph
-		H, h	= self.createAdjacencyMatrixMap(self.graph)
-		# create adjecency matrix of the pattern
-		P, p	= self.createAdjacencyMatrixMap(self.pattern)
-		# create M* binary matrix
-		M_star	= createM_star(h, p)
-
-		# create the order we will use later on
-		order	= createDecreasingOrder(h)
-		# deepcopy M_s into M
-		M		= [row[:] for row in M_star]
-
-		if self.optimize:
-			refineM(M, H, P, h, p)
-
-		findM(M_star, M, order, H, P, h, p)
-
-		return self.result
-
-
-	def matchVF2(self):
+	def matchVF2(self, pattern, graph):
 
 		class VF2_Obj(object):
 			"""
@@ -784,7 +361,7 @@ class PatternMatching(object):
 				# take a coding shortcut,
 				# use self.matchNaive function to see if it is feasible.
 				# this way, we immediately test the semantic attributes
-				if not self.matchNaive(pattern_vertices=neighbours_pattern, vertices=neighbours_graph, edges=self.graph.edges):
+				if not self.matchNaive(pattern, pattern_vertices=neighbours_pattern, vertices=neighbours_graph, edges=graph.edges):
 					return False
 
 				# count ext_edges from core_graph to a adjecent vertices and
@@ -878,9 +455,11 @@ class PatternMatching(object):
 						# print(self.alreadyVisited)
 
 					self.indent += 1
-					if findM(H, P, h, p, VF2_obj, index_M + 1):
+					matched = yield from findM(H, P, h, p, VF2_obj, index_M + 1)
+					if matched:
 						# return True
-						print(self.indent*"  ","found match", len(self.results), ", continuing...")
+						# print(self.indent*"  ","found match", len(self.results), ", continuing...")
+						pass
 					self.indent -= 1
 
 					if True:
@@ -917,7 +496,8 @@ class PatternMatching(object):
 						if N_pattern[m] == -1 or VF2_obj.core_pattern[m]:
 							continue
 						print(self.indent*"  ","  m:", m)
-						if matchPhase(H, P, h, p, index_M, VF2_obj, n, m):
+						matched = yield from matchPhase(H, P, h, p, index_M, VF2_obj, n, m)
+						if matched:
 							return True
 
 				return False
@@ -941,7 +521,8 @@ class PatternMatching(object):
 							# print(self.indent*"  ","      skipping")
 							continue
 						print(self.indent*"  ","    m:", m)
-						if matchPhase(H, P, h, p, index_M, VF2_obj, n, m):
+						matched = yield from matchPhase(H, P, h, p, index_M, VF2_obj, n, m)
+						if matched:
 							return True
 
 				return False
@@ -955,50 +536,50 @@ class PatternMatching(object):
 				for vertex_bound, _ in VF2_obj.mapping.items():
 					bound_graph_vertices.setdefault(vertex_bound.type, set()).add(vertex_bound)
 
-				self.result	= self.matchNaive(vertices=bound_graph_vertices, edges=self.graph.edges)
-				if self.result != None:
-					self.results.append(self.result)
-				return self.result != None
+				result	= self.matchNaive(pattern, vertices=bound_graph_vertices, edges=graph.edges)
+				if result != None:
+					yield result
+				return result != None
 
 			if index_M > 0:
 				# try the candidates in the preferred order
 				# first try the adjacent vertices connected via the outgoing edges.
 				print(self.indent*"  ","preferred L1")
-				if preferred(H, P, h, p, index_M, VF2_obj, VF2_obj.N_out_graph, VF2_obj.N_out_pattern):
+				matched = yield from preferred(H, P, h, p, index_M, VF2_obj, VF2_obj.N_out_graph, VF2_obj.N_out_pattern)
+				if matched:
 					return True
 
 				print(self.indent*"  ","preferred L2")
 				# then try the adjacent vertices connected via the incoming edges.
-				if preferred(H, P, h, p, index_M, VF2_obj, VF2_obj.N_inc_graph, VF2_obj.N_inc_pattern):
+				matched = yield from preferred(H, P, h, p, index_M, VF2_obj, VF2_obj.N_inc_graph, VF2_obj.N_inc_pattern)
+				if matched:
 					return True
 
 			print(self.indent*"  ","leastPreferred")
 			# and lastly, try the vertices not connected to the currently matched vertices
-			if leastPreferred(H, P, h, p, index_M, VF2_obj):
+			matched = yield from leastPreferred(H, P, h, p, index_M, VF2_obj)
+			if matched:
 				return True
 
 			return False
 
 
+		print("graph:", graph)
+
 		# create adjacency matrix of the graph
-		H, h	= self.createAdjacencyMatrixMap(self.graph)
-		print("adjacency:", H)
-		print("h:", len(h))
+		H, h	= self.createAdjacencyMatrixMap(graph, pattern)
 		# create adjacency matrix of the pattern
-		P, p	= self.createAdjacencyMatrixMap(self.pattern)
+		P, p	= self.createAdjacencyMatrixMap(pattern, pattern)
 
 		VF2_obj	= VF2_Obj(len(h), len(p))
 
-		self.indent = 0
-
 		# Only for debugging:
+		self.indent = 0
 		self.reverseMapH = { h[i] : i for i in range(len(h))}
 		self.reverseMapP = { p[i] : i for i in range(len(p))}
 
 		# Set of partial matches already explored - prevents us from producing the same match multiple times
 		# Encoded as a mapping from match size to the partial match
 		self.alreadyVisited = set()
-		
-		findM(H, P, h, p, VF2_obj)
 
-		return self.results
+		yield from findM(H, P, h, p, VF2_obj)