match_algo.py 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687
  1. '''This file is part of AToMPM - A Tool for Multi-Paradigm Modelling
  2. Copyright 2011 by the AToMPM team and licensed under the LGPL
  3. See COPYING.lesser and README.md in the root of this project for full details'''
  4. import sys
  5. from .himesis import Himesis
  6. class Priority(object):
  7. """
  8. Implements heuristics for the HimesisMatcher algorithm.
  9. Determines the order in which the candidate pairs should be computed.
  10. By default, the order is the index order of the nodes in igraph.
  11. To refine this heuristic order, you should sub-class Priority and override its methods.
  12. """
  13. def __init__(self):
  14. """
  15. Implements heuristics for the HimesisMatcher algorithm.
  16. Determines the order in which the candidate pairs should be computed.
  17. By default, the order is the index order of the nodes in igraph.
  18. To refine this heuristic order, you should sub-class Priority and override its methods.
  19. """
  20. self.source_graph = None
  21. self.pattern_graph = None
  22. def cache_info(self, source_graph, pattern_graph):
  23. """
  24. Pre-computes any information required by the order and order_all methods
  25. @param source_graph: The source graph.
  26. @param pattern_graph: The pattern graph.
  27. """
  28. pass
  29. def order_source(self, candidate_list):
  30. """
  31. Specifies the order for the terminal sets for the source graph.
  32. @param candidate_list: The list of possible candidates.
  33. """
  34. return sorted(candidate_list)
  35. def order_pattern(self, candidate_list):
  36. """
  37. Specifies the order for the terminal sets for the pattern graph.
  38. @param candidate_list: The list of possible candidates.
  39. """
  40. return sorted(candidate_list)
  41. def order_all_source(self, candidate_list):
  42. """
  43. Specifies the order for all source nodes.
  44. @param candidate_list: The list of possible candidates.
  45. """
  46. return candidate_list
  47. def order_all_pattern(self, candidate_list):
  48. """
  49. Specifies the order for all pattern nodes.
  50. @param candidate_list: The list of possible candidates.
  51. """
  52. return candidate_list
  53. class HimesisMatcher(object):
  54. """
  55. Represents a pattern matching algorithm for typed attributed multi-graphs.
  56. The pattern matching algorithm is based on VF2.
  57. """
  58. def __init__(self, source_graph, pattern_graph, priority=Priority(), pred1={}, succ1={}):
  59. """
  60. Represents a pattern matching algorithm for typed attributed multi-graphs.
  61. @param source_graph: The source graph.
  62. @param pattern_graph: The pattern graph.
  63. @param priority: Instance of a sub-class of the Priority class.
  64. It is used to determine the order in which the candidate pairs should be computed.
  65. @param pred1: Pre-built dictionary of predecessors in the source graph.
  66. @param succ1: Pre-built dictionary of successors in the source graph.
  67. """
  68. self.G1 = source_graph
  69. self.G2 = pattern_graph
  70. self.pred1 = pred1
  71. self.succ1 = succ1
  72. assert(isinstance(priority, Priority))
  73. self.priority = priority
  74. self.priority.source_graph = source_graph
  75. self.priority.pattern_graph = pattern_graph
  76. # Set recursion limit
  77. self.old_recursion_limit = sys.getrecursionlimit()
  78. expected_max_recursion_level = self.G2.vcount()
  79. if self.old_recursion_limit < 1.5 * expected_max_recursion_level:
  80. # Give some breathing room
  81. sys.setrecursionlimit(int(1.5 * expected_max_recursion_level))
  82. # Initialize the state
  83. self.initialize()
  84. # Check whether we are considering multi-graph
  85. # if reduce(lambda x,y: x or y, self.G2.is_multiple()):
  86. # self.cache_info_multi(self.G1_nodes, self.G2_nodes)
  87. # Scan the two graphs to cache required information.
  88. # Typically stores the results of expensive operation on the graphs.
  89. # This speeds up the algorithm significantly.
  90. self.cache_info()
  91. def cache_info(self):
  92. """
  93. Cache information on the nodes.
  94. Typically stores the results of expensive operation on the graphs.
  95. This speeds up the algorithm significantly.
  96. """
  97. # Cache individual nodes
  98. self.G1_nodes = self.G1.node_iter()
  99. self.G2_nodes = self.G2.node_iter()
  100. # # Memoize the predecessor & successor information:
  101. # # for each node store the number of neighbours and the list
  102. # if len(self.pred1) == 0 or len(self.succ1) == 0:
  103. # self.pred1 = {}
  104. # self.succ1 = {}
  105. # for node in self.G1_nodes:
  106. # self.pred1[node] = (len(self.G1.predecessors(node)), self.G1.predecessors(node))
  107. # self.succ1[node] = (len(self.G1.successors(node)), self.G1.successors(node))
  108. # self.pred2 = {}
  109. # self.succ2 = {}
  110. # for node in self.G2_nodes:
  111. # self.pred2[node] = (len(self.G2.predecessors(node)), self.G2.predecessors(node))
  112. # self.succ2[node] = (len(self.G2.successors(node)), self.G2.successors(node))
  113. # Cache any further data used for the heuristic prioritization for computing the candidate pair
  114. # This is done when initializing the priority class
  115. self.priority.cache_info(self.G1, self.G2)
  116. def reset_recursion_limit(self):
  117. """
  118. Restores the recursion limit.
  119. """
  120. sys.setrecursionlimit(self.old_recursion_limit)
  121. def initialize(self):
  122. """
  123. (Re)Initializes the state of the algorithm.
  124. """
  125. #=======================================================================
  126. # The algorithm is based on VF2.
  127. # The following are the data-structures used:
  128. # - M_1: the current partial mapping from G1 to G2
  129. # - M_2: the current partial mapping from G2 to G1
  130. # - T1_in: the in-neighbours of the nodes in M_1
  131. # - T2_in: the in-neighbours of the nodes in M_2
  132. # - T1_out: the out-neighbours of the nodes in M_1
  133. # - T2_out: the out-neighbours of the nodes in M_2
  134. #=======================================================================
  135. # core_1[n] contains the index of the node m paired with n, if n is in the mapping
  136. self.core_1 = {} # This is M_1
  137. # core_2[m] contains the index of the node n paired with m, if m is in the mapping
  138. self.core_2 = {} # This is M_2
  139. # The value stored is the depth of the search tree when the node became part of the corresponding set
  140. # Non-zero if n is in M_1 or in T_1^{in}
  141. self.in_1 = {}
  142. # Non-zero if n is in M_1 or in T_1^{out}
  143. self.out_1 = {}
  144. # Non-zero if m is in M_2 or in T_2^{in}
  145. self.in_2 = {}
  146. # Non-zero if m is in M_2 or in T_2^{out}
  147. self.out_2 = {}
  148. # To improve the performance, we also store the following vectors
  149. # Non-zero if n is in M_1 or in T_1^{in} or in T_1^{out}
  150. self.inout_1 = {}
  151. # Non-zero if n is in M_2 or in T_2^{in} or in T_2^{out}
  152. self.inout_2 = {}
  153. # Prepare the necessary data structures required for backtracking
  154. self.state = HimesisMatcherState(self)
  155. # Provide a convenient way to access the isomorphism mapping.
  156. self.mapping = self.core_2.copy()
  157. def are_compatibile(self, src_node, patt_node):
  158. """
  159. Verifies if a candidate pair is compatible.
  160. More specifically, verify degree and meta-model compatibility.
  161. @param src_node: The candidate from the source graph.
  162. @param patt_node: The candidate from the pattern graph.
  163. """
  164. sourceNode = self.G1.vs[src_node]
  165. patternNode = self.G2.vs[patt_node]
  166. # First check if they are of the same type
  167. if sourceNode[Himesis.Constants.FULLTYPE] == patternNode[Himesis.Constants.FULLTYPE]:
  168. # Then check for the degree compatibility
  169. return (self.pred2[patt_node][0] <= self.pred1[src_node][0]
  170. and self.succ2[patt_node][0] <= self.succ1[src_node][0])
  171. # Otherwise, first check for the degree compatibility
  172. elif not (self.pred2[patt_node][0] <= self.pred1[src_node][0]
  173. and self.succ2[patt_node][0] <= self.succ1[src_node][0]):
  174. return False
  175. # Then check sub-types compatibility
  176. else:
  177. return (patternNode[Himesis.Constants.MT_SUBTYPE_MATCH]
  178. and sourceNode[Himesis.Constants.FULLTYPE] in patternNode[Himesis.Constants.MT_SUBTYPES])
  179. def candidate_pairs_iter(self):
  180. """
  181. Iterator over candidate pairs of nodes in G1 and G2, according to the VF2 algorithm.
  182. The candidate pairs have all passed the compatibility check before output.
  183. @return: The candidate pair (source node, pattern node)
  184. """
  185. #=======================================================================
  186. # Here we compute P(s) = (p1,p2) the candidate pair
  187. # for the current partial mapping M(s).
  188. #=======================================================================
  189. # First try the nodes that are in both Ti_in and Ti_out
  190. if len(self.inout_1) > len(self.core_1) and len(self.inout_2) > len(self.core_2):
  191. for patt_node in self.priority.order_pattern(self.inout_2):
  192. if patt_node not in self.core_2:
  193. break
  194. for src_node in self.priority.order_source(self.inout_1):
  195. if src_node not in self.core_1:
  196. yield src_node, patt_node
  197. # If T1_out and T2_out are both non-empty:
  198. # P(s) = T1_out x {min T2_out}
  199. elif len(self.out_1) > len(self.core_1) and len(self.out_2) > len(self.core_2):
  200. for patt_node in self.priority.order_pattern(self.out_2):
  201. if patt_node not in self.core_2:
  202. break
  203. for src_node in self.priority.order_source(self.out_1):
  204. if src_node not in self.core_1:
  205. yield src_node, patt_node
  206. # If T1_in and T2_in are both non-empty:
  207. # P(s) = T1_in x {min T2_in}
  208. elif len(self.in_1) > len(self.core_1) and len(self.in_2) > len(self.core_2):
  209. for patt_node in self.priority.order_pattern(self.in_2):
  210. if patt_node not in self.core_2:
  211. break
  212. for src_node in self.priority.order_source(self.in_1):
  213. if src_node not in self.core_1:
  214. yield src_node, patt_node
  215. # If all terminal sets are empty:
  216. # P(s) = (N_1 - M_1) x {min (N_2 - M_2)}
  217. else:
  218. for patt_node in self.priority.order_all_pattern(self.G2_nodes):
  219. if patt_node not in self.core_2:
  220. break
  221. for src_node in self.priority.order_all_source(self.G1_nodes):
  222. if src_node not in self.core_1:
  223. yield src_node, patt_node
  224. def are_syntactically_feasible(self, src_node, patt_node):
  225. """
  226. Determines whether the two nodes are syntactically feasible,
  227. i.e., it ensures that adding this candidate pair does not make it impossible to find a total mapping.
  228. @param src_node: The candidate from the source graph.
  229. @param patt_node: The candidate from the pattern graph.
  230. @return: True if they are syntactically feasible, False otherwise.
  231. """
  232. #=======================================================================
  233. # The syntactic feasibility considers the topology of the two graphs.
  234. # It verifies that edges directly or indirectly connected to M(s + P(s))
  235. # does not violate the subgraph matching conditions.
  236. #=======================================================================
  237. # Check for self-loops
  238. # e1, e2 = -1, -1
  239. # if patt_node in self.succ2[patt_node] or patt_node in self.pred2[patt_node]:
  240. # if src_node in self.succ1[src_node] or src_node in self.pred1[src_node]:
  241. # e1 = self.G1.get_eid(src_node, src_node)
  242. # e2 = self.G2.get_eid(patt_node, patt_node)
  243. # if self.G1.count_multiple(e1) < self.G2.count_multiple(e2):
  244. # return False
  245. # else:
  246. # return False
  247. # Counters for in and out edges found
  248. in1 = 0
  249. in2 = 0
  250. out1 = 0
  251. out2 = 0
  252. inout1 = 0
  253. inout2 = 0
  254. # Checks if successors are compatible
  255. for successor2 in self.succ2[patt_node][1]:
  256. tmp = self.G2.predecessors(successor2)
  257. self.pred2[successor2] = (len(tmp), tmp)
  258. tmp = self.G2.successors(successor2)
  259. self.succ2[successor2] = (len(tmp), tmp)
  260. if successor2 not in self.core_2:
  261. for successor1 in self.succ1[src_node][1]:
  262. tmp = self.G1.predecessors(successor1)
  263. self.pred1[successor1] = (len(tmp), tmp)
  264. tmp = self.G1.successors(successor1)
  265. self.succ1[successor1] = (len(tmp), tmp)
  266. if (self.succ2[successor2][0] <= self.succ1[successor1][0]
  267. and self.pred2[successor2][0] <= self.pred1[successor1][0]
  268. and successor1 not in self.core_1):
  269. break
  270. else:
  271. return False
  272. # They are compatible, so update the counters of the pattern node
  273. if self.pred2[successor2][1]:
  274. in2 += 1
  275. if self.succ2[successor2][1]:
  276. out2 += 1
  277. if not self.pred2[successor2][1] and not self.succ2[successor2][1]:
  278. inout2 += 1
  279. else:
  280. if self.core_2[successor2] not in self.succ1[src_node][1]:
  281. return False
  282. # Checks if predecessors are compatible
  283. for predecessor2 in self.pred2[patt_node][1]:
  284. tmp = self.G2.predecessors(predecessor2)
  285. self.pred2[predecessor2] = (len(tmp), tmp)
  286. tmp = self.G2.successors(predecessor2)
  287. self.succ2[predecessor2] = (len(tmp), tmp)
  288. if predecessor2 not in self.core_2:
  289. for predecessor1 in self.pred1[src_node][1]:
  290. tmp = self.G1.predecessors(predecessor1)
  291. self.pred1[predecessor1] = (len(tmp), tmp)
  292. tmp = self.G1.successors(predecessor1)
  293. self.succ1[predecessor1] = (len(tmp), tmp)
  294. if (self.pred2[predecessor2][0] <= self.pred1[predecessor1][0]
  295. and self.pred2[predecessor2][0] <= self.pred1[predecessor1][0]
  296. and predecessor1 not in self.core_1):
  297. break
  298. else:
  299. return False
  300. # They are compatible, so update the counters of the pattern node
  301. if self.pred2[predecessor2][1]:
  302. in2 += 1
  303. if self.pred2[predecessor2][1]:
  304. out2 += 1
  305. if not self.pred2[predecessor2][1] and not self.pred2[predecessor2][1]:
  306. inout2 += 1
  307. else:
  308. if self.core_2[predecessor2] not in self.pred1[src_node][1]:
  309. return False
  310. # Now compute the counters of the source node
  311. for successor1 in self.succ1[src_node][1]:
  312. if successor1 not in self.core_1:
  313. tmp = self.G1.predecessors(successor1)
  314. self.pred1[successor1] = (len(tmp), tmp)
  315. tmp = self.G1.successors(successor1)
  316. self.succ1[successor1] = (len(tmp), tmp)
  317. if self.pred1[successor1][1]:
  318. in1 += 1
  319. if self.succ1[successor1][1]:
  320. out1 += 1
  321. if not self.pred1[successor1][1] and not self.succ1[successor1][1]:
  322. inout1 += 1
  323. # For induced matches
  324. #else:
  325. # if self.core_1[successor1] not in self.succ2[patt_node]:
  326. # return False
  327. # Now compute the counters of the source node
  328. for predecessor1 in self.pred1[src_node][1]:
  329. if predecessor1 not in self.core_1:
  330. tmp = self.G1.predecessors(predecessor1)
  331. self.pred1[predecessor1] = (len(tmp), tmp)
  332. tmp = self.G1.successors(predecessor1)
  333. self.succ1[predecessor1] = (len(tmp), tmp)
  334. if self.pred1[predecessor1][1]:
  335. in1 += 1
  336. if self.pred1[predecessor1][1]:
  337. out1 += 1
  338. if not self.pred1[predecessor1][1] and not self.pred1[predecessor1][1]:
  339. inout1 += 1
  340. # For induced matches
  341. #else:
  342. # if self.core_1[predecessor1] not in self.pred2[patt_node]:
  343. # return False
  344. # Finally, verify if all counters satisfy the subgraph matching conditions
  345. # For induced matches
  346. #return in2 <= in1 and out2 <= out1 and inout2 <= inout1
  347. return in2 <= in1 and out2 <= out1 and (in2 + out2 + inout2) <= (in1 + out1 + inout1)
  348. def are_semantically_feasible(self, src_node, patt_node):
  349. """
  350. Determines whether the two nodes are syntactically feasible,
  351. i.e., it ensures that adding this candidate pair does not make it impossible to find a total mapping.
  352. @param src_node: The candidate from the source graph.
  353. @param patt_node: The candidate from the pattern graph.
  354. @return: True if they are semantically feasible, False otherwise.
  355. """
  356. #=======================================================================
  357. # This feasibility check looks at the data stored in the pair of candidates.
  358. # It verifies that all attribute constraints are satisfied.
  359. #=======================================================================
  360. src_node = self.G1.vs[src_node]
  361. patt_node = self.G2.vs[patt_node]
  362. # check the type information
  363. exact_type_match = src_node[Himesis.Constants.FULLTYPE] == patt_node[Himesis.Constants.FULLTYPE]
  364. sub_type_match = patt_node[Himesis.Constants.MT_SUBTYPE_MATCH] and \
  365. src_node[Himesis.Constants.FULLTYPE] in patt_node[Himesis.Constants.MT_SUBTYPES]
  366. if not (exact_type_match or sub_type_match):
  367. return False
  368. # Check for attributes value/constraint
  369. for attr in patt_node.attribute_names():
  370. # Ignore non-RAM attributes
  371. if not Himesis.is_RAM_attribute(attr) :
  372. continue
  373. # If the attribute does not "in theory" exist
  374. # because igraph actually stores all attribute names in all nodes.
  375. elif patt_node[attr] == None:
  376. continue
  377. # Node patt_node has not yet been matched to src_node... however,
  378. # patt_node[attr](..) is expecting a mapping of patt_node's mtLabel
  379. # to src_node's index in self.G1... so we build this mapping first
  380. mtLabel2graphIndexMap = {}
  381. mtLabel2graphIndexMap[ patt_node[Himesis.Constants.MT_LABEL] ] = src_node.index
  382. try:
  383. if not patt_node[attr](mtLabel2graphIndexMap,self.G1):
  384. return False
  385. except Exception as e:
  386. #TODO: This should be a TransformationLanguageSpecificException
  387. raise Exception("An error has occurred while checking the constraint of the attribute '%s' :: %s" % (attr, str(e)))
  388. return True
  389. def _match(self):
  390. """
  391. Extends the pattern matching mapping.
  392. This method is recursively called to determine if the pattern G2
  393. can be completely matched on G1.
  394. @return: The mapping {pattern node index : source node index}
  395. """
  396. #=======================================================================
  397. # It cleans up the class variables after each recursive call.
  398. # If a match is found, we yield the mapping.
  399. #=======================================================================
  400. # Base condition when a complete match is found
  401. if len(self.core_2) == self.G2.vcount():
  402. # Save the final mapping, otherwise garbage collection deletes it
  403. self.mapping = self.core_2.copy()
  404. yield self.mapping
  405. else:
  406. for src_node, patt_node in self.candidate_pairs_iter():
  407. # Cache the predecessors and successors of the candidate pairs on the fly
  408. self.pred1, self.succ1, self.pred2, self.succ2 = {}, {}, {}, {}
  409. self.pred1[src_node] = (len(self.G1.predecessors(src_node)), self.G1.predecessors(src_node))
  410. self.succ1[src_node] = (len(self.G1.successors(src_node)), self.G1.successors(src_node))
  411. self.pred2[patt_node] = (len(self.G2.predecessors(patt_node)), self.G2.predecessors(patt_node))
  412. self.succ2[patt_node] = (len(self.G2.successors(patt_node)), self.G2.successors(patt_node))
  413. if self.are_compatibile(src_node, patt_node):
  414. if self.are_syntactically_feasible(src_node, patt_node):
  415. if self.are_semantically_feasible(src_node, patt_node):
  416. # Recursive call, adding the feasible state
  417. newstate = self.state.__class__(self, src_node, patt_node)
  418. for mapping in self._match():
  419. yield mapping
  420. # restore data structures
  421. newstate.restore()
  422. def has_match(self, context={}):
  423. """
  424. Determines if the pattern graph can be matched on the source graph.
  425. @param context: Optional predefined mapping {string:uuid}.
  426. @return: True if a match is found, False otherwise.
  427. """
  428. try:
  429. self.match_iter(context).next()
  430. return True
  431. except StopIteration:
  432. return False
  433. def match_iter(self, context={}):
  434. """
  435. Iterator over matchings of the pattern graph on the source graph.
  436. @param context: Optional predefined mapping {pattern node index: source node index}.
  437. @return: The mapping {pattern node index : source node index}.
  438. """
  439. self.initialize()
  440. for p in context:
  441. if self.are_semantically_feasible(context[p], p):
  442. self.state.__class__(self, context[p], p)
  443. else:
  444. # Additional constraints on the pivot nodes are not satisfied: no match is possible
  445. return
  446. for mapping in self._match():
  447. yield mapping
class HimesisMatcherState(object):
    """
    Internal representation of state for the HimesisMatcher class.
    This class is used internally by the HimesisMatcher class. It is used
    only to store state specific data. There will be at most V(pattern graph) of
    these objects in memory at a time, due to the depth-first search
    strategy employed by the VF2 algorithm.

    Constructing a state with a candidate pair ADDS that pair to the matcher's
    partial mapping and updates the terminal sets; restore() undoes exactly
    the entries stamped with this state's depth.
    """
    def __init__(self, matcher, src_node=None, patt_node=None):
        """
        Internal representation of state for the HimesisMatcher class.
        If src_node or patt_node is None, the matcher's mapping and terminal
        sets are reset instead of extended.
        @param matcher: The HimesisMatcher object.
        @param src_node: The source node of the candidate pair.
        @param patt_node: The pattern node of the candidate pair.
        """
        self.matcher = matcher
        # Initialize the last stored node pair.
        self.src_node = None
        self.patt_node = None
        # Depth of the search tree; used as the stamp for every entry this
        # state adds, so restore() can remove exactly those entries.
        self.depth = len(matcher.core_1)
        if src_node is None or patt_node is None:
            # Then we reset the class variables
            matcher.core_1 = {}
            matcher.core_2 = {}
            matcher.in_1 = {}
            matcher.in_2 = {}
            matcher.out_1 = {}
            matcher.out_2 = {}
            matcher.inout_1 = {}
            matcher.inout_2 = {}
        # Watch out! src_node == 0 should evaluate to True.
        if src_node is not None and patt_node is not None:
            # Add the node pair to the isomorphism mapping.
            matcher.core_1[src_node] = patt_node
            matcher.core_2[patt_node] = src_node
            # Store the node that was added last.
            self.src_node = src_node
            self.patt_node = patt_node
            # Now we must update the other four vectors.
            # We will add only if it is not in there already!
            self.depth = len(matcher.core_1)
            # First we add the new nodes...
            for vector in (matcher.in_1, matcher.out_1, matcher.inout_1):
                if src_node not in vector:
                    vector[src_node] = self.depth
            for vector in (matcher.in_2, matcher.out_2, matcher.inout_2):
                if patt_node not in vector:
                    vector[patt_node] = self.depth
            # Now we add every other node...
            # Updates for T_1^{in}: unmapped predecessors of mapped source nodes
            new_nodes_in = []
            for node in matcher.core_1:
                n = [predecessor for predecessor in matcher.G1.predecessors(node)
                     if predecessor not in matcher.core_1 and predecessor not in new_nodes_in]
                new_nodes_in += n
            for node in new_nodes_in:
                if node not in matcher.in_1:
                    matcher.in_1[node] = self.depth
            # Updates for T_1^{out}: unmapped successors of mapped source nodes
            new_nodes_out = []
            for node in matcher.core_1:
                n = [successor for successor in matcher.G1.successors(node)
                     if successor not in matcher.core_1 and successor not in new_nodes_out]
                new_nodes_out += n
            for node in new_nodes_out:
                if node not in matcher.out_1:
                    matcher.out_1[node] = self.depth
            # Updates for T_1^{inout}: nodes currently in both in_1 and out_1
            for node in set(list(matcher.in_1.keys()) + list(matcher.out_1.keys())):
                if node in matcher.out_1 and node in matcher.in_1 and node not in matcher.inout_1:
                    matcher.inout_1[node] = self.depth
            # Updates for T_2^{in}: unmapped predecessors of mapped pattern nodes
            new_nodes_in = []
            for node in matcher.core_2:
                n = [predecessor for predecessor in matcher.G2.predecessors(node)
                     if predecessor not in matcher.core_2 and predecessor not in new_nodes_in]
                new_nodes_in += n
            for node in new_nodes_in:
                if node not in matcher.in_2:
                    matcher.in_2[node] = self.depth
            # Updates for T_2^{out}: unmapped successors of mapped pattern nodes
            new_nodes_out = []
            for node in matcher.core_2:
                n = [successor for successor in matcher.G2.successors(node)
                     if successor not in matcher.core_2 and successor not in new_nodes_out]
                new_nodes_out += n
            for node in new_nodes_out:
                if node not in matcher.out_2:
                    matcher.out_2[node] = self.depth
            # Updates for T_2^{inout}: nodes currently in both in_2 and out_2
            for node in set(list(matcher.in_2.keys()) + list(matcher.out_2.keys())):
                if node in matcher.out_2 and node in matcher.in_2 and node not in matcher.inout_2:
                    matcher.inout_2[node] = self.depth
    def restore(self):
        """
        Deletes the HimesisMatcherState object and restores the class variables.
        """
        # First we remove the node that was added from the core vectors.
        # Watch out! src_node == 0 should evaluate to True.
        if self.src_node is not None and self.patt_node is not None:
            del self.matcher.core_1[self.src_node]
            del self.matcher.core_2[self.patt_node]
        # Now we revert the other four vectors.
        # Thus, we delete all entries which have this depth level.
        # (list() is required because we delete while iterating.)
        for vector in (self.matcher.in_1, self.matcher.in_2, self.matcher.out_1, self.matcher.out_2, self.matcher.inout_1, self.matcher.inout_2):
            for node in list(vector.keys()):
                if vector[node] == self.depth:
                    del vector[node]
  556. class VF2(HimesisMatcher):
  557. """
  558. The native VF2 algorithm for subgraph isomorphism.
  559. """
  560. def __init__(self, G1, G2):
  561. """
  562. The native VF2 algorithm for subgraph isomorphism.
  563. @param G1: The bigger graph.
  564. @param G2: The smaller graph.
  565. """
  566. HimesisMatcher.__init__(self, G1, G2)
  567. def match_iter(self):
  568. """
  569. Iterator over mappings of G2 on a subgraph of G1.
  570. @return: The mapping {pattern node uuid : source node uuid}.
  571. """
  572. for mapping in self.G1.get_subisomorphisms_vf2(self.G2):
  573. # mapping is a list for which mapping[i] is the source node index mapped to the pattern node index i
  574. # So we need to convert it into a dictionary
  575. match = {}
  576. for pattern_node, src_node in enumerate(mapping):
  577. match[pattern_node] = src_node
  578. yield match
  579. class SubgraphIsoMatcher(HimesisMatcher):
  580. """
  581. The VF2 algorithm for subgraph isomorphism as implemented in HimesisMatcher.
  582. Basically this is the same as HimesisMatcher but no node data is taken into consideration.
  583. """
  584. def __init__(self, source_graph, pattern_graph, priority=Priority()):
  585. """
  586. The VF2 algorithm for subgraph isomorphism as implemented in HimesisMatcher.
  587. Basically this is the same as HimesisMatcher but no node data is taken into consideration.
  588. """
  589. HimesisMatcher.__init__(self, source_graph, pattern_graph, priority)
  590. def are_compatibile(self, src_node, patt_node):
  591. """
  592. Verifies if a candidate pair is compatible.
  593. More specifically, verify degree compatibility.
  594. @param src_node: The candidate from the source graph.
  595. @param patt_node: The candidate from the pattern graph.
  596. """
  597. return (self.pred2[patt_node][0] <= self.pred1[src_node][0]
  598. and self.succ2[patt_node][0] <= self.succ1[src_node][0])
  599. def are_semantically_feasible(self, sourceNode, patternNode):
  600. """
  601. Since no data is considered, the graphs have no semantics.
  602. @param src_node: The candidate from the source graph.
  603. @param patt_node: The candidate from the pattern graph.
  604. @return: True always.
  605. """
  606. return True