001 /*
002 Copyright (c) 2012, Regents of the University of Colorado
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without modification,
006 are permitted provided that the following conditions are met:
007
008 * Redistributions of source code must retain the above copyright notice, this
009 list of conditions and the following disclaimer.
010
011 * Redistributions in binary form must reproduce the above copyright notice,
012 this list of conditions and the following disclaimer in the documentation
013 and/or other materials provided with the distribution.
014
015 * Neither the name of the University of Colorado nor the names of its
016 contributors may be used to endorse or promote products derived from this
017 software without specific prior written permission.
018
019 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
023 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030
031 package edu.ucdenver.ccp.esm;
032
033 import java.util.ArrayList;
034 import java.util.Arrays;
035 import java.util.HashMap;
036 import java.util.List;
037 import java.util.Map;
038 import java.util.Random;
039
040 import edu.uci.ics.jung.graph.DirectedGraph;
041
042 /**
043 * <p>The Exact Subgraph Matching (ESM) algorithm Java implementation for subgraph isomorphism problems</p>
044 *
045 *
046 * <p>The subgraph isomorphism problem is NP-hard. We designed a simple exact subgraph matching (ESM)
047 * algorithm for dependency graphs using a backtracking approach. This algorithm is designed to process
048 * dependency graphs (directed graphs, direction from governor token to dependent token) from dependency
049 * parsers for biological relation and event extraction. It has been demonstrated to be very efficient
050 * in these biological information extraction applications.</p>
051 *
052 *
053 * <p>The total worst-case algorithm complexity is O(<i>n</i>^2 * <i>k</i>^<i>n</i>) where n is the number of vertices and
054 * k is the vertex degree (number of adjacent edges). The best-case algorithm complexity is O(n^3 * k^2). </p>
055 *
056 * <p>For more details about our ESM algorithm, the analysis on time complexity, and the different
057 * applications of the algorithm, see the section "Related Publications" in the README file for the
058 * complete list of our ESM-related publications. </p>
059 *
060 * <p>This ESM implementation also provides a function to determine the graph isomorphism between two graphs.
061 * </p>
062 *
063 * @author Implemented by Haibin Liu and Tested by Philippe Thomas
064 *
065 */
066 public class ESM {
067 /** subgraph to be matched (normally smaller graph) */
068 DirectedGraph<Vertex, Edge> subgraph = null;
069 /** graph to be matched (normally bigger graph) */
070 DirectedGraph<Vertex, Edge> graph = null;
071 /** startnode of subgraph for matching (from which subgraph node to start the matching process) */
072 private Vertex subgraphStartNode = null;
073 /** a set of startnodes of graph for matching */
074 private List<Vertex> graphStartNodes = null;
075
076 /**
077 * Constructor to initialize the subgraph and graph and
078 * specify the start node of the subgraph and the set of start nodes of the graph
079 * @param subgraph : subgraph (supposed to be smaller)
080 * @param graph : graph (supposed to be bigger)
081 */
082 public ESM (DirectedGraph<Vertex, Edge> subgraph, DirectedGraph<Vertex, Edge> graph) {
083 this.graph = graph;
084 this.subgraph = subgraph;
085 //set the startnode of subgraph
086 subgraphStartNode = getRandomStartNode( new ArrayList<Vertex>(subgraph.getVertices()) );
087 graphStartNodes = new ArrayList<Vertex>(graph.getVertices());
088 }
089
090 /**
091 * randomly choose the start node of the subgraph (default setting)
092 * @param subgraphNodes
093 * @return start node of the subgraph
094 */
095 private Vertex getRandomStartNode( List<Vertex> subgraphNodes) {
096 //Create random class object
097 Random random = new Random();
098 //Generate a random number (index) with the size of the list being the maximum
099 int randomNumber = random.nextInt(subgraphNodes.size());
100 return subgraphNodes.get(randomNumber);
101 }
102
103 /**
104 * Instead of the default random start node, users can specify the start node for the subgraph
105 * @param vertex : user-specified start node
106 */
107 public void setSubgraphStartNode (Vertex vertex) {
108 subgraphStartNode = vertex;
109 }
110
111 /**
112 * The default setting for choosing the set of start nodes for the graph
113 * is use all nodes in the graph as start nodes. However, users can set the set of start
114 * nodes for the graph to specify which specific set of nodes they want to use to compare
115 * with the subgraph start node. This will narrow down the search by avoiding to match
116 * the subgraph start node with every graph node. Consequently, more efficient.
117 * @param vertices : user-specified set of start nodes
118 */
119 public void setGraphStartNodes (List<Vertex> vertices) {
120 graphStartNodes = vertices;
121 }
122
123 /**
124 * Retrieve specific matchings between the subgraph and the graph
125 * matching result is store in a map. the key is the node in the subgraph and
126 * the value is the injective matching node in the graph
127 * Since a subgraph can match different places of a graph,
128 * we record all the matchings by putting each matching into a List
129 * @return a list of matchings between two graphs
130 */
131 public List<Map<Vertex, Vertex>> getSubgraphMatchingMatches() {
132 List<Map<Vertex, Vertex>> matches = null;
133 if(!isSubgraphSmaller()) {
134 System.err.println("The size of the subgraph: " +
135 subgraph.getVertexCount() + " is bigger than the size of the graph " +
136 graph.getVertexCount() + ". Please check.");
137 return matches;
138 }
139
140 for(int i = 0; i < graphStartNodes.size(); i++) {
141 Map<Vertex, Vertex> subgraphToGraph = new HashMap<Vertex, Vertex>();
142 Map<Vertex, Vertex> graphToSubgraph = new HashMap<Vertex, Vertex>();
143 List<Map<Vertex, Vertex>> total = new ArrayList<Map<Vertex, Vertex>>();
144 List<Vertex> toMatch = Arrays.asList(subgraphStartNode, graphStartNodes.get(i));
145
146 if(matchNodeForAllMatches(toMatch, subgraphToGraph, graphToSubgraph, subgraph, graph, total)) {
147 if(matches == null) {
148 matches = new ArrayList<Map<Vertex, Vertex>>(total);
149 continue;
150 }
151 for(Map<Vertex, Vertex> m : total) {
152 boolean flag = true;
153 for(Map<Vertex, Vertex> n : matches) {
154 if( equalMaps(m, n) )
155 flag = false;
156 }
157 if(flag) matches.add(m);
158 }
159 }
160 }
161 return matches;
162 }
163
164 /**
165 * Check if two maps are equal
166 * @param m1 : first map
167 * @param m2 : second map
168 * @return true of false
169 */
170 private boolean equalMaps(Map<Vertex,Vertex> m1, Map<Vertex,Vertex> m2) {
171 if (m1.size() != m2.size())
172 return false;
173 for (Vertex key: m1.keySet())
174 if (!m1.get(key).equals(m2.get(key)))
175 return false;
176 return true;
177 }
178
179 /**
180 * The main recursive function for subgraph isomorphism
181 * this function helps retrieve all possible matchings between two graphs because
182 * a subgraph can have multiple matchings with a graph
183 * @param toMatchs : nodes to be matched in two graphs
184 * @param subgraphToGraphs : map to record mapping from the subgraph to the graph
185 * @param graphToSubgraphs : map to record mapping from the graph to the subgraph
186 * @param subgraph : the input subgraph
187 * @param graph : the input graph
188 * @param total : store all possible matchings between two graphs
189 * @return boolean to indicate if the matching is successful or not
190 */
191 private boolean matchNodeForAllMatches(List<Vertex> toMatchs, Map<Vertex, Vertex> subgraphToGraphs, Map<Vertex, Vertex> graphToSubgraphs,
192 DirectedGraph<Vertex, Edge> subgraph, DirectedGraph<Vertex, Edge> graph, List<Map<Vertex, Vertex>> total) {
193 //generate local copies
194 List<Vertex> toMatch = new ArrayList<Vertex>(toMatchs);
195 Map<Vertex, Vertex> subgraphToGraph = new HashMap<Vertex, Vertex>(subgraphToGraphs);
196 Map<Vertex, Vertex> graphToSubgraph = new HashMap<Vertex, Vertex>(graphToSubgraphs);
197
198 boolean failure = false;
199 boolean success = true;
200 while( toMatch.size() != 0 ) {
201 Vertex noder = toMatch.remove(0);
202 Vertex nodes = toMatch.remove(0);
203
204 //this is the place to check whether they can be matched
205 if(subgraphToGraph.containsKey(noder) && !graphToSubgraph.containsKey(nodes))
206 return failure;
207 if(!subgraphToGraph.containsKey(noder) && graphToSubgraph.containsKey(nodes))
208 return failure;
209 if(subgraphToGraph.containsKey(noder) && graphToSubgraph.containsKey(nodes)) {
210 if(subgraphToGraph.get(noder).equals(nodes) && graphToSubgraph.get(nodes).equals(noder)) {
211 //do nothing
212 }
213 else
214 return failure;
215 }
216
217 // Here we can make checks whether noder and nodes should match
218 if(!matchNodeContent(noder, nodes))
219 return failure;
220
221 //record the injective match
222 subgraphToGraph.put(noder, nodes);
223 graphToSubgraph.put(nodes, noder);
224
225 //one direction match (as governor)
226 for( Edge e : subgraph.getOutEdges(noder)) {
227 Vertex r = e.getDependent();
228 List<Vertex> candidateNodes = new ArrayList<Vertex>();
229 for(Edge s : graph.getOutEdges(nodes) ) {
230 if(e.getLabel().equals(s.getLabel()))
231 candidateNodes.add(s.getDependent());
232 }
233 boolean flag = false;
234 boolean terminate = false;
235 for(Vertex s : candidateNodes) {
236 if(subgraphToGraph.containsKey(r) && ! graphToSubgraph.containsKey(s))
237 continue;
238 if(!subgraphToGraph.containsKey(r) && graphToSubgraph.containsKey(s))
239 continue;
240 if(subgraphToGraph.containsKey(r) && graphToSubgraph.containsKey(s)) {
241 if(subgraphToGraph.get(r).equals(s) && graphToSubgraph.get(s).equals(r)) {
242 terminate = true;
243 break;
244 }
245 else continue;
246 }
247 List<Vertex> toMatchTemp = new ArrayList<Vertex>(toMatch);
248 toMatchTemp.add(noder); toMatchTemp.add(nodes);
249 toMatchTemp.add(r); toMatchTemp.add(s);
250 if(matchNodeForAllMatches(toMatchTemp, subgraphToGraph, graphToSubgraph, subgraph, graph, total))
251 flag = true;
252 }
253 if(terminate) continue;
254 if(flag) return success;
255
256 return failure;
257 }
258
259 //the other direction match (as dependent)
260 for( Edge e : subgraph.getInEdges(noder)) {
261 Vertex r = e.getGovernor();
262 List<Vertex> candidateNodes = new ArrayList<Vertex>();
263 for(Edge s : graph.getInEdges(nodes) ) {
264 if(e.getLabel().equals(s.getLabel()))
265 candidateNodes.add(s.getGovernor());
266 }
267 boolean flag = false;
268 boolean terminate = false;
269 for(Vertex s : candidateNodes) {
270 if(subgraphToGraph.containsKey(r) && ! graphToSubgraph.containsKey(s))
271 continue;
272 if(!subgraphToGraph.containsKey(r) && graphToSubgraph.containsKey(s))
273 continue;
274 if(subgraphToGraph.containsKey(r) && graphToSubgraph.containsKey(s)) {
275 if(subgraphToGraph.get(r).equals(s) && graphToSubgraph.get(s).equals(r)) {
276 terminate = true;
277 break;
278 }
279 else continue;
280 }
281 List<Vertex> toMatchTemp = new ArrayList<Vertex>(toMatch);
282 toMatchTemp.add(noder); toMatchTemp.add(nodes);
283 toMatchTemp.add(r); toMatchTemp.add(s);
284 if(matchNodeForAllMatches(toMatchTemp, subgraphToGraph, graphToSubgraph, subgraph, graph, total))
285 flag = true;
286 }
287 if(terminate) continue;
288 if(flag) return success;
289
290 return failure;
291 }
292
293 }
294
295 //success return
296 total.add(subgraphToGraph);
297 return success;
298 }
299
300 /**
301 * Check if the input subgraph is subsumed by the input graph
302 * @return true or false
303 */
304 public boolean isSubgraphIsomorphism() {
305 boolean isSubgraphIsomorphism = false;
306 if(!isSubgraphSmaller()) {
307 System.err.println("The size of the subgraph: " +
308 subgraph.getVertexCount() + " is bigger the size of the graph " +
309 graph.getVertexCount() + ". Please check.");
310 return isSubgraphIsomorphism;
311 }
312 for(int i = 0; i < graphStartNodes.size(); i++) {
313 Map<Vertex, Vertex> subgraphToGraph = new HashMap<Vertex, Vertex>();
314 Map<Vertex, Vertex> graphToSubgraph = new HashMap<Vertex, Vertex>();
315 List<Vertex> toMatch = Arrays.asList(subgraphStartNode, graphStartNodes.get(i));
316
317 if(matchNodeForSingleMatch(toMatch, subgraphToGraph, graphToSubgraph, subgraph, graph)) {
318 isSubgraphIsomorphism = true;
319 break;
320 }
321 }
322 return isSubgraphIsomorphism;
323 }
324
325 /**
326 * Provide an additional function to determine if two graphs are isomorphic to each other
327 * based on the fact that if two graphs are subgraph isomorphic to each other, then they are
328 * isomorphic to each other
329 * @return true or false
330 */
331 public boolean isGraphIsomorphism() {
332 boolean isGraphIsomorphism = false;
333 boolean isSubgraphIsomorphicToGraph = false;
334 boolean isgraphIsomorphicToSubgraph = false;
335
336 if(!isGraphSizeSame()) {
337 System.err.println("The size of the subgraph: " +
338 subgraph.getVertexCount() + " is not same as the size of the graph " +
339 graph.getVertexCount() + ". Please check.");
340 return isGraphIsomorphism;
341 }
342 //subgraph against graph
343 for(int i = 0; i < graphStartNodes.size(); i++) {
344 Map<Vertex, Vertex> subgraphToGraph = new HashMap<Vertex, Vertex>();
345 Map<Vertex, Vertex> graphToSubgraph = new HashMap<Vertex, Vertex>();
346 List<Vertex> toMatch = Arrays.asList(subgraphStartNode, graphStartNodes.get(i));
347 if(matchNodeForSingleMatch(toMatch, subgraphToGraph, graphToSubgraph, subgraph, graph)) {
348 isSubgraphIsomorphicToGraph = true;
349 break;
350 }
351 }
352
353 //graph against subgraph
354 //reset the startnode(s)
355 subgraphStartNode = getRandomStartNode( new ArrayList<Vertex>(graph.getVertices()) );
356 graphStartNodes = new ArrayList<Vertex>(subgraph.getVertices());
357 for(int i = 0; i < graphStartNodes.size(); i++) {
358 Map<Vertex, Vertex> subgraphToGraph = new HashMap<Vertex, Vertex>();
359 Map<Vertex, Vertex> graphToSubgraph = new HashMap<Vertex, Vertex>();
360
361 List<Vertex> toMatch = Arrays.asList(subgraphStartNode, graphStartNodes.get(i));
362 if(matchNodeForSingleMatch(toMatch, subgraphToGraph, graphToSubgraph, graph, subgraph)) {
363 isgraphIsomorphicToSubgraph = true;
364 break;
365 }
366 }
367
368 if(isSubgraphIsomorphicToGraph && isgraphIsomorphicToSubgraph)
369 isGraphIsomorphism = true;
370
371 //set the startnode(s) back
372 subgraphStartNode = getRandomStartNode( new ArrayList<Vertex>(subgraph.getVertices()) );
373 graphStartNodes = new ArrayList<Vertex>(graph.getVertices());
374
375 return isGraphIsomorphism;
376 }
377
378 /**
379 * The main recursive function for subgraph isomorphism
380 * this function only retrieve one possible matchings between two graphs.
381 * As long as it finds an isomorphic subgraph, it returns.
382 * Thus, this function is used to determine the subgraph isomorphism, instead of
383 * retrieving all possible matchings between two graphs. Therefore, faster.
384 * @param toMatchs : nodes to be matched in two graphs
385 * @param subgraphToGraphs : map to record mapping from the subgraph to the graph
386 * @param graphToSubgraphs : map to record mapping from the graph to the subgraph
387 * @param subgraph : the input subgraph
388 * @param graph : the input graph
389 * @return boolean to indicate if the subgraph isomorphism exists or not
390 */
391 private boolean matchNodeForSingleMatch(List<Vertex> toMatchs, Map<Vertex, Vertex> subgraphToGraphs, Map<Vertex, Vertex> graphToSubgraphs,
392 DirectedGraph<Vertex, Edge> subgraph, DirectedGraph<Vertex, Edge> graph) {
393 //generate local copies
394 List<Vertex> toMatch = new ArrayList<Vertex>(toMatchs);
395 Map<Vertex, Vertex> subgraphToGraph = new HashMap<Vertex, Vertex>(subgraphToGraphs);
396 Map<Vertex, Vertex> graphToSubgraph = new HashMap<Vertex, Vertex>(graphToSubgraphs);
397
398 boolean failure = false;
399 boolean success = true;
400 while( toMatch.size() != 0 ) {
401 Vertex noder = toMatch.remove(0);
402 Vertex nodes = toMatch.remove(0);
403 //System.out.println("before " + noder.getToken() + " -> " + nodes.getToken());
404 //this is the place to check whether they can be matched
405 if(subgraphToGraph.containsKey(noder) && !graphToSubgraph.containsKey(nodes))
406 return failure;
407 if(!subgraphToGraph.containsKey(noder) && graphToSubgraph.containsKey(nodes))
408 return failure;
409 if(subgraphToGraph.containsKey(noder) && graphToSubgraph.containsKey(nodes)) {
410 if(subgraphToGraph.get(noder).equals(nodes) && graphToSubgraph.get(nodes).equals(noder)) {
411 //do nothing
412 }
413 else
414 return failure;
415 }
416
417 // Here we can make checks whether noder and nodes should match
418 if(!matchNodeContent(noder, nodes))
419 return failure;
420
421 //record the injective match
422 subgraphToGraph.put(noder, nodes);
423 graphToSubgraph.put(nodes, noder);
424 //System.out.println("after " + noder.getToken() + " -> " + nodes.getToken());
425
426 //one direction match (as governor)
427 for( Edge e : subgraph.getOutEdges(noder)) {
428 Vertex r = e.getDependent();
429 List<Vertex> candidateNodes = new ArrayList<Vertex>();
430 for(Edge s : graph.getOutEdges(nodes) ) {
431 if(e.getLabel().equals(s.getLabel()))
432 candidateNodes.add(s.getDependent());
433 }
434 boolean terminate = false;
435 for(Vertex s : candidateNodes) {
436 if(subgraphToGraph.containsKey(r) && ! graphToSubgraph.containsKey(s))
437 continue;
438 if(!subgraphToGraph.containsKey(r) && graphToSubgraph.containsKey(s))
439 continue;
440 if(subgraphToGraph.containsKey(r) && graphToSubgraph.containsKey(s)) {
441 if(subgraphToGraph.get(r).equals(s) && graphToSubgraph.get(s).equals(r)) {
442 terminate = true;
443 break;
444 }
445 else continue;
446 }
447 List<Vertex> toMatchTemp = new ArrayList<Vertex>(toMatch);
448 toMatchTemp.add(noder); toMatchTemp.add(nodes);
449 toMatchTemp.add(r); toMatchTemp.add(s);
450 if(matchNodeForSingleMatch(toMatchTemp, subgraphToGraph, graphToSubgraph, subgraph, graph)) {
451 subgraphToGraphs = new HashMap<Vertex, Vertex>(subgraphToGraph);
452 graphToSubgraphs = new HashMap<Vertex, Vertex>(graphToSubgraph);
453 return success;
454 }
455 }
456 if(terminate) continue;
457
458 return failure;
459 }
460
461 //the other direction match (as dependent)
462 for( Edge e : subgraph.getInEdges(noder)) {
463 Vertex r = e.getGovernor();
464 List<Vertex> candidateNodes = new ArrayList<Vertex>();
465 for(Edge s : graph.getInEdges(nodes) ) {
466 if(e.getLabel().equals(s.getLabel()))
467 candidateNodes.add(s.getGovernor());
468 }
469 boolean terminate = false;
470 for(Vertex s : candidateNodes) {
471 if(subgraphToGraph.containsKey(r) && ! graphToSubgraph.containsKey(s))
472 continue;
473 if(!subgraphToGraph.containsKey(r) && graphToSubgraph.containsKey(s))
474 continue;
475 if(subgraphToGraph.containsKey(r) && graphToSubgraph.containsKey(s)) {
476 if(subgraphToGraph.get(r).equals(s) && graphToSubgraph.get(s).equals(r)) {
477 terminate = true;
478 break;
479 }
480 else continue;
481 }
482 List<Vertex> toMatchTemp = new ArrayList<Vertex>(toMatch);
483 toMatchTemp.add(noder); toMatchTemp.add(nodes);
484 toMatchTemp.add(r); toMatchTemp.add(s);
485 if(matchNodeForSingleMatch(toMatchTemp, subgraphToGraph, graphToSubgraph, subgraph, graph)) {
486 subgraphToGraphs = new HashMap<Vertex, Vertex>(subgraphToGraph);
487 graphToSubgraphs = new HashMap<Vertex, Vertex>(graphToSubgraph);
488 return success;
489 }
490 }
491 if(terminate) continue;
492
493 return failure;
494 }
495
496 }
497
498 //success return
499 subgraphToGraphs = new HashMap<Vertex, Vertex>(subgraphToGraph);
500 graphToSubgraphs = new HashMap<Vertex, Vertex>(graphToSubgraph);
501 //for(Entry<Vertex, Vertex> entry : subgraphToGraphs.entrySet())
502 // System.out.println(entry.getKey().getToken() + " -> " + entry.getValue().getToken());
503 return success;
504 }
505
506 /**
507 * Determine if two nodes from two graphs should match with each other or not
508 * Current implementation check the compareForm in each node, which includes
509 * the generalized POS tag and the lemma information computed by the BioLemmatizer
510 * @param noder : node in the subgraph
511 * @param nodes : node in the graph
512 * @return true of false
513 */
514 private boolean matchNodeContent(Vertex noder, Vertex nodes) {
515 boolean canMatch = false;
516 // the matching criteria can be extended,
517 // i.e., word can be lemma, and tag can be generalized tag
518 // ontological resources can be also imported here for node matching
519
520 if( noder.getCompareForm().equals(nodes.getCompareForm())){
521 canMatch = true;
522 }
523 return canMatch;
524 }
525
526 /**
527 * Sanity check if the specified subgraph is smaller or equal to the specified graph
528 * This function is called first when determining the subgraph isomorphism
529 * @return true or false
530 */
531 private Boolean isSubgraphSmaller() {
532 return subgraph.getVertexCount() <= graph.getVertexCount();
533 }
534
535 /**
536 * Sanity check if the specified subgraph is equal to the specified graph
537 * This function is called first when determining the graph isomorphism
538 * @return true or false
539 */
540 private Boolean isGraphSizeSame() {
541 return subgraph.getVertexCount() == graph.getVertexCount();
542 }
543
544 }