View Javadoc

1   // BSD License (http://www.galagosearch.org/license)
2   package org.galagosearch.core.retrieval.structured;
3   
4   import java.io.IOException;
5   import java.util.ArrayList;
6   import org.galagosearch.tupleflow.Parameters;
7   import org.galagosearch.tupleflow.Utility;
8   
9   /***
10   *
11   * @author trevor
12   */
13  public class UnorderedWindowIterator extends ExtentConjunctionIterator {
14      int width;
15      boolean overlap;
16  
17      /*** Creates a new instance of UnorderedWindowIterator */
18      public UnorderedWindowIterator(Parameters parameters, ExtentIterator[] extentIterators) throws IOException {
19          super(extentIterators);
20          this.width = (int) parameters.getAsDefault("width", -1);
21          this.overlap = parameters.get("overlap", false);
22          findDocument();
23      }
24  
25      public void loadExtents() {
26          extents.reset();
27  
28          ExtentArrayIterator[] iterators;
29          int maximumPosition = 0;
30          int minimumPosition = Integer.MAX_VALUE;
31  
32          // someday this will be a heap/priorityQueue for the overlapping case
33          iterators = new ExtentArrayIterator[extentIterators.length];
34  
35          for (int i = 0; i < extentIterators.length; i++) {
36              iterators[i] = new ExtentArrayIterator(extentIterators[i].extents());
37              minimumPosition = Math.min(iterators[i].current().begin, minimumPosition);
38              maximumPosition = Math.max(iterators[i].current().end, maximumPosition);
39          }
40  
41          do {
42              boolean match = (maximumPosition - minimumPosition <= width);
43  
44              // try to emit an extent here, but only if the width is small enough
45              if (match) {
46                  extents.add(document, minimumPosition, maximumPosition);
47              }
48              if (overlap || !match) {
49                  // either it didn't just match or we don't care about overlap,
50                  // so we want to increment only the very first iterator
51                  for (int i = 0; i < iterators.length; i++) {
52                      if (iterators[i].current().begin == minimumPosition) {
53                          boolean result = iterators[i].next();
54  
55                          if (!result) {
56                              return;
57                          }
58                      }
59                  }
60              } else {
61                  // last was a match, so increment all iterators past the end of the match
62                  for (int i = 0; i < iterators.length; i++) {
63                      while (iterators[i].current().begin < maximumPosition) {
64                          boolean result = iterators[i].next();
65  
66                          if (!result) {
67                              return;
68                          }
69                      }
70                  }
71              }
72  
73              // reset the minimumPosition
74              minimumPosition = Integer.MAX_VALUE;
75              maximumPosition = 0;
76  
77              // now, reset bounds
78              for (int i = 0; i < iterators.length; i++) {
79                  minimumPosition = Math.min(minimumPosition, iterators[i].current().begin);
80                  maximumPosition = Math.max(maximumPosition, iterators[i].current().end);
81              }
82          } while (true);
83      }
84  }