| Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
| UnorderedWindowIterator |
|
| 7.5;7.5 |
| 1 | // BSD License (http://www.galagosearch.org/license) | |
| 2 | package org.galagosearch.core.retrieval.structured; | |
| 3 | ||
| 4 | import java.io.IOException; | |
| 5 | import java.util.ArrayList; | |
| 6 | import org.galagosearch.tupleflow.Parameters; | |
| 7 | import org.galagosearch.tupleflow.Utility; | |
| 8 | ||
| 9 | /** | |
| 10 | * | |
| 11 | * @author trevor | |
| 12 | */ | |
| 13 | public class UnorderedWindowIterator extends ExtentConjunctionIterator { | |
| 14 | int width; | |
| 15 | boolean overlap; | |
| 16 | ||
| 17 | /** Creates a new instance of UnorderedWindowIterator */ | |
| 18 | public UnorderedWindowIterator(Parameters parameters, ExtentIterator[] extentIterators) throws IOException { | |
| 19 | 16 | super(extentIterators); |
| 20 | 16 | this.width = (int) parameters.getAsDefault("width", -1); |
| 21 | 16 | this.overlap = parameters.get("overlap", false); |
| 22 | 16 | findDocument(); |
| 23 | 16 | } |
| 24 | ||
| 25 | public void loadExtents() { | |
| 26 | 16 | extents.reset(); |
| 27 | ||
| 28 | ExtentArrayIterator[] iterators; | |
| 29 | 16 | int maximumPosition = 0; |
| 30 | 16 | int minimumPosition = Integer.MAX_VALUE; |
| 31 | ||
| 32 | // someday this will be a heap/priorityQueue for the overlapping case | |
| 33 | 16 | iterators = new ExtentArrayIterator[extentIterators.length]; |
| 34 | ||
| 35 | 48 | for (int i = 0; i < extentIterators.length; i++) { |
| 36 | 32 | iterators[i] = new ExtentArrayIterator(extentIterators[i].extents()); |
| 37 | 32 | minimumPosition = Math.min(iterators[i].current().begin, minimumPosition); |
| 38 | 32 | maximumPosition = Math.max(iterators[i].current().end, maximumPosition); |
| 39 | } | |
| 40 | ||
| 41 | do { | |
| 42 | 16 | boolean match = (maximumPosition - minimumPosition <= width); |
| 43 | ||
| 44 | // try to emit an extent here, but only if the width is small enough | |
| 45 | 16 | if (match) { |
| 46 | 16 | extents.add(document, minimumPosition, maximumPosition); |
| 47 | } | |
| 48 | 16 | if (overlap || !match) { |
| 49 | // either it didn't just match or we don't care about overlap, | |
| 50 | // so we want to increment only the very first iterator | |
| 51 | 0 | for (int i = 0; i < iterators.length; i++) { |
| 52 | 0 | if (iterators[i].current().begin == minimumPosition) { |
| 53 | 0 | boolean result = iterators[i].next(); |
| 54 | ||
| 55 | 0 | if (!result) { |
| 56 | 0 | return; |
| 57 | } | |
| 58 | } | |
| 59 | } | |
| 60 | } else { | |
| 61 | // last was a match, so increment all iterators past the end of the match | |
| 62 | 16 | for (int i = 0; i < iterators.length; i++) { |
| 63 | 16 | while (iterators[i].current().begin < maximumPosition) { |
| 64 | 16 | boolean result = iterators[i].next(); |
| 65 | ||
| 66 | 16 | if (!result) { |
| 67 | 16 | return; |
| 68 | } | |
| 69 | 0 | } |
| 70 | } | |
| 71 | } | |
| 72 | ||
| 73 | // reset the minimumPosition | |
| 74 | 0 | minimumPosition = Integer.MAX_VALUE; |
| 75 | 0 | maximumPosition = 0; |
| 76 | ||
| 77 | // now, reset bounds | |
| 78 | 0 | for (int i = 0; i < iterators.length; i++) { |
| 79 | 0 | minimumPosition = Math.min(minimumPosition, iterators[i].current().begin); |
| 80 | 0 | maximumPosition = Math.max(maximumPosition, iterators[i].current().end); |
| 81 | } | |
| 82 | 0 | } while (true); |
| 83 | } | |
| 84 | } |