View Javadoc

1   // BSD License (http://www.galagosearch.org/license)
2   
3   package org.galagosearch.core.retrieval.structured;
4   
5   import java.io.FileNotFoundException;
6   import java.io.IOException;
7   import java.util.ArrayList;
8   import java.util.PriorityQueue;
9   import org.galagosearch.core.index.StructuredIndex;
10  import org.galagosearch.core.retrieval.query.Node;
11  import org.galagosearch.core.retrieval.query.StructuredQuery;
12  import org.galagosearch.core.retrieval.Retrieval;
13  import org.galagosearch.core.retrieval.ScoredDocument;
14  import org.galagosearch.core.retrieval.query.NodeType;
15  import org.galagosearch.core.retrieval.traversal.AddCombineTraversal;
16  import org.galagosearch.core.retrieval.traversal.ImplicitFeatureCastTraversal;
17  import org.galagosearch.core.retrieval.traversal.IndriWindowCompatibilityTraversal;
18  import org.galagosearch.core.retrieval.traversal.TextFieldRewriteTraversal;
19  import org.galagosearch.core.retrieval.traversal.WeightConversionTraversal;
20  import org.galagosearch.tupleflow.Parameters;
21  
22  /***
23   *
24   * @author trevor
25   */
26  public class StructuredRetrieval extends Retrieval {
27      StructuredIndex index;
28      FeatureFactory featureFactory;
29  
30      public StructuredRetrieval(StructuredIndex index) {
31          this.index = index;
32          Parameters featureParameters = new Parameters();
33          featureParameters.add("collectionLength", Long.toString(index.getCollectionLength()));
34          featureParameters.add("documentCount", Long.toString(index.getDocumentCount()));
35          featureFactory = new FeatureFactory(featureParameters);
36      }
37  
38      public StructuredRetrieval(String filename) throws FileNotFoundException, IOException {
39          this(new StructuredIndex(filename));
40      }
41  
42      public ScoredDocument[] getArrayResults(PriorityQueue<ScoredDocument> scores) {
43          ScoredDocument[] results = new ScoredDocument[scores.size()];
44  
45          for (int i = scores.size() - 1; i >= 0; i--) {
46              results[i] = scores.poll();
47          }
48  
49          return results;
50      }
51      
52      public NodeType getNodeType(Node node) throws Exception {
53          NodeType nodeType = index.getNodeType(node);
54          if (nodeType == null) {
55              nodeType = featureFactory.getNodeType(node);
56          }
57          return nodeType;
58      }
59      
60      public StructuredIterator createIterator(Node node) throws Exception {
61          ArrayList<StructuredIterator> internalIterators = new ArrayList<StructuredIterator>();
62  
63          for (Node internalNode : node.getInternalNodes()) {
64              StructuredIterator internalIterator = createIterator(internalNode);
65              internalIterators.add(internalIterator);
66          }
67          
68          StructuredIterator iterator = index.getIterator(node);
69          if (iterator == null) {
70              iterator = featureFactory.getIterator(node, internalIterators);
71          }
72          
73          return iterator;
74      }
75  
76      public Node transformQuery(Node queryTree) throws Exception {
77          queryTree = StructuredQuery.copy(new AddCombineTraversal(), queryTree);
78          queryTree = StructuredQuery.copy(new WeightConversionTraversal(), queryTree);
79          queryTree = StructuredQuery.copy(new IndriWindowCompatibilityTraversal(), queryTree);
80          queryTree = StructuredQuery.copy(new TextFieldRewriteTraversal(index), queryTree);
81          queryTree = StructuredQuery.copy(new ImplicitFeatureCastTraversal(this), queryTree);
82          return queryTree;
83      }
84  
85      /***
86       * Evaluates a query.
87       *
88       * @param queryTree A query tree that has been already transformed with StructuredRetrieval.transformQuery.
89       * @param requested The number of documents to retrieve, at most.
90       * @return
91       * @throws java.lang.Exception
92       */
93      public ScoredDocument[] runQuery(Node queryTree, int requested) throws Exception {
94          // construct the query iterators
95          ScoreIterator iterator = (ScoreIterator) createIterator(queryTree);
96  
97          // now there should be an iterator at the root of this tree
98          PriorityQueue<ScoredDocument> queue = new PriorityQueue<ScoredDocument>();
99  
100         while (!iterator.isDone()) {
101             int document = iterator.nextCandidate();
102             int length = index.getLength(document);
103             double score = iterator.score(document, length);
104 
105             if (queue.size() <= requested || queue.peek().score < score) {
106                 ScoredDocument scoredDocument = new ScoredDocument(document, score);
107                 queue.add(scoredDocument);
108 
109                 if (queue.size() > requested) {
110                     queue.poll();
111                 }
112             }
113 
114             iterator.movePast(document);
115         }
116 
117         return getArrayResults(queue);
118     }
119 
120     public String getDocumentName(int document) {
121         return index.getDocumentName(document);
122     }
123 
124     public void close() throws IOException {
125         index.close();
126     }
127 }