View Javadoc

1   // BSD License (http://www.galagosearch.org/license)
2   
3   package org.galagosearch.core.parse;
4   
5   import java.io.FileNotFoundException;
6   import java.io.IOException;
7   import java.util.HashMap;
8   import org.galagosearch.core.index.IndexReader;
9   import org.galagosearch.tupleflow.DataStream;
10  import org.galagosearch.tupleflow.VByteInput;
11  
12  /***
13   *
14   * @author trevor
15   */
16  public class DocumentIndexReader {
17      IndexReader reader;
18  
19      public DocumentIndexReader(String fileName) throws FileNotFoundException, IOException {
20          reader = new IndexReader(fileName);
21      }
22  
23      public DocumentIndexReader(IndexReader reader) {
24          this.reader = reader;
25      }
26  
27      public void close() throws IOException {
28          reader.close();
29      }
30  
31      public Iterator getIterator() throws IOException {
32          return new Iterator(reader.getIterator());
33      }
34  
35      public Document getDocument(String key) throws IOException {
36          IndexReader.Iterator iterator = reader.getIterator(key);
37          if (iterator == null) return null;
38          return new Iterator(iterator).getDocument();
39      }
40  
41      public class Iterator {
42          IndexReader.Iterator iterator;
43  
44          Iterator(IndexReader.Iterator iterator) throws IOException {
45              this.iterator = iterator;
46          }
47  
48          public void skipTo(byte[] key) throws IOException {
49              iterator.skipTo(key);
50          }
51  
52          public String getKey() {
53              return iterator.getKey();
54          }
55  
56          public boolean isDone() {
57              return iterator.isDone();
58          }
59  
60          public Document getDocument() throws IOException {
61              String key = iterator.getKey();
62              DataStream stream = iterator.getValueStream();
63              return decodeDocument(key, stream);
64          }
65  
66          public boolean nextDocument() throws IOException {
67              return iterator.nextKey();
68          }
69  
70          Document decodeDocument(String key, DataStream stream) throws IOException {
71              VByteInput input = new VByteInput(stream);
72              Document document = new Document();
73  
74              // The first string is the document text, followed by
75              // key/value metadata pairs.
76              document.identifier = key;
77              document.text = input.readString();
78              document.metadata = new HashMap<String, String>();
79  
80              while (!stream.isDone()) {
81                  String mapKey = input.readString();
82                  String mapValue = input.readString();
83  
84                  document.metadata.put(mapKey, mapValue);
85              }
86  
87              return document;
88          }
89      }
90  }