1
2
3 package org.galagosearch.core.parse;
4
5 import java.io.FileNotFoundException;
6 import java.io.IOException;
7 import java.util.HashMap;
8 import org.galagosearch.core.index.IndexReader;
9 import org.galagosearch.tupleflow.DataStream;
10 import org.galagosearch.tupleflow.VByteInput;
11
12 /***
13 *
14 * @author trevor
15 */
16 public class DocumentIndexReader {
17 IndexReader reader;
18
19 public DocumentIndexReader(String fileName) throws FileNotFoundException, IOException {
20 reader = new IndexReader(fileName);
21 }
22
23 public DocumentIndexReader(IndexReader reader) {
24 this.reader = reader;
25 }
26
27 public void close() throws IOException {
28 reader.close();
29 }
30
31 public Iterator getIterator() throws IOException {
32 return new Iterator(reader.getIterator());
33 }
34
35 public Document getDocument(String key) throws IOException {
36 IndexReader.Iterator iterator = reader.getIterator(key);
37 if (iterator == null) return null;
38 return new Iterator(iterator).getDocument();
39 }
40
41 public class Iterator {
42 IndexReader.Iterator iterator;
43
44 Iterator(IndexReader.Iterator iterator) throws IOException {
45 this.iterator = iterator;
46 }
47
48 public void skipTo(byte[] key) throws IOException {
49 iterator.skipTo(key);
50 }
51
52 public String getKey() {
53 return iterator.getKey();
54 }
55
56 public boolean isDone() {
57 return iterator.isDone();
58 }
59
60 public Document getDocument() throws IOException {
61 String key = iterator.getKey();
62 DataStream stream = iterator.getValueStream();
63 return decodeDocument(key, stream);
64 }
65
66 public boolean nextDocument() throws IOException {
67 return iterator.nextKey();
68 }
69
70 Document decodeDocument(String key, DataStream stream) throws IOException {
71 VByteInput input = new VByteInput(stream);
72 Document document = new Document();
73
74
75
76 document.identifier = key;
77 document.text = input.readString();
78 document.metadata = new HashMap<String, String>();
79
80 while (!stream.isDone()) {
81 String mapKey = input.readString();
82 String mapValue = input.readString();
83
84 document.metadata.put(mapKey, mapValue);
85 }
86
87 return document;
88 }
89 }
90 }