| 1 | |
|
| 2 | |
package org.galagosearch.core.index; |
| 3 | |
|
| 4 | |
import java.io.FileNotFoundException; |
| 5 | |
import java.io.IOException; |
| 6 | |
import java.io.OutputStream; |
| 7 | |
import org.galagosearch.core.types.NumberWordProbability; |
| 8 | |
import org.galagosearch.tupleflow.TupleFlowParameters; |
| 9 | |
|
| 10 | |
|
| 11 | |
|
| 12 | |
|
| 13 | |
|
| 14 | |
public class SparseFloatListWriter implements |
| 15 | |
NumberWordProbability.NumberWordOrder.ShreddedProcessor { |
| 16 | |
IndexWriter writer; |
| 17 | |
DoubleInvertedList list; |
| 18 | |
|
| 19 | |
public class DoubleInvertedList implements IndexElement { |
| 20 | 24 | BackedCompressedByteBuffer data = new BackedCompressedByteBuffer(); |
| 21 | 24 | CompressedByteBuffer header = new CompressedByteBuffer(); |
| 22 | |
int lastDocument; |
| 23 | |
int documentCount; |
| 24 | |
byte[] word; |
| 25 | |
|
| 26 | 24 | public DoubleInvertedList(byte[] word) { |
| 27 | 24 | this.word = word; |
| 28 | 24 | this.lastDocument = 0; |
| 29 | 24 | this.documentCount = 0; |
| 30 | 24 | } |
| 31 | |
|
| 32 | |
public void write(final OutputStream stream) throws IOException { |
| 33 | 24 | header.write(stream); |
| 34 | 24 | data.write(stream); |
| 35 | 24 | } |
| 36 | |
|
| 37 | |
public void addDocument(int document) throws IOException { |
| 38 | 60 | data.add(document - lastDocument); |
| 39 | 60 | documentCount++; |
| 40 | 60 | lastDocument = document; |
| 41 | 60 | } |
| 42 | |
|
| 43 | |
public void addProbability(double probability) throws IOException { |
| 44 | 60 | data.addFloat((float) probability); |
| 45 | 60 | } |
| 46 | |
|
| 47 | |
public byte[] key() { |
| 48 | 144 | return word; |
| 49 | |
} |
| 50 | |
|
| 51 | |
public long dataLength() { |
| 52 | 120 | return data.length() + header.length(); |
| 53 | |
} |
| 54 | |
|
| 55 | |
public void close() { |
| 56 | 24 | header.add(documentCount); |
| 57 | 24 | } |
| 58 | |
} |
| 59 | |
|
| 60 | |
|
| 61 | 12 | public SparseFloatListWriter(TupleFlowParameters parameters) throws FileNotFoundException, IOException { |
| 62 | 12 | writer = new IndexWriter(parameters); |
| 63 | 12 | writer.getManifest().add("readerClass", SparseFloatListReader.class.getName()); |
| 64 | 12 | writer.getManifest().add("writerClass", getClass().getName()); |
| 65 | 12 | } |
| 66 | |
|
| 67 | |
public void processWord(byte[] word) throws IOException { |
| 68 | 24 | if (list != null) { |
| 69 | 12 | list.close(); |
| 70 | 12 | writer.add(list); |
| 71 | |
} |
| 72 | |
|
| 73 | 24 | list = new DoubleInvertedList(word); |
| 74 | 24 | } |
| 75 | |
|
| 76 | |
public void processNumber(int number) throws IOException { |
| 77 | 60 | list.addDocument(number); |
| 78 | 60 | } |
| 79 | |
|
| 80 | |
public void processTuple(double probability) throws IOException { |
| 81 | 60 | list.addProbability(probability); |
| 82 | 60 | } |
| 83 | |
|
| 84 | |
public void close() throws IOException { |
| 85 | 12 | if (list != null) { |
| 86 | 12 | list.close(); |
| 87 | 12 | writer.add(list); |
| 88 | |
} |
| 89 | |
|
| 90 | 12 | writer.close(); |
| 91 | 12 | } |
| 92 | |
} |