1
2
3 package org.galagosearch.core.index;
4
5 import java.io.BufferedOutputStream;
6 import java.io.DataOutputStream;
7 import java.io.FileNotFoundException;
8 import java.io.FileOutputStream;
9 import java.io.IOException;
10 import org.galagosearch.core.types.NumberedDocumentData;
11 import org.galagosearch.tupleflow.Counter;
12 import org.galagosearch.tupleflow.InputClass;
13 import org.galagosearch.tupleflow.Processor;
14 import org.galagosearch.tupleflow.TupleFlowParameters;
15 import org.galagosearch.tupleflow.execution.ErrorHandler;
16 import org.galagosearch.tupleflow.execution.Verification;
17
18 /***
19 * Writes the document lengths file based on data in NumberedDocumentData tuples.
20 * The document lengths data is used by StructuredIndex because it's a key
21 * input to more scoring functions.
22 *
23 * @author trevor
24 */
25 @InputClass(className = "org.galagosearch.core.types.NumberedDocumentData", order = {"+number"})
26 public class DocumentLengthsWriter implements Processor<NumberedDocumentData> {
27 DataOutputStream output;
28 int document = 0;
29 Counter documentsWritten = null;
30
31 /*** Creates a new instance of DocumentLengthsWriter */
32 public DocumentLengthsWriter(TupleFlowParameters parameters) throws FileNotFoundException {
33 String filename = parameters.getXML().get("filename");
34 output = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(filename)));
35 documentsWritten = parameters.getCounter("Documents Written");
36 }
37
38 public void close() throws IOException {
39 output.close();
40 }
41
42 public void process(NumberedDocumentData object) throws IOException {
43 assert document <= object.number : "d: " + document + " o.d:" + object.number;
44
45 while (document < object.number) {
46 output.writeInt(0);
47 document++;
48 }
49
50 output.writeInt(object.textLength);
51 document++;
52 if (documentsWritten != null) documentsWritten.increment();
53 }
54
55 public static void verify(TupleFlowParameters parameters, ErrorHandler handler) {
56 if (!parameters.getXML().containsKey("filename")) {
57 handler.addError("DocumentLengthsWriter requires an 'filename' parameter.");
58 return;
59 }
60
61 String filename = parameters.getXML().get("filename");
62 Verification.requireWriteableFile(filename, handler);
63 }
64 }