View Javadoc

1   // BSD License (http://www.galagosearch.org/license)
2   
3   package org.galagosearch.core.index;
4   
5   import java.io.BufferedOutputStream;
6   import java.io.DataOutputStream;
7   import java.io.FileNotFoundException;
8   import java.io.FileOutputStream;
9   import java.io.IOException;
10  import org.galagosearch.core.types.NumberedDocumentData;
11  import org.galagosearch.tupleflow.Counter;
12  import org.galagosearch.tupleflow.InputClass;
13  import org.galagosearch.tupleflow.Processor;
14  import org.galagosearch.tupleflow.TupleFlowParameters;
15  import org.galagosearch.tupleflow.execution.ErrorHandler;
16  import org.galagosearch.tupleflow.execution.Verification;
17  
18  /***
19   * Writes the document lengths file based on data in NumberedDocumentData tuples.
20   * The document lengths data is used by StructuredIndex because it's a key
21   * input to more scoring functions.
22   * 
23   * @author trevor
24   */
25  @InputClass(className = "org.galagosearch.core.types.NumberedDocumentData", order = {"+number"})
26  public class DocumentLengthsWriter implements Processor<NumberedDocumentData> {
27      DataOutputStream output;
28      int document = 0;
29      Counter documentsWritten = null;
30  
31      /*** Creates a new instance of DocumentLengthsWriter */
32      public DocumentLengthsWriter(TupleFlowParameters parameters) throws FileNotFoundException {
33          String filename = parameters.getXML().get("filename");
34          output = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(filename)));
35          documentsWritten = parameters.getCounter("Documents Written");
36      }
37  
38      public void close() throws IOException {
39          output.close();
40      }
41  
42      public void process(NumberedDocumentData object) throws IOException {
43          assert document <= object.number : "d: " + document + " o.d:" + object.number;
44  
45          while (document < object.number) {
46              output.writeInt(0);
47              document++;
48          }
49  
50          output.writeInt(object.textLength);
51          document++;
52          if (documentsWritten != null) documentsWritten.increment();
53      }
54  
55      public static void verify(TupleFlowParameters parameters, ErrorHandler handler) {
56          if (!parameters.getXML().containsKey("filename")) {
57              handler.addError("DocumentLengthsWriter requires an 'filename' parameter.");
58              return;
59          }
60  
61          String filename = parameters.getXML().get("filename");
62          Verification.requireWriteableFile(filename, handler);
63      }
64  }