Coverage Report - org.galagosearch.core.parse.DocumentIndexWriter
 
| Classes in this File | Line Coverage | Branch Coverage | Complexity |
|----------------------|---------------|-----------------|------------|
| DocumentIndexWriter  | 84% (21/25)   | 67% (4/6)       | 0          |
 
 1  
 // BSD License (http://www.galagosearch.org/license)
 2  
 
 3  
 package org.galagosearch.core.parse;
 4  
 
 5  
 import java.io.ByteArrayOutputStream;
 6  
 import java.io.DataOutputStream;
 7  
 import java.io.FileNotFoundException;
 8  
 import java.io.IOException;
 9  
 import java.util.Map;
 10  
 import org.galagosearch.core.index.GenericElement;
 11  
 import org.galagosearch.core.index.IndexWriter;
 12  
 import org.galagosearch.tupleflow.Counter;
 13  
 import org.galagosearch.tupleflow.InputClass;
 14  
 import org.galagosearch.tupleflow.Parameters;
 15  
 import org.galagosearch.tupleflow.Processor;
 16  
 import org.galagosearch.tupleflow.TupleFlowParameters;
 17  
 import org.galagosearch.tupleflow.VByteOutput;
 18  
 import org.galagosearch.tupleflow.execution.ErrorHandler;
 19  
 import org.galagosearch.tupleflow.execution.Verification;
 20  
 
 21  
 /**
 22  
  * Writes document text and metadata to an index file.  The output files
 23  
  * are in '.corpus' format, which can be fed to UniversalParser as an input
 24  
  * to indexing.  The '.corpus' format is also convenient for quickly
 25  
  * finding individual documents.
 26  
  * 
 27  
  * @author trevor
 28  
  */
 29  
 @InputClass(className = "org.galagosearch.core.parse.Document")
 30  0
 public class DocumentIndexWriter implements Processor<Document> {
 31  
     IndexWriter writer; // destination index; created in the constructor, closed in close()
 32  
     Counter documentsWritten; // optional progress counter; null-checked in process()
 33  
     
 34  12
     public DocumentIndexWriter(TupleFlowParameters parameters) throws FileNotFoundException, IOException { // opens the writer on the stage's "filename" parameter
 35  12
         Parameters p = new Parameters();
 36  12
         p.add("isCompressed", "true"); // passed through to IndexWriter; presumably turns on compression -- TODO confirm
 37  12
         writer = new IndexWriter(parameters.getXML().get("filename"), p); // verify() reports an error when "filename" is absent
 38  12
         documentsWritten = parameters.getCounter("Documents Written");
 39  12
     }
 40  
     
 41  
     public void close() throws IOException { // closes the underlying IndexWriter
 42  12
         writer.close();
 43  12
     }
 44  
 
 45  
     public void process(Document document) throws IOException { // serializes one document and adds it to the writer
 46  12
         ByteArrayOutputStream stream = new ByteArrayOutputStream(); // in-memory buffer holding this document's record
 47  12
         VByteOutput output = new VByteOutput(new DataOutputStream(stream));
 48  
         
 49  12
         output.writeString(document.text); // record layout: text first, then each metadata key followed by its value
 50  12
         for (Map.Entry<String, String> entry : document.metadata.entrySet()) {
 51  24
             output.writeString(entry.getKey());
 52  24
             output.writeString(entry.getValue());
 53  
         }
 54  
         
 55  12
         writer.add(new GenericElement(document.identifier, stream.toByteArray())); // element pairs document.identifier with the buffered bytes
 56  12
         if (documentsWritten != null) // counter is optional; skip the update when none was supplied
 57  0
             documentsWritten.increment();
 58  12
     }
 59  
 
 60  
     public static void verify(TupleFlowParameters parameters, ErrorHandler handler) { // pre-flight check of the "filename" parameter
 61  4
         if (!parameters.getXML().containsKey("filename")) {
 62  0
             handler.addError("DocumentIndexWriter requires an 'filename' parameter."); // NOTE(review): message grammar -- "an 'filename'" should probably read "a 'filename'"
 63  0
             return; // nothing further to check without a filename
 64  
         }
 65  
 
 66  4
         String index = parameters.getXML().get("filename");
 67  4
         Verification.requireWriteableFile(index, handler); // delegates the file check to Verification; any problem goes to handler
 68  4
     }
 69  
 }