Coverage Report - org.galagosearch.core.index.DocumentNameWriter
 
Classes in this File Line Coverage Branch Coverage Complexity
DocumentNameWriter
80%
41/51
56%
10/18
0
 
 1  
 // BSD License (http://www.galagosearch.org/license)
 2  
 
 3  
 package org.galagosearch.core.index;
 4  
 
 5  
 import java.io.BufferedOutputStream;
 6  
 import java.io.DataOutputStream;
 7  
 import java.io.FileNotFoundException;
 8  
 import java.io.FileOutputStream;
 9  
 import java.io.IOException;
 10  
 import java.util.ArrayList;
 11  
 import org.galagosearch.core.types.NumberedDocumentData;
 12  
 import org.galagosearch.tupleflow.Counter;
 13  
 import org.galagosearch.tupleflow.InputClass;
 14  
 import org.galagosearch.tupleflow.Processor;
 15  
 import org.galagosearch.tupleflow.TupleFlowParameters;
 16  
 import org.galagosearch.tupleflow.Utility;
 17  
 import org.galagosearch.tupleflow.execution.ErrorHandler;
 18  
 
 19  
 /**
 20  
  * Writes a list of document names to a binary file.
 21  
  * This class assumes that a document name is a string that contains at least
 22  
  * one hyphen ('-') followed entirely by numbers.  All TREC document names
 23  
  * follow this convention, e.g.:  WTX-B01-0001.
 24  
  *
 25  
  * @author Trevor Strohman
 26  
  */
 27  
 @InputClass(className = "org.galagosearch.core.types.NumberedDocumentData")
 28  4
 public class DocumentNameWriter implements Processor<NumberedDocumentData> {
 29  12
     String lastHeader = null;
 30  
     DataOutputStream output;
 31  12
     int lastFooterWidth = 0;
 32  12
     int lastDocument = -1;
 33  
     ArrayList<Integer> footers;
 34  12
     Counter documentsWritten = null;
 35  
 
 36  12
     public DocumentNameWriter(TupleFlowParameters parameters) throws FileNotFoundException, IOException {
 37  12
         String filename = parameters.getXML().get("filename");
 38  12
         footers = new ArrayList<Integer>();
 39  12
         output = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(filename)));
 40  12
         documentsWritten = parameters.getCounter("Documents Written");
 41  12
     }
 42  
 
 43  
     public void flush() throws IOException {
 44  252
         if (footers.size() == 0) {
 45  12
             return;
 46  
         }
 47  
 
 48  240
         byte[] headerBytes = Utility.makeBytes(lastHeader);
 49  240
         output.writeInt(headerBytes.length);
 50  240
         output.write(headerBytes);
 51  240
         output.writeInt(lastFooterWidth);
 52  240
         output.writeInt(footers.size());
 53  
 
 54  240
         for (int footerValue : footers) {
 55  240
             output.writeInt(footerValue);
 56  
         }
 57  240
     }
 58  
 
 59  
     public void process(NumberedDocumentData numberedDocumentData) throws IOException {
 60  240
         assert numberedDocumentData.number - 1 == lastDocument;
 61  240
         lastDocument = numberedDocumentData.number;
 62  
 
 63  240
         String documentName = numberedDocumentData.identifier;
 64  240
         int lastDash = documentName.lastIndexOf("-");
 65  
 
 66  240
         if (lastDash == -1) {
 67  240
             putName(documentName, 0, 0);
 68  
         } else {
 69  0
             String header = documentName.substring(0, lastDash);
 70  0
             String footer = documentName.substring(lastDash + 1);
 71  
 
 72  
             try {
 73  0
                 int footerValue = Integer.parseInt(footer);
 74  0
                 putName(header, footerValue, footer.length());
 75  0
             } catch (NumberFormatException e) {
 76  0
                 putName(documentName, 0, 0);
 77  0
             }
 78  
         }
 79  
 
 80  240
         if (documentsWritten != null) documentsWritten.increment();
 81  240
     }
 82  
 
 83  
     public void putName(String header, int footer, int footerWidth) throws IOException {
 84  240
         if (header.equals(lastHeader) && footerWidth == lastFooterWidth) {
 85  0
             footers.add(footer);
 86  
         } else {
 87  240
             flush();
 88  240
             lastHeader = header;
 89  240
             footers = new ArrayList<Integer>();
 90  240
             footers.add(footer);
 91  240
             lastFooterWidth = footerWidth;
 92  
         }
 93  240
     }
 94  
 
 95  
     public void close() throws IOException {
 96  12
         flush();
 97  12
         output.close();
 98  12
     }
 99  
 
 100  
     public static void verify(TupleFlowParameters parameters, ErrorHandler handler) {
 101  16
         if (!parameters.getXML().containsKey("filename")) {
 102  0
             handler.addError("DocumentNameWriter requires an 'filename' parameter.");
 103  0
             return;
 104  
         }
 105  16
     }
 106  
 }