Coverage Report - org.galagosearch.core.parse.DocumentDataExtractor
 
Classes in this File Line Coverage Branch Coverage Complexity
DocumentDataExtractor
0%
0/9
0%
0/2
0
 
 1  
 // BSD License (http://www.galagosearch.org/license)
 2  
 
 3  
 package org.galagosearch.core.parse;
 4  
 
 5  
 import java.io.IOException;
 6  
 import org.galagosearch.core.types.DocumentData;
 7  
 import org.galagosearch.tupleflow.InputClass;
 8  
 import org.galagosearch.tupleflow.OutputClass;
 9  
 import org.galagosearch.tupleflow.StandardStep;
 10  
 import org.galagosearch.tupleflow.execution.Verified;
 11  
 
 12  
 /**
 13  
  * Copies a few pieces of metadata about a document (identifier, url, length) from
 14  
  * a document object and stores them in a DocumentData tuple.
 15  
  * 
 16  
  * @author trevor
 17  
  */
 18  
 @InputClass(className = "org.galagosearch.core.parse.Document")
 19  
 @OutputClass(className = "org.galagosearch.core.types.DocumentData")
 20  
 @Verified
 21  0
 public class DocumentDataExtractor extends StandardStep<Document, DocumentData> {
 22  
     /**
      * Builds a DocumentData tuple from the given document and passes it to
      * {@code processor}.
      *
      * The tuple receives the document's identifier, the value stored under the
      * "url" metadata key (or the empty string when that key is absent), and the
      * number of entries in the document's term list as its text length.
      *
      * @param document the document to summarize; its {@code metadata} and
      *                 {@code terms} fields are dereferenced without a null check
      * @throws IOException declared by this method; presumably propagated from
      *                     {@code processor.process} (not visible here -- confirm)
      */
     public void process(Document document) throws IOException {
 23  0
         DocumentData data = new DocumentData();
 24  0
         data.identifier = document.identifier;
 25  0
         // Default to an empty url; replaced below only when a "url" key exists.
         data.url = "";
 26  0
         if (document.metadata.containsKey("url")) {
 27  0
             data.url = document.metadata.get("url");
 28  
         }
 29  0
         // Text length is measured as the number of entries in document.terms.
         data.textLength = document.terms.size();
 30  
 
 31  0
         // NOTE(review): processor is not declared in this class; presumably
         // inherited from StandardStep -- confirm.
         processor.process(data);
 32  0
     }
 33  
 }