View Javadoc

1   // BSD License (http://www.galagosearch.org/license)
2   
3   package org.galagosearch.core.parse;
4   
5   import java.io.IOException;
6   import org.galagosearch.core.types.AdditionalDocumentText;
7   import org.galagosearch.tupleflow.InputClass;
8   import org.galagosearch.tupleflow.OutputClass;
9   import org.galagosearch.tupleflow.StandardStep;
10  import org.galagosearch.tupleflow.TupleFlowParameters;
11  import org.galagosearch.tupleflow.TypeReader;
12  import org.galagosearch.tupleflow.Utility;
13  import org.galagosearch.tupleflow.execution.ErrorHandler;
14  import org.galagosearch.tupleflow.execution.Verification;
15  
16  /***
17   * Adds tuples of type AdditionalDocumentText to the end of the text field in
18   * a document.  The AdditionalDocumentText stream is specified in the
19   * textSource parameter.  This stage should be used before document tokenizing.
20   * 
21   * @author trevor
22   */
23  @InputClass(className = "org.galagosearch.core.parse.Document")
24  @OutputClass(className = "org.galagosearch.core.parse.Document")
25  public class AdditionalTextCombiner extends StandardStep<Document, Document> {
26      TypeReader<AdditionalDocumentText> text;
27      AdditionalDocumentText last;
28  
29      @SuppressWarnings("unchecked")
30      public AdditionalTextCombiner(TupleFlowParameters parameters) throws IOException {
31          String readerName = parameters.getXML().get("textSource");
32          text = parameters.getTypeReader(readerName);
33          last = text.read();
34      }
35  
36      public static void verify(TupleFlowParameters parameters, ErrorHandler handler) {
37          if (!Verification.requireParameters(new String[] { "textSource" }, parameters.getXML(), handler))
38              return;
39  
40          String readerName = parameters.getXML().get("textSource");
41          Verification.verifyTypeReader(readerName, AdditionalDocumentText.class, parameters, handler);
42      }
43  
44      @Override
45      public void process(Document document) throws IOException {
46          while (last != null && Utility.compare(last.identifier, document.identifier) < 0) {
47              last = text.read();
48          }
49  
50          if (last != null && last.identifier.equals(document.identifier)) {
51              document.text += last.text;
52          }
53  
54          processor.process(document);
55      }
56  }