View Javadoc

1   // BSD License (http://www.galagosearch.org/license)
2   
3   package org.galagosearch.core.parse;
4   
5   import java.io.IOException;
6   import java.util.HashSet;
7   import org.galagosearch.tupleflow.InputClass;
8   import org.galagosearch.tupleflow.OutputClass;
9   import org.galagosearch.tupleflow.Parameters;
10  import org.galagosearch.tupleflow.StandardStep;
11  import org.galagosearch.tupleflow.TupleFlowParameters;
12  
13  /***
14   *
15   * @author trevor
16   */
17  
18  @InputClass(className="org.galagosearch.core.parse.Document")
19  @OutputClass(className="org.galagosearch.core.parse.Document")
20  public class DocumentFilter extends StandardStep<Document, Document> {
21      HashSet<String> docnos = new HashSet();
22      
23      /*** Creates a new instance of DocumentFilter */
24      public DocumentFilter(TupleFlowParameters parameters) {
25          Parameters p = parameters.getXML();
26          
27          for(String docno : p.stringList("identifier")) {
28              docnos.add(docno);
29          }
30      }
31      
32      public void process(Document document) throws IOException {
33          if (docnos.contains(document.identifier))
34              processor.process(document);
35      }
36      
37      public Class<Document> getOutputClass() {
38          return Document.class;
39      }
40      
41      public Class<Document> getInputClass() {
42          return Document.class;
43      }
44  }