1
2
3 package org.galagosearch.core.parse;
4
5 import java.io.IOException;
6 import java.util.HashSet;
7 import org.galagosearch.tupleflow.InputClass;
8 import org.galagosearch.tupleflow.OutputClass;
9 import org.galagosearch.tupleflow.Parameters;
10 import org.galagosearch.tupleflow.StandardStep;
11 import org.galagosearch.tupleflow.TupleFlowParameters;
12
13 /***
14 *
15 * @author trevor
16 */
17
18 @InputClass(className="org.galagosearch.core.parse.Document")
19 @OutputClass(className="org.galagosearch.core.parse.Document")
20 public class DocumentFilter extends StandardStep<Document, Document> {
21 HashSet<String> docnos = new HashSet();
22
23 /*** Creates a new instance of DocumentFilter */
24 public DocumentFilter(TupleFlowParameters parameters) {
25 Parameters p = parameters.getXML();
26
27 for(String docno : p.stringList("identifier")) {
28 docnos.add(docno);
29 }
30 }
31
32 public void process(Document document) throws IOException {
33 if (docnos.contains(document.identifier))
34 processor.process(document);
35 }
36
37 public Class<Document> getOutputClass() {
38 return Document.class;
39 }
40
41 public Class<Document> getInputClass() {
42 return Document.class;
43 }
44 }