View Javadoc

1   // BSD License (http://www.galagosearch.org/license)
2   
3   package org.galagosearch.core.parse;
4   
5   import java.io.IOException;
6   import java.util.HashMap;
7   import java.util.List;
8   import org.galagosearch.tupleflow.InputClass;
9   import org.galagosearch.tupleflow.OutputClass;
10  import org.galagosearch.tupleflow.Parameters.Value;
11  import org.galagosearch.tupleflow.StandardStep;
12  import org.galagosearch.tupleflow.TupleFlowParameters;
13  import org.galagosearch.tupleflow.execution.Verified;
14  
15  /***
16   *
17   * @author trevor
18   */
19  @InputClass(className = "org.galagosearch.core.parse.Document")
20  @OutputClass(className = "org.galagosearch.core.parse.Document")
21  @Verified
22  public class FieldConflater extends StandardStep<Document, Document> {
23      HashMap<String, String> conflations = new HashMap<String, String>();
24  
25      public FieldConflater(TupleFlowParameters parameters) {
26          List<Value> values = parameters.getXML().list("field");
27  
28          for (Value field : values) {
29              List<String> sources = field.stringList("source");
30              String destination = field.get("destination");
31  
32              for (String s : sources) {
33                  conflations.put(s, destination);
34              }
35          }
36      }
37  
38      public void process(Document document) throws IOException {
39          for (Tag tag : document.tags) {
40              if (conflations.containsKey(tag.name)) {
41                  tag.name = conflations.get(tag.name);
42              }
43          }
44  
45          processor.process(document);
46      }
47  }