View Javadoc

1   // BSD License (http://www.galagosearch.org/license)
2   package org.galagosearch.core.index;
3   
4   import java.util.ArrayList;
5   import org.galagosearch.tupleflow.Parameters;
6   import org.galagosearch.tupleflow.Processor;
7   import org.galagosearch.core.parse.Document;
8   import org.galagosearch.core.parse.Porter2Stemmer;
9   import org.galagosearch.core.parse.WordFilter;
10  import java.util.HashSet;
11  
12  /***
13   *
14   * @author trevor
15   */
16  public class DocumentTransformationFactory {
17      public static Processor<Document> instance(Parameters parameters, String key) {
18          Processor<Document> processor = null;
19  
20          if (key.equals("stopper") && parameters.containsKey("stopper")) {
21              HashSet<String> stopwords = new HashSet<String>(parameters.stringList("stopper/word"));
22              processor = new WordFilter(stopwords);
23          }
24  
25          if (key.equals("stemmer") && parameters.get("stemmer", "none").equals("porter2")) {
26              processor = new Porter2Stemmer();
27          }
28  
29          return processor;
30      }
31  
32      public static ArrayList<Processor<Document>> instance(Parameters parameters) {
33          ArrayList<Processor<Document>> transformations = new ArrayList<Processor<Document>>();
34          String[] transformationNames = {"stopper", "stemmer"};
35  
36          for (String name : transformationNames) {
37              Processor<Document> transformation = instance(parameters, name);
38  
39              if (transformation != null) {
40                  transformations.add(transformation);
41              }
42          }
43  
44          return transformations;
45      }
46  }