1
2 package org.galagosearch.core.index;
3
4 import java.util.ArrayList;
5 import org.galagosearch.tupleflow.Parameters;
6 import org.galagosearch.tupleflow.Processor;
7 import org.galagosearch.core.parse.Document;
8 import org.galagosearch.core.parse.Porter2Stemmer;
9 import org.galagosearch.core.parse.WordFilter;
10 import java.util.HashSet;
11
12 /***
13 *
14 * @author trevor
15 */
16 public class DocumentTransformationFactory {
17 public static Processor<Document> instance(Parameters parameters, String key) {
18 Processor<Document> processor = null;
19
20 if (key.equals("stopper") && parameters.containsKey("stopper")) {
21 HashSet<String> stopwords = new HashSet<String>(parameters.stringList("stopper/word"));
22 processor = new WordFilter(stopwords);
23 }
24
25 if (key.equals("stemmer") && parameters.get("stemmer", "none").equals("porter2")) {
26 processor = new Porter2Stemmer();
27 }
28
29 return processor;
30 }
31
32 public static ArrayList<Processor<Document>> instance(Parameters parameters) {
33 ArrayList<Processor<Document>> transformations = new ArrayList<Processor<Document>>();
34 String[] transformationNames = {"stopper", "stemmer"};
35
36 for (String name : transformationNames) {
37 Processor<Document> transformation = instance(parameters, name);
38
39 if (transformation != null) {
40 transformations.add(transformation);
41 }
42 }
43
44 return transformations;
45 }
46 }