| 1 | |
|
| 2 | |
package org.galagosearch.core.index; |
| 3 | |
|
| 4 | |
import java.util.ArrayList; |
| 5 | |
import org.galagosearch.tupleflow.Parameters; |
| 6 | |
import org.galagosearch.tupleflow.Processor; |
| 7 | |
import org.galagosearch.core.parse.Document; |
| 8 | |
import org.galagosearch.core.parse.Porter2Stemmer; |
| 9 | |
import org.galagosearch.core.parse.WordFilter; |
| 10 | |
import java.util.HashSet; |
| 11 | |
|
| 12 | |
|
| 13 | |
|
| 14 | |
|
| 15 | |
|
| 16 | 0 | public class DocumentTransformationFactory { |
| 17 | |
public static Processor<Document> instance(Parameters parameters, String key) { |
| 18 | 0 | Processor<Document> processor = null; |
| 19 | |
|
| 20 | 0 | if (key.equals("stopper") && parameters.containsKey("stopper")) { |
| 21 | 0 | HashSet<String> stopwords = new HashSet<String>(parameters.stringList("stopper/word")); |
| 22 | 0 | processor = new WordFilter(stopwords); |
| 23 | |
} |
| 24 | |
|
| 25 | 0 | if (key.equals("stemmer") && parameters.get("stemmer", "none").equals("porter2")) { |
| 26 | 0 | processor = new Porter2Stemmer(); |
| 27 | |
} |
| 28 | |
|
| 29 | 0 | return processor; |
| 30 | |
} |
| 31 | |
|
| 32 | |
public static ArrayList<Processor<Document>> instance(Parameters parameters) { |
| 33 | 0 | ArrayList<Processor<Document>> transformations = new ArrayList<Processor<Document>>(); |
| 34 | 0 | String[] transformationNames = {"stopper", "stemmer"}; |
| 35 | |
|
| 36 | 0 | for (String name : transformationNames) { |
| 37 | 0 | Processor<Document> transformation = instance(parameters, name); |
| 38 | |
|
| 39 | 0 | if (transformation != null) { |
| 40 | 0 | transformations.add(transformation); |
| 41 | |
} |
| 42 | |
} |
| 43 | |
|
| 44 | 0 | return transformations; |
| 45 | |
} |
| 46 | |
} |