1
2 package org.galagosearch.core.scoring;
3
4 import java.io.IOException;
5 import java.util.HashMap;
6 import org.galagosearch.core.types.DocumentLengthWordCount;
7 import org.galagosearch.core.types.DocumentWordProbability;
8 import org.galagosearch.tupleflow.Parameters;
9 import org.galagosearch.tupleflow.StandardStep;
10 import org.galagosearch.tupleflow.Utility;
11
12 /***
13 *
14 * @author trevor
15 */
16 public class LinearSmoother extends StandardStep<DocumentLengthWordCount, DocumentWordProbability>
17 implements DistributionSmoother {
18 double lambda;
19 HashMap<String, Double> backgrounds;
20
21 public LinearSmoother(Parameters.Value value, HashMap<String, Double> backgrounds) {
22 double lm = 0.4;
23
24 if (value.containsKey("lambda")) {
25 lm = Double.parseDouble(value.get("lambda"));
26 }
27
28 this.lambda = lm;
29 this.backgrounds = backgrounds;
30 }
31
32 public LinearSmoother(double lambda, HashMap<String, Double> backgrounds) {
33 this.lambda = lambda;
34 this.backgrounds = backgrounds;
35 }
36
37 public void process(DocumentLengthWordCount object) throws IOException {
38 double background = backgrounds.get(object.word);
39 double foreground = 0;
40
41 if (object.length > 0) {
42 foreground = (double) object.count / (double) object.length;
43 }
44 double probability = lambda * foreground + (1 - lambda) * background;
45 processor.process(new DocumentWordProbability(object.document,
46 Utility.makeBytes(object.word), probability));
47 }
48
49 public double smooth(double background, int count, int length) {
50 return (1 - lambda) * (double) count / (double) length + lambda * background;
51 }
52
53 public double smooth(String word, int count, int length) {
54 return smooth(backgrounds.get(word), count, length);
55 }
56
57 public Class<DocumentLengthWordCount> getInputClass() {
58 return DocumentLengthWordCount.class;
59 }
60
61 public Class<DocumentWordProbability> getOutputClass() {
62 return DocumentWordProbability.class;
63 }
64 }