View Javadoc

1   // BSD License (http://www.galagosearch.org/license)
2   package org.galagosearch.core.scoring;
3   
4   import java.io.IOException;
5   import java.util.HashMap;
6   import org.galagosearch.core.types.DocumentLengthWordCount;
7   import org.galagosearch.core.types.DocumentWordProbability;
8   import org.galagosearch.tupleflow.Parameters;
9   import org.galagosearch.tupleflow.StandardStep;
10  import org.galagosearch.tupleflow.Utility;
11  
12  /***
13   *
14   * @author trevor
15   */
16  public class LinearSmoother extends StandardStep<DocumentLengthWordCount, DocumentWordProbability>
17          implements DistributionSmoother {
18      double lambda;
19      HashMap<String, Double> backgrounds;
20  
21      public LinearSmoother(Parameters.Value value, HashMap<String, Double> backgrounds) {
22          double lm = 0.4;
23  
24          if (value.containsKey("lambda")) {
25              lm = Double.parseDouble(value.get("lambda"));
26          }
27  
28          this.lambda = lm;
29          this.backgrounds = backgrounds;
30      }
31  
32      public LinearSmoother(double lambda, HashMap<String, Double> backgrounds) {
33          this.lambda = lambda;
34          this.backgrounds = backgrounds;
35      }
36  
37      public void process(DocumentLengthWordCount object) throws IOException {
38          double background = backgrounds.get(object.word);
39          double foreground = 0;
40  
41          if (object.length > 0) {
42              foreground = (double) object.count / (double) object.length;
43          }
44          double probability = lambda * foreground + (1 - lambda) * background;
45          processor.process(new DocumentWordProbability(object.document,
46                                                        Utility.makeBytes(object.word), probability));
47      }
48  
49      public double smooth(double background, int count, int length) {
50          return (1 - lambda) * (double) count / (double) length + lambda * background;
51      }
52  
53      public double smooth(String word, int count, int length) {
54          return smooth(backgrounds.get(word), count, length);
55      }
56  
57      public Class<DocumentLengthWordCount> getInputClass() {
58          return DocumentLengthWordCount.class;
59      }
60  
61      public Class<DocumentWordProbability> getOutputClass() {
62          return DocumentWordProbability.class;
63      }
64  }