View Javadoc

1   // BSD License (http://www.galagosearch.org/license)
2   package org.galagosearch.core.scoring;
3   
4   import java.io.IOException;
5   import org.galagosearch.core.retrieval.structured.CountIterator;
6   import org.galagosearch.core.retrieval.structured.RequiredStatistics;
7   import org.galagosearch.core.retrieval.structured.ScoringFunctionIterator;
8   import org.galagosearch.tupleflow.Parameters;
9   
10  /***
11   *
12   * @author trevor
13   */
14  @RequiredStatistics(statistics = {"collectionLength"})
15  public class DirichletScorer extends ScoringFunctionIterator {
16      double background;
17      double mu;
18  
19      public DirichletScorer(Parameters parameters, CountIterator iterator) throws IOException {
20          super(iterator);
21  
22          mu = parameters.get("mu", 1500);
23          if (parameters.containsKey("collectionProbability")) {
24              background = parameters.get("collectionProbability", 0.0001);
25          } else {
26              long collectionLength = parameters.get("collectionLength", (long)0);
27              long count = 0;
28              
29              while (!iterator.isDone()) {
30                  count += iterator.count();
31                  iterator.nextDocument();
32              }
33              
34              background = (double)count / (double)collectionLength;
35              iterator.reset();
36          }
37      }
38  
39      public double scoreCount(int count, int length) {
40          double numerator = count + mu * background;
41          double denominator = length + mu;
42  
43          return Math.log(numerator / denominator);
44      }
45  }
46