1
2 package org.galagosearch.core.scoring;
3
4 import java.io.IOException;
5 import org.galagosearch.core.retrieval.structured.CountIterator;
6 import org.galagosearch.core.retrieval.structured.RequiredStatistics;
7 import org.galagosearch.core.retrieval.structured.ScoringFunctionIterator;
8 import org.galagosearch.tupleflow.Parameters;
9
10 /***
11 *
12 * @author trevor
13 */
14 @RequiredStatistics(statistics = {"collectionLength"})
15 public class DirichletScorer extends ScoringFunctionIterator {
16 double background;
17 double mu;
18
19 public DirichletScorer(Parameters parameters, CountIterator iterator) throws IOException {
20 super(iterator);
21
22 mu = parameters.get("mu", 1500);
23 if (parameters.containsKey("collectionProbability")) {
24 background = parameters.get("collectionProbability", 0.0001);
25 } else {
26 long collectionLength = parameters.get("collectionLength", (long)0);
27 long count = 0;
28
29 while (!iterator.isDone()) {
30 count += iterator.count();
31 iterator.nextDocument();
32 }
33
34 background = (double)count / (double)collectionLength;
35 iterator.reset();
36 }
37 }
38
39 public double scoreCount(int count, int length) {
40 double numerator = count + mu * background;
41 double denominator = length + mu;
42
43 return Math.log(numerator / denominator);
44 }
45 }
46