Coverage Report - org.galagosearch.core.scoring.DirichletSmoother
 
Classes in this File Line Coverage Branch Coverage Complexity
DirichletSmoother
0%
0/26
0%
0/6
0
 
 1  
 // BSD License (http://www.galagosearch.org/license)
 2  
 package org.galagosearch.core.scoring;
 3  
 
 4  
 import java.io.IOException;
 5  
 import java.util.HashMap;
 6  
 import org.galagosearch.core.types.DocumentLengthWordCount;
 7  
 import org.galagosearch.core.types.DocumentWordProbability;
 8  
 import org.galagosearch.core.types.WordProbability;
 9  
 import org.galagosearch.tupleflow.StandardStep;
 10  
 import org.galagosearch.tupleflow.TupleFlowParameters;
 11  
 import org.galagosearch.tupleflow.TypeReader;
 12  
 import org.galagosearch.tupleflow.Utility;
 13  
 import org.galagosearch.tupleflow.execution.ErrorHandler;
 14  
 import org.galagosearch.tupleflow.execution.Verification;
 15  
 
 16  
 /**
 17  
  *
 18  
  * @author trevor
 19  
  */
 20  0
 public class DirichletSmoother extends StandardStep<DocumentLengthWordCount, DocumentWordProbability>
 21  
         implements DistributionSmoother {
 22  
     double mu;
 23  
     HashMap<String, Double> backgrounds;
 24  
 
 25  0
     public DirichletSmoother(double mu, HashMap<String, Double> backgrounds) {
 26  0
         this.mu = mu;
 27  0
         this.backgrounds = backgrounds;
 28  0
     }
 29  
 
 30  0
     public DirichletSmoother(TupleFlowParameters parameters) throws IOException {
 31  0
         this.mu = parameters.getXML().get("mu", 1500);
 32  0
         TypeReader<WordProbability> backgroundReader = parameters.getTypeReader("background");
 33  0
         WordProbability backgroundObject = null;
 34  0
         this.backgrounds = new HashMap<String, Double>();
 35  
 
 36  0
         while ((backgroundObject = backgroundReader.read()) != null) {
 37  0
             backgrounds.put(backgroundObject.word, backgroundObject.probability);
 38  
         }
 39  0
     }
 40  
 
 41  
     public void process(DocumentLengthWordCount object) throws IOException {
 42  0
         double probability = smooth(object.word, object.count, object.length);
 43  0
         processor.process(new DocumentWordProbability(object.document,
 44  
                                                       Utility.makeBytes(object.word), probability));
 45  0
     }
 46  
 
 47  
     public double smooth(double background, int count, int length) {
 48  0
         double numerator = count + mu * background;
 49  0
         double denominator = length + mu;
 50  
 
 51  0
         return numerator / denominator;
 52  
     }
 53  
 
 54  
     public double smooth(String word, int count, int length) {
 55  0
         Double background = backgrounds.get(word);
 56  0
         assert background != null : "Couldn't find " + word + " in backgrounds: " + backgrounds.size();
 57  0
         return smooth(background, count, length);
 58  
     }
 59  
 
 60  
     public Class<DocumentLengthWordCount> getInputClass() {
 61  0
         return DocumentLengthWordCount.class;
 62  
     }
 63  
 
 64  
     public Class<DocumentWordProbability> getOutputClass() {
 65  0
         return DocumentWordProbability.class;
 66  
     }
 67  
 
 68  
     public static void verify(TupleFlowParameters parameters, ErrorHandler handler) {
 69  0
         Verification.verifyTypeReader("background", WordProbability.class, parameters, handler);
 70  0
     }
 71  
 }