Coverage Report - org.galagosearch.core.eval.SetRetrievalComparator
 
Classes in this File Line Coverage Branch Coverage Complexity
SetRetrievalComparator
0%
0/136
0%
0/66
0
 
 1  
 // BSD License (http://www.galagosearch.org/license)
 2  
 
 3  
 package org.galagosearch.core.eval;
 4  
 
 5  
 import java.util.Map;
 6  
 import java.util.Random;
 7  
 import java.util.Set;
 8  
 import java.util.TreeSet;
 9  
 import org.galagosearch.core.eval.stat.Stat;
 10  
 
 11  
 /**
 12  
  *
 13  
  * @author trevor
 14  
  */
 15  
 public class SetRetrievalComparator {
 16  
     double[] baseline;
 17  
     double[] treatment;
 18  
 
 19  
     /** Creates a new instance of SetRetrievalComparator */
 20  0
     public SetRetrievalComparator(Map<String, Double> baseline, Map<String, Double> treatment) {
 21  0
         Set<String> commonQueries = new TreeSet<String>(baseline.keySet());
 22  0
         commonQueries.retainAll(treatment.keySet());
 23  
 
 24  0
         this.baseline = new double[commonQueries.size()];
 25  0
         this.treatment = new double[commonQueries.size()];
 26  0
         int i = 0;
 27  
 
 28  0
         for (String key : commonQueries) {
 29  0
             this.baseline[i] = baseline.get(key);
 30  0
             this.treatment[i] = treatment.get(key);
 31  0
             i++;
 32  
         }
 33  0
     }
 34  
 
 35  
     private double[] multiply(double[] numbers, double boost) {
 36  0
         double[] result = new double[numbers.length];
 37  
 
 38  0
         for (int i = 0; i < result.length; i++) {
 39  0
             result[i] = numbers[i] * boost;
 40  
         }
 41  
 
 42  0
         return result;
 43  
     }
 44  
 
 45  
     private double mean(double[] numbers) {
 46  0
         double sum = 0;
 47  0
         for (int i = 0; i < numbers.length; i++) {
 48  0
             sum += numbers[i];
 49  
         }
 50  
 
 51  0
         return sum / (double) numbers.length;
 52  
     }
 53  
 
 54  
     public double meanBaselineMetric() {
 55  0
         return mean(baseline);
 56  
     }
 57  
 
 58  
     public double meanTreatmentMetric() {
 59  0
         return mean(treatment);
 60  
     }
 61  
 
 62  
     public int countTreatmentBetter() {
 63  0
         int better = 0;
 64  
 
 65  0
         for (int i = 0; i < baseline.length; i++) {
 66  0
             if (baseline[i] < treatment[i]) {
 67  0
                 better++;
 68  
             }
 69  
         }
 70  
 
 71  0
         return better;
 72  
     }
 73  
 
 74  
     public int countBaselineBetter() {
 75  0
         int better = 0;
 76  
 
 77  0
         for (int i = 0; i < baseline.length; i++) {
 78  0
             if (baseline[i] > treatment[i]) {
 79  0
                 better++;
 80  
             }
 81  
         }
 82  
 
 83  0
         return better;
 84  
     }
 85  
 
 86  
     public int countEqual() {
 87  0
         int same = 0;
 88  
 
 89  0
         for (int i = 0; i < baseline.length; i++) {
 90  0
             if (baseline[i] == treatment[i]) {
 91  0
                 same++;
 92  
             }
 93  
         }
 94  
 
 95  0
         return same;
 96  
     }
 97  
 
 98  
     public double supportedHypothesis(String testName, double pvalue) {
 99  0
         double currentBoost = 1.0;
 100  0
         double currentPvalue = test(testName, currentBoost);
 101  0
         double lastBoost = 1.0;
 102  0
         double lastPvalue = currentPvalue;
 103  0
         int iterations = 0;
 104  
 
 105  
         // search until we find an interval
 106  0
         while ((lastPvalue < pvalue) == (currentPvalue < pvalue)) {
 107  0
             double nextBoost = currentBoost;
 108  
 
 109  0
             if (currentPvalue < pvalue) {
 110  0
                 nextBoost *= 1.05;
 111  0
             } else if (currentPvalue > pvalue) {
 112  0
                 nextBoost *= 0.95;
 113  
             }
 114  
 
 115  0
             double nextPvalue = test(testName, nextBoost);
 116  
 
 117  0
             lastBoost = currentBoost;
 118  0
             lastPvalue = currentPvalue;
 119  0
             currentBoost = nextBoost;
 120  0
             currentPvalue = nextPvalue;
 121  
 
 122  0
             iterations++;
 123  
 
 124  0
             if (iterations > 50) {
 125  0
                 return 0;
 126  
             }
 127  0
         }
 128  
 
 129  
         // now we have an interval to search in
 130  0
         double lowBoost = Math.min(lastBoost, currentBoost);
 131  0
         double highBoost = Math.max(lastBoost, currentBoost);
 132  
 
 133  0
         while (highBoost - lowBoost > 0.00005) {
 134  0
             double middleBoost = (highBoost + lowBoost) / 2;
 135  0
             currentPvalue = test(testName, middleBoost);
 136  
 
 137  0
             if (currentPvalue > pvalue) {
 138  0
                 highBoost = middleBoost;
 139  
             } else {
 140  0
                 lowBoost = middleBoost;
 141  
             }
 142  
 
 143  0
             iterations++;
 144  
 
 145  0
             if (iterations > 100) {
 146  0
                 return 0;
 147  
             }
 148  0
         }
 149  
 
 150  0
         return lowBoost;
 151  
     }
 152  
 
 153  
     public double test(String testName, double boost) {
 154  0
         if (testName.compareToIgnoreCase("ttest") == 0 || testName.compareToIgnoreCase("pairedTTest") == 0) {
 155  0
             return pairedTTest(boost);
 156  0
         } else if (testName.compareToIgnoreCase("sign") == 0) {
 157  0
             return signTest(boost);
 158  0
         } else if (testName.compareToIgnoreCase("randomized") == 0) {
 159  0
             return randomizedTest(boost);
 160  
         } else {
 161  0
             throw new RuntimeException("'" + testName + "' is not a recognized test.");
 162  
         }
 163  
     }
 164  
 
 165  
     public double pairedTTest() {
 166  0
         return pairedTTest(1.0);
 167  
     }
 168  
 
 169  
     public double pairedTTest(double boost) {
 170  0
         double[] boostedBaseline = multiply(baseline, boost);
 171  0
         double sampleSum = 0;
 172  0
         double sampleSumSquares = 0;
 173  0
         int n = boostedBaseline.length;
 174  
 
 175  0
         for (int i = 0; i < baseline.length; i++) {
 176  0
             double delta = treatment[i] - boostedBaseline[i];
 177  0
             sampleSum += delta;
 178  0
             sampleSumSquares += delta * delta;
 179  
         }
 180  
 
 181  0
         double sampleVariance = sampleSumSquares / (n - 1);
 182  0
         double sampleMean = sampleSum / baseline.length;
 183  
 
 184  0
         double sampleDeviation = Math.sqrt(sampleVariance);
 185  0
         double meanDeviation = sampleDeviation / Math.sqrt(n);
 186  0
         double t = sampleMean / meanDeviation;
 187  
 
 188  0
         return 1.0 - Stat.studentTProb(t, n - 1);
 189  
     }
 190  
 
 191  
     public double signTest() {
 192  0
         return signTest(1.0);
 193  
     }
 194  
 
 195  
     public double signTest(double boost) {
 196  0
         int treatmentIsBetter = 0;
 197  0
         int different = 0;
 198  
 
 199  0
         for (int i = 0; i < treatment.length; i++) {
 200  0
             double boostedBaseline = baseline[i] * boost;
 201  0
             if (treatment[i] > boostedBaseline) {
 202  0
                 treatmentIsBetter++;
 203  
             }
 204  0
             if (treatment[i] != boostedBaseline) {
 205  0
                 different++;
 206  
             }
 207  
         }
 208  
 
 209  0
         double pvalue = Stat.binomialProb(0.5, different, treatmentIsBetter);
 210  0
         return pvalue;
 211  
     }
 212  
 
 213  
     public double randomizedTest() {
 214  0
         return randomizedTest(1.0);
 215  
     }
 216  
 
 217  
     public double randomizedTest(double boost) {
 218  0
         double[] boostedBaseline = multiply(baseline, boost);
 219  0
         double baseMean = mean(boostedBaseline);
 220  0
         double treatmentMean = mean(treatment);
 221  0
         double difference = treatmentMean - baseMean;
 222  0
         int batch = 10000;
 223  
 
 224  0
         final int maxIterationsWithoutMatch = 1000000;
 225  0
         long iterations = 0;
 226  0
         long matches = 0;
 227  
 
 228  0
         double[] leftSample = new double[boostedBaseline.length];
 229  0
         double[] rightSample = new double[boostedBaseline.length];
 230  0
         Random random = new Random();
 231  0
         double pValue = 0.0;
 232  
 
 233  
         while (true) {
 234  0
             for (int i = 0; i < batch; i++) {
 235  
                 // create a sample from both distributions
 236  0
                 for (int j = 0; j < boostedBaseline.length; j++) {
 237  0
                     if (random.nextBoolean()) {
 238  0
                         leftSample[j] = boostedBaseline[j];
 239  0
                         rightSample[j] = treatment[j];
 240  
                     } else {
 241  0
                         leftSample[j] = treatment[j];
 242  0
                         rightSample[j] = boostedBaseline[j];
 243  
                     }
 244  
                 }
 245  
 
 246  0
                 double sampleDifference = mean(leftSample) - mean(rightSample);
 247  
 
 248  0
                 if (difference <= sampleDifference) {
 249  0
                     matches++;
 250  
                 }
 251  
             }
 252  
 
 253  0
             iterations += batch;
 254  
 
 255  
             // this is the current p-value estimate
 256  0
             pValue = (double) matches / (double) iterations;
 257  
 
 258  
             // if we still haven't found a match, keep looking
 259  0
             if (matches == 0) {
 260  0
                 if (iterations < maxIterationsWithoutMatch) {
 261  0
                     continue;
 262  
                 } else {
 263  
                     break;
 264  
                 }
 265  
             }
 266  
 
 267  
             // this is our accepted level of deviation in the p-value; we require:
 268  
             //      - accuracy at the fourth decimal place, and
 269  
             //      - less than 5% error in the p-value, or
 270  
             //      - accuracy at the sixth decimal place.
 271  
 
 272  0
             double maxDeviation = Math.max(0.0000005 / pValue, Math.min(0.00005 / pValue, 0.05));
 273  
 
 274  
             // this estimate is derived in Efron and Tibshirani, p.209.
 275  
             // this is the estimated number of iterations necessary for convergence, given
 276  
             // our current p-value estimate.
 277  0
             double estimatedIterations = Math.sqrt(pValue * (1.0 - pValue)) / maxDeviation;
 278  
 
 279  0
             if (estimatedIterations > iterations) {
 280  0
                 break;
 281  
             }
 282  0
         }
 283  
 
 284  0
         return pValue;
 285  
     }
 286  
 }