Coverage Report - org.galagosearch.core.eval.RetrievalEvaluator
 
Classes in this File Line Coverage Branch Coverage Complexity
RetrievalEvaluator
0%
0/140
0%
0/78
0
RetrievalEvaluator$Document
0%
0/10
N/A
0
RetrievalEvaluator$Judgment
0%
0/4
N/A
0
 
 1  
 // BSD License (http://www.galagosearch.org/license)
 2  
 
 3  
 package org.galagosearch.core.eval;
 4  
 
 5  
 import java.util.ArrayList;
 6  
 import java.util.Collection;
 7  
 import java.util.HashMap;
 8  
 import java.util.List;
 9  
 import java.util.TreeMap;
 10  
 
 11  
 /**
 12  
  * <p>A retrieval evaluator object computes a variety of standard
 13  
  * information retrieval metrics commonly used in TREC, including
 14  
  * binary preference (BPREF), geometric mean average precision (GMAP),
 15  
  * mean average precision (MAP), and standard precision and recall.
 16  
  * In addition, the object gives access to the relevant documents that
 17  
  * were found, and the relevant documents that were missed.</p>
 18  
  *
 19  
  * <p>BPREF is defined in Buckley and Voorhees, "Retrieval Evaluation
 20  
  * with Incomplete Information", SIGIR 2004.</p>
 21  
  *
 22  
  * @author Trevor Strohman
 23  
  */
 24  0
 public class RetrievalEvaluator {
 25  
     /**
 26  
      * This class represents a document returned by a retrieval
 27  
      * system.  It can be subclassed if you want to add system-dependent
 28  
      * information to the document representation.
 29  
      */
 30  
     public static class Document {
 31  
         /**
 32  
          * Constructs a new Document object.
 33  
          *
 34  
          * @param documentNumber The document identifier.
 35  
          * @param rank The rank of the document in a retrieved ranked list.
 36  
          * @param score The score given to this document by the retrieval system.
 37  
          */
 38  0
         public Document(String documentNumber, int rank, double score) {
 39  0
             this.documentNumber = documentNumber;
 40  0
             this.rank = rank;
 41  0
             this.score = score;
 42  0
         }
 43  
 
 44  
         /**
 45  
          * Constructs a new Document object.
 46  
          *
 47  
          * @param documentNumber The document identifier.
 48  
          */
 49  0
         public Document(String documentNumber) {
 50  0
             this.documentNumber = documentNumber;
 51  0
             this.rank = Integer.MAX_VALUE;
 52  0
             this.score = Double.NEGATIVE_INFINITY;
 53  0
         }
 54  
         /** The rank of the document in a retrieved ranked list. */
 55  
         public int rank;
 56  
         /** The document identifier. */
 57  
         public String documentNumber;
 58  
         /** The score given to this document by the retrieval system. */
 59  
         public double score;
 60  
     }
 61  
 
 62  
     /**
 63  
      * This class represents a relevance judgment of a particular document
 64  
      * for a specific query.
 65  
      */
 66  
     public static class Judgment {
 67  
         /**
 68  
          * Constructs a new Judgment instance.
 69  
          *
 70  
          * @param documentNumber The document identifier.
 71  
          * @param judgment The relevance judgment for this document, where positive values mean relevant, and zero means not relevant.
 72  
          */
 73  0
         public Judgment(String documentNumber, int judgment) {
 74  0
             this.documentNumber = documentNumber;
 75  0
             this.judgment = judgment;
 76  0
         }
 77  
         /** The document identifier. */
 78  
         public String documentNumber;
 79  
         /** The relevance judgment for this document, where positive values mean relevant, and zero means not relevant. */
 80  
         public int judgment;
 81  
     }
 82  
     private String _queryName;
 83  
     private ArrayList<Document> _retrieved;
 84  
     private ArrayList<Document> _judgedMissed;
 85  
     private ArrayList<Document> _relevant;
 86  
     private ArrayList<Document> _relevantRetrieved;
 87  
     private ArrayList<Document> _judgedIrrelevantRetrieved;
 88  
     private ArrayList<Document> _irrelevantRetrieved;
 89  
     private ArrayList<Document> _relevantMissed;
 90  
     private HashMap<String, Judgment> _judgments;
 91  
 
 92  
     /**
 93  
      * Creates a new instance of RetrievalEvaluator
 94  
      *
 95  
      * @param retrieved A ranked list of retrieved documents.
 96  
      * @param judgments A collection of relevance judgments.
 97  
      */
 98  0
     public RetrievalEvaluator(String queryName, List<Document> retrieved, Collection<Judgment> judgments) {
 99  0
         _queryName = queryName;
 100  0
         _retrieved = new ArrayList<Document>(retrieved);
 101  
 
 102  0
         _buildJudgments(judgments);
 103  0
         _judgeRetrievedDocuments();
 104  0
         _findMissedDocuments();
 105  0
         _findRelevantDocuments();
 106  0
     }
 107  
 
 108  
     private void _buildJudgments(Collection<Judgment> judgments) {
 109  0
         _judgments = new HashMap<String, Judgment>();
 110  
 
 111  0
         for (Judgment judgment : judgments) {
 112  0
             _judgments.put(judgment.documentNumber, judgment);
 113  
         }
 114  0
     }
 115  
 
 116  
     private void _judgeRetrievedDocuments() {
 117  0
         _irrelevantRetrieved = new ArrayList<Document>();
 118  0
         _relevantRetrieved = new ArrayList<Document>();
 119  0
         _judgedIrrelevantRetrieved = new ArrayList<Document>();
 120  
 
 121  0
         for (Document document : _retrieved) {
 122  0
             boolean relevant = false;
 123  0
             boolean judgedIrrelevant = false;
 124  0
             Judgment judgment = _judgments.get(document.documentNumber);
 125  
 
 126  0
             relevant = (judgment != null) && (judgment.judgment > 0);
 127  0
             judgedIrrelevant = (judgment != null) && (judgment.judgment <= 0);
 128  
 
 129  0
             if (relevant) {
 130  0
                 _relevantRetrieved.add(document);
 131  
             } else {
 132  0
                 _irrelevantRetrieved.add(document);
 133  
 
 134  0
                 if (judgedIrrelevant) {
 135  0
                     _judgedIrrelevantRetrieved.add(document);
 136  
                 }
 137  
             }
 138  0
         }
 139  0
     }
 140  
 
 141  
     private void _findMissedDocuments() {
 142  0
         HashMap<String, Judgment> missedDocuments = new HashMap<String, Judgment>(_judgments);
 143  
 
 144  0
         for (Document document : _relevantRetrieved) {
 145  0
             missedDocuments.remove(document.documentNumber);
 146  
         }
 147  
 
 148  0
         for (Document document : _judgedIrrelevantRetrieved) {
 149  0
             missedDocuments.remove(document.documentNumber);
 150  
         }
 151  
 
 152  0
         _judgedMissed = new ArrayList<Document>();
 153  0
         _relevantMissed = new ArrayList<Document>();
 154  
 
 155  0
         for (Judgment judgment : missedDocuments.values()) {
 156  0
             Document document = new Document(judgment.documentNumber);
 157  0
             _judgedMissed.add(document);
 158  
 
 159  0
             if (judgment.judgment > 0) {
 160  0
                 _relevantMissed.add(document);
 161  
             }
 162  0
         }
 163  0
     }
 164  
 
 165  
     private void _findRelevantDocuments() {
 166  0
         _relevant = new ArrayList<Document>();
 167  0
         _relevant.addAll(_relevantRetrieved);
 168  0
         _relevant.addAll(_relevantMissed);
 169  0
     }
 170  
 
 171  
     /**
 172  
      * Returns the name of the query represented by this evaluator.
 173  
      */
 174  
     public String queryName() {
 175  0
         return _queryName;
 176  
     }
 177  
 
 178  
     /**
 179  
      * Returns the precision of the retrieval at a given number of documents retrieved.
 180  
      * The precision is the number of relevant documents retrieved
 181  
      * divided by the total number of documents retrieved.
 182  
      *
 183  
      * @param documentsRetrieved The evaluation rank.
 184  
      */
 185  
     public double precision(int documentsRetrieved) {
 186  0
         return (double) relevantRetrieved(documentsRetrieved) / (double) documentsRetrieved;
 187  
     }
 188  
 
 189  
     /**
 190  
      * Returns the recall of the retrieval at a given number of documents retrieved.
 191  
      * The recall is the number of relevant documents retrieved
 192  
      * divided by the total number of relevant documents for the query.
 193  
      *
 194  
      * @param documentsRetrieved The evaluation rank.
 195  
      */
 196  
     public double recall(int documentsRetrieved) {
 197  0
         return (double) relevantRetrieved(documentsRetrieved) / (double) _relevant.size();
 198  
     }
 199  
 
 200  
     /**
 201  
      * Returns the precision at the rank equal to the total number of
 202  
      * relevant documents retrieved.  This method is equivalent to
 203  
      * precision(relevantDocuments().size()).
 204  
      */
 205  
     public double rPrecision() {
 206  0
         int relevantCount = _relevant.size();
 207  0
         int retrievedCount = _retrieved.size();
 208  
 
 209  0
         if (relevantCount > retrievedCount) {
 210  0
             return 0;
 211  
         }
 212  
 
 213  0
         return precision(relevantCount);
 214  
     }
 215  
 
 216  
     /**
 217  
      * Returns the reciprocal of the rank of the first relevant document
 218  
      * retrieved, or zero if no relevant documents were retrieved.
 219  
      */
 220  
     public double reciprocalRank() {
 221  0
         if (_relevantRetrieved.size() == 0) {
 222  0
             return 0;
 223  
         }
 224  0
         return 1.0 / (double) _relevantRetrieved.get(0).rank;
 225  
     }
 226  
 
 227  
     /**
 228  
      * Returns the average precision of the query.<p>
 229  
      *
 230  
      * Suppose the precision is evaluated once at the rank of
 231  
      * each relevant document in the retrieval.  If a document is
 232  
      * not retrieved, we assume that it was retrieved at rank infinity.
 233  
      * The mean of all these precision values is the average precision.
 234  
      */
 235  
     public double averagePrecision() {
 236  0
         double sumPrecision = 0;
 237  0
         int relevantCount = 0;
 238  
 
 239  0
         for (Document document : _relevantRetrieved) {
 240  0
             relevantCount++;
 241  0
             sumPrecision += relevantCount / (double) document.rank;
 242  
         }
 243  
 
 244  0
         return (double) sumPrecision / _relevant.size();
 245  
     }
 246  
 
 247  
     /**
 248  
      * <p>The binary preference measure, as presented in Buckley, Voorhees
 249  
      * "Retrieval Evaluation with Incomplete Information", SIGIR 2004.
 250  
      * This implemenation is the 'pure' version, which is the one
 251  
      * used in Buckley's trec_eval.</p>
 252  
      *
 253  
      * <p>The formula is:
 254  
      * <tt>1/R \sum_{r} 1 - |n ranked greater than r| / R</tt>
 255  
      * where R is the number of relevant documents for this topic, and
 256  
      * n is a member of the set of first R judged irrelevant documents
 257  
      * retrieved.</p>
 258  
      */
 259  
     public double binaryPreference() {
 260  
         // Edge case: no relevant documents retrieved
 261  
         // Edge case: more relevant documents than count retrieved
 262  0
         int totalRelevant = _relevant.size();
 263  0
         int i = 0;
 264  0
         int j = 0;
 265  0
         int irrelevantCount = Math.min(totalRelevant, _judgedIrrelevantRetrieved.size());
 266  0
         List<Document> irrelevant = _judgedIrrelevantRetrieved.subList(0, irrelevantCount);
 267  0
         double sum = 0;
 268  
 
 269  0
         while (i < _relevantRetrieved.size() && j < irrelevant.size()) {
 270  0
             Document rel = _relevantRetrieved.get(i);
 271  0
             Document irr = irrelevant.get(j);
 272  
 
 273  0
             if (rel.rank < irr.rank) {
 274  
                 // we've just seen a relevant document;
 275  
                 // how many of the irrelevant set are ahead of us?
 276  0
                 assert j <= totalRelevant;
 277  0
                 sum += 1.0 - ((double) j / (double) irrelevantCount);
 278  0
                 i++;
 279  
             } else {
 280  0
                 j++;
 281  
             }
 282  0
         }
 283  
 
 284  0
         return sum / (double) totalRelevant;
 285  
     }
 286  
 
 287  
     /** 
 288  
      * <p>Normalized Discounted Cumulative Gain </p>
 289  
      *
 290  
      * This measure was introduced in Jarvelin, Kekalainen, "IR Evaluation Methods
 291  
      * for Retrieving Highly Relevant Documents" SIGIR 2001.  I copied the formula
 292  
      * from Vassilvitskii, "Using Web-Graph Distance for Relevance Feedback in Web
 293  
      * Search", SIGIR 2006.
 294  
      *
 295  
      * Score = N \sum_i (2^{r(i)} - 1) / \log(1 + i)
 296  
      *
 297  
      * Where N is such that the score cannot be greater than 1.  We compute this
 298  
      * by computing the DCG (unnormalized) of a perfect ranking.
 299  
      */
 300  
     public double normalizedDiscountedCumulativeGain() {
 301  0
         return normalizedDiscountedCumulativeGain(Math.max(_retrieved.size(), _judgments.size()));
 302  
     }
 303  
 
 304  
     /** 
 305  
      * <p>Normalized Discounted Cumulative Gain </p>
 306  
      *
 307  
      * This measure was introduced in Jarvelin, Kekalainen, "IR Evaluation Methods
 308  
      * for Retrieving Highly Relevant Documents" SIGIR 2001.  I copied the formula
 309  
      * from Vassilvitskii, "Using Web-Graph Distance for Relevance Feedback in Web
 310  
      * Search", SIGIR 2006.
 311  
      *
 312  
      * Score = N \sum_i (2^{r(i)} - 1) / \log(1 + i)
 313  
      *
 314  
      * Where N is such that the score cannot be greater than 1.  We compute this
 315  
      * by computing the DCG (unnormalized) of a perfect ranking.
 316  
      */
 317  
     public double normalizedDiscountedCumulativeGain(int documentsRetrieved) {
 318  
         // first, compute the gain from an optimal ranking  
 319  0
         double normalizer = normalizationTermNDCG(documentsRetrieved);
 320  
 
 321  
         // now, compute the NDCG of the ranking and return that
 322  0
         double dcg = 0;
 323  0
         List<Document> truncated = _retrieved;
 324  
 
 325  0
         if (_retrieved.size() > documentsRetrieved) {
 326  0
             truncated = _retrieved.subList(0,
 327  
                                                                                    documentsRetrieved);
 328  
         }
 329  0
         for (Document document : truncated) {
 330  0
             Judgment judgment = _judgments.get(document.documentNumber);
 331  
 
 332  0
             if (judgment != null && judgment.judgment > 0) {
 333  0
                 dcg += (Math.pow(2, judgment.judgment) - 1.0) / Math.log(1 + document.rank);
 334  
             }
 335  0
         }
 336  
 
 337  0
         return dcg / normalizer;
 338  
     }
 339  
 
 340  
     protected double normalizationTermNDCG(int documentsRetrieved) {
 341  0
         TreeMap<Integer, Integer> relevanceCounts = new TreeMap();
 342  
 
 343  
         // the normalization term represents the highest possible DCG score
 344  
         // that could possibly be attained.  we compute this by taking the relevance
 345  
         // judgments, ordering them by relevance value (highly relevant documents first)
 346  
         // then calling that the ranked list, and computing its DCG value.
 347  
 
 348  
         // we use negative judgment values so they come out of the map
 349  
         // in order from highest to lowest relevance
 350  0
         for (Judgment judgment : _judgments.values()) {
 351  0
             if (judgment.judgment == 0) {
 352  0
                 continue;
 353  
             }
 354  0
             if (!relevanceCounts.containsKey(-judgment.judgment)) {
 355  0
                 relevanceCounts.put(-judgment.judgment, 0);
 356  
             }
 357  
 
 358  0
             relevanceCounts.put(-judgment.judgment, relevanceCounts.get(-judgment.judgment) + 1);
 359  
         }
 360  
 
 361  0
         double normalizer = 0;
 362  0
         int countsSoFar = 0;
 363  0
         int documentsProcessed = 0;
 364  
 
 365  0
         for (Integer negativeRelevanceValue : relevanceCounts.keySet()) {
 366  0
             int relevanceCount = (int) relevanceCounts.get(negativeRelevanceValue);
 367  0
             int relevanceValue = -negativeRelevanceValue;
 368  0
             relevanceCount = Math.min(relevanceCount, documentsRetrieved - documentsProcessed);
 369  
 
 370  0
             for (int i = 1; i <= relevanceCount; i++) {
 371  0
                 normalizer += (Math.pow(2, relevanceValue) - 1.0) / Math.log(1 + i + countsSoFar);
 372  
             }
 373  
 
 374  0
             documentsProcessed += relevanceCount;
 375  0
             if (documentsProcessed >= documentsRetrieved) {
 376  0
                 break;
 377  
             }
 378  0
         }
 379  
 
 380  0
         return normalizer;
 381  
     }
 382  
 
 383  
     /**
 384  
      * The number of relevant documents retrieved at a particular
 385  
      * rank.  This is equivalent to <tt>n * precision(n)</tt>.
 386  
      */
 387  
     public int relevantRetrieved(int documentsRetrieved) {
 388  0
         int low = 0;
 389  0
         int high = _relevantRetrieved.size() - 1;
 390  
 
 391  
         // if we didn't retrieve anything relevant, we know the answer
 392  0
         if (_relevantRetrieved.size() == 0) {
 393  0
             return 0;        // is this after the last relevant document?
 394  
         }
 395  0
         Document lastRelevant = _relevantRetrieved.get(high);
 396  0
         if (lastRelevant.rank <= documentsRetrieved) {
 397  0
             return _relevantRetrieved.size();        // is this before the first relevant document?
 398  
         }
 399  0
         Document firstRelevant = _relevantRetrieved.get(low);
 400  0
         if (firstRelevant.rank > documentsRetrieved) {
 401  0
             return 0;
 402  
         }
 403  0
         while ((high - low) >= 2) {
 404  0
             int middle = low + (high - low) / 2;
 405  0
             Document middleDocument = _relevantRetrieved.get(middle);
 406  
 
 407  0
             if (middleDocument.rank == documentsRetrieved) {
 408  0
                 return middle + 1;
 409  0
             } else if (middleDocument.rank > documentsRetrieved) {
 410  0
                 high = middle;
 411  
             } else {
 412  0
                 low = middle;
 413  
             }
 414  0
         }
 415  
 
 416  
         // if the high document had rank == documentsRetrieved, we would
 417  
         // have already returned, either at the top, or at the return middle statement
 418  0
         assert _relevantRetrieved.get(low).rank <= documentsRetrieved &&
 419  
                 _relevantRetrieved.get(high).rank > documentsRetrieved;
 420  
 
 421  0
         return low + 1;
 422  
     }
 423  
 
 424  
     /**
 425  
      * @return The list of retrieved documents.
 426  
      */
 427  
     public ArrayList<Document> retrievedDocuments() {
 428  0
         return _retrieved;
 429  
     }
 430  
 
 431  
     /**
 432  
      * @return The list of all documents retrieved that were explicitly judged irrelevant.
 433  
      */
 434  
     public ArrayList<Document> judgedIrrelevantRetrievedDocuments() {
 435  0
         return _judgedIrrelevantRetrieved;
 436  
     }
 437  
 
 438  
     /**
 439  
      * This method returns a list of all documents that were retrieved
 440  
      * but assumed to be irrelevant.  This includes both documents that were
 441  
      * judged to be irrelevant and those that were not judged at all.
 442  
      * The list is returned in retrieval order.
 443  
      */
 444  
     public ArrayList<Document> irrelevantRetrievedDocuments() {
 445  0
         return _irrelevantRetrieved;
 446  
     }
 447  
 
 448  
     /**
 449  
      * Returns a list of retrieved documents that were judged relevant,
 450  
      * in the order that they were retrieved.
 451  
      */
 452  
     public ArrayList<Document> relevantRetrievedDocuments() {
 453  0
         return _relevantRetrieved;
 454  
     }
 455  
 
 456  
     /**
 457  
      * Returns a list of all documents judged relevant, whether they were
 458  
      * retrieved or not.  Documents are listed in the order they were retrieved,
 459  
      * with those not retrieved coming last.
 460  
      */
 461  
     public ArrayList<Document> relevantDocuments() {
 462  0
         return _relevant;
 463  
     }
 464  
 
 465  
     /**
 466  
      * Returns a list of documents that were judged relevant that
 467  
      * were not retrieved.
 468  
      */
 469  
     public ArrayList<Document> relevantMissedDocuments() {
 470  0
         return _relevantMissed;
 471  
     }
 472  
 }