View Javadoc

1   // BSD License (http://www.galagosearch.org/license)
2   
3   package org.galagosearch.core.eval;
4   
5   import java.util.ArrayList;
6   import java.util.Collection;
7   import java.util.HashMap;
8   import java.util.List;
9   import java.util.TreeMap;
10  
11  /***
12   * <p>A retrieval evaluator object computes a variety of standard
13   * information retrieval metrics commonly used in TREC, including
14   * binary preference (BPREF), geometric mean average precision (GMAP),
15   * mean average precision (MAP), and standard precision and recall.
16   * In addition, the object gives access to the relevant documents that
17   * were found, and the relevant documents that were missed.</p>
18   *
19   * <p>BPREF is defined in Buckley and Voorhees, "Retrieval Evaluation
20   * with Incomplete Information", SIGIR 2004.</p>
21   *
22   * @author Trevor Strohman
23   */
24  public class RetrievalEvaluator {
25      /***
26       * This class represents a document returned by a retrieval
27       * system.  It can be subclassed if you want to add system-dependent
28       * information to the document representation.
29       */
30      public static class Document {
31          /***
32           * Constructs a new Document object.
33           *
34           * @param documentNumber The document identifier.
35           * @param rank The rank of the document in a retrieved ranked list.
36           * @param score The score given to this document by the retrieval system.
37           */
38          public Document(String documentNumber, int rank, double score) {
39              this.documentNumber = documentNumber;
40              this.rank = rank;
41              this.score = score;
42          }
43  
44          /***
45           * Constructs a new Document object.
46           *
47           * @param documentNumber The document identifier.
48           */
49          public Document(String documentNumber) {
50              this.documentNumber = documentNumber;
51              this.rank = Integer.MAX_VALUE;
52              this.score = Double.NEGATIVE_INFINITY;
53          }
54          /*** The rank of the document in a retrieved ranked list. */
55          public int rank;
56          /*** The document identifier. */
57          public String documentNumber;
58          /*** The score given to this document by the retrieval system. */
59          public double score;
60      }
61  
62      /***
63       * This class represents a relevance judgment of a particular document
64       * for a specific query.
65       */
66      public static class Judgment {
67          /***
68           * Constructs a new Judgment instance.
69           *
70           * @param documentNumber The document identifier.
71           * @param judgment The relevance judgment for this document, where positive values mean relevant, and zero means not relevant.
72           */
73          public Judgment(String documentNumber, int judgment) {
74              this.documentNumber = documentNumber;
75              this.judgment = judgment;
76          }
77          /*** The document identifier. */
78          public String documentNumber;
79          /*** The relevance judgment for this document, where positive values mean relevant, and zero means not relevant. */
80          public int judgment;
81      }
82      private String _queryName;
83      private ArrayList<Document> _retrieved;
84      private ArrayList<Document> _judgedMissed;
85      private ArrayList<Document> _relevant;
86      private ArrayList<Document> _relevantRetrieved;
87      private ArrayList<Document> _judgedIrrelevantRetrieved;
88      private ArrayList<Document> _irrelevantRetrieved;
89      private ArrayList<Document> _relevantMissed;
90      private HashMap<String, Judgment> _judgments;
91  
92      /***
93       * Creates a new instance of RetrievalEvaluator
94       *
95       * @param retrieved A ranked list of retrieved documents.
96       * @param judgments A collection of relevance judgments.
97       */
98      public RetrievalEvaluator(String queryName, List<Document> retrieved, Collection<Judgment> judgments) {
99          _queryName = queryName;
100         _retrieved = new ArrayList<Document>(retrieved);
101 
102         _buildJudgments(judgments);
103         _judgeRetrievedDocuments();
104         _findMissedDocuments();
105         _findRelevantDocuments();
106     }
107 
108     private void _buildJudgments(Collection<Judgment> judgments) {
109         _judgments = new HashMap<String, Judgment>();
110 
111         for (Judgment judgment : judgments) {
112             _judgments.put(judgment.documentNumber, judgment);
113         }
114     }
115 
116     private void _judgeRetrievedDocuments() {
117         _irrelevantRetrieved = new ArrayList<Document>();
118         _relevantRetrieved = new ArrayList<Document>();
119         _judgedIrrelevantRetrieved = new ArrayList<Document>();
120 
121         for (Document document : _retrieved) {
122             boolean relevant = false;
123             boolean judgedIrrelevant = false;
124             Judgment judgment = _judgments.get(document.documentNumber);
125 
126             relevant = (judgment != null) && (judgment.judgment > 0);
127             judgedIrrelevant = (judgment != null) && (judgment.judgment <= 0);
128 
129             if (relevant) {
130                 _relevantRetrieved.add(document);
131             } else {
132                 _irrelevantRetrieved.add(document);
133 
134                 if (judgedIrrelevant) {
135                     _judgedIrrelevantRetrieved.add(document);
136                 }
137             }
138         }
139     }
140 
141     private void _findMissedDocuments() {
142         HashMap<String, Judgment> missedDocuments = new HashMap<String, Judgment>(_judgments);
143 
144         for (Document document : _relevantRetrieved) {
145             missedDocuments.remove(document.documentNumber);
146         }
147 
148         for (Document document : _judgedIrrelevantRetrieved) {
149             missedDocuments.remove(document.documentNumber);
150         }
151 
152         _judgedMissed = new ArrayList<Document>();
153         _relevantMissed = new ArrayList<Document>();
154 
155         for (Judgment judgment : missedDocuments.values()) {
156             Document document = new Document(judgment.documentNumber);
157             _judgedMissed.add(document);
158 
159             if (judgment.judgment > 0) {
160                 _relevantMissed.add(document);
161             }
162         }
163     }
164 
165     private void _findRelevantDocuments() {
166         _relevant = new ArrayList<Document>();
167         _relevant.addAll(_relevantRetrieved);
168         _relevant.addAll(_relevantMissed);
169     }
170 
171     /***
172      * Returns the name of the query represented by this evaluator.
173      */
174     public String queryName() {
175         return _queryName;
176     }
177 
178     /***
179      * Returns the precision of the retrieval at a given number of documents retrieved.
180      * The precision is the number of relevant documents retrieved
181      * divided by the total number of documents retrieved.
182      *
183      * @param documentsRetrieved The evaluation rank.
184      */
185     public double precision(int documentsRetrieved) {
186         return (double) relevantRetrieved(documentsRetrieved) / (double) documentsRetrieved;
187     }
188 
189     /***
190      * Returns the recall of the retrieval at a given number of documents retrieved.
191      * The recall is the number of relevant documents retrieved
192      * divided by the total number of relevant documents for the query.
193      *
194      * @param documentsRetrieved The evaluation rank.
195      */
196     public double recall(int documentsRetrieved) {
197         return (double) relevantRetrieved(documentsRetrieved) / (double) _relevant.size();
198     }
199 
200     /***
201      * Returns the precision at the rank equal to the total number of
202      * relevant documents retrieved.  This method is equivalent to
203      * precision(relevantDocuments().size()).
204      */
205     public double rPrecision() {
206         int relevantCount = _relevant.size();
207         int retrievedCount = _retrieved.size();
208 
209         if (relevantCount > retrievedCount) {
210             return 0;
211         }
212 
213         return precision(relevantCount);
214     }
215 
216     /***
217      * Returns the reciprocal of the rank of the first relevant document
218      * retrieved, or zero if no relevant documents were retrieved.
219      */
220     public double reciprocalRank() {
221         if (_relevantRetrieved.size() == 0) {
222             return 0;
223         }
224         return 1.0 / (double) _relevantRetrieved.get(0).rank;
225     }
226 
227     /***
228      * Returns the average precision of the query.<p>
229      *
230      * Suppose the precision is evaluated once at the rank of
231      * each relevant document in the retrieval.  If a document is
232      * not retrieved, we assume that it was retrieved at rank infinity.
233      * The mean of all these precision values is the average precision.
234      */
235     public double averagePrecision() {
236         double sumPrecision = 0;
237         int relevantCount = 0;
238 
239         for (Document document : _relevantRetrieved) {
240             relevantCount++;
241             sumPrecision += relevantCount / (double) document.rank;
242         }
243 
244         return (double) sumPrecision / _relevant.size();
245     }
246 
247     /***
248      * <p>The binary preference measure, as presented in Buckley, Voorhees
249      * "Retrieval Evaluation with Incomplete Information", SIGIR 2004.
250      * This implemenation is the 'pure' version, which is the one
251      * used in Buckley's trec_eval.</p>
252      *
253      * <p>The formula is:
254      * <tt>1/R \sum_{r} 1 - |n ranked greater than r| / R</tt>
255      * where R is the number of relevant documents for this topic, and
256      * n is a member of the set of first R judged irrelevant documents
257      * retrieved.</p>
258      */
259     public double binaryPreference() {
260         // Edge case: no relevant documents retrieved
261         // Edge case: more relevant documents than count retrieved
262         int totalRelevant = _relevant.size();
263         int i = 0;
264         int j = 0;
265         int irrelevantCount = Math.min(totalRelevant, _judgedIrrelevantRetrieved.size());
266         List<Document> irrelevant = _judgedIrrelevantRetrieved.subList(0, irrelevantCount);
267         double sum = 0;
268 
269         while (i < _relevantRetrieved.size() && j < irrelevant.size()) {
270             Document rel = _relevantRetrieved.get(i);
271             Document irr = irrelevant.get(j);
272 
273             if (rel.rank < irr.rank) {
274                 // we've just seen a relevant document;
275                 // how many of the irrelevant set are ahead of us?
276                 assert j <= totalRelevant;
277                 sum += 1.0 - ((double) j / (double) irrelevantCount);
278                 i++;
279             } else {
280                 j++;
281             }
282         }
283 
284         return sum / (double) totalRelevant;
285     }
286 
287     /*** 
288      * <p>Normalized Discounted Cumulative Gain </p>
289      *
290      * This measure was introduced in Jarvelin, Kekalainen, "IR Evaluation Methods
291      * for Retrieving Highly Relevant Documents" SIGIR 2001.  I copied the formula
292      * from Vassilvitskii, "Using Web-Graph Distance for Relevance Feedback in Web
293      * Search", SIGIR 2006.
294      *
295      * Score = N \sum_i (2^{r(i)} - 1) / \log(1 + i)
296      *
297      * Where N is such that the score cannot be greater than 1.  We compute this
298      * by computing the DCG (unnormalized) of a perfect ranking.
299      */
300     public double normalizedDiscountedCumulativeGain() {
301         return normalizedDiscountedCumulativeGain(Math.max(_retrieved.size(), _judgments.size()));
302     }
303 
304     /*** 
305      * <p>Normalized Discounted Cumulative Gain </p>
306      *
307      * This measure was introduced in Jarvelin, Kekalainen, "IR Evaluation Methods
308      * for Retrieving Highly Relevant Documents" SIGIR 2001.  I copied the formula
309      * from Vassilvitskii, "Using Web-Graph Distance for Relevance Feedback in Web
310      * Search", SIGIR 2006.
311      *
312      * Score = N \sum_i (2^{r(i)} - 1) / \log(1 + i)
313      *
314      * Where N is such that the score cannot be greater than 1.  We compute this
315      * by computing the DCG (unnormalized) of a perfect ranking.
316      */
317     public double normalizedDiscountedCumulativeGain(int documentsRetrieved) {
318         // first, compute the gain from an optimal ranking  
319         double normalizer = normalizationTermNDCG(documentsRetrieved);
320 
321         // now, compute the NDCG of the ranking and return that
322         double dcg = 0;
323         List<Document> truncated = _retrieved;
324 
325         if (_retrieved.size() > documentsRetrieved) {
326             truncated = _retrieved.subList(0,
327                                                                                    documentsRetrieved);
328         }
329         for (Document document : truncated) {
330             Judgment judgment = _judgments.get(document.documentNumber);
331 
332             if (judgment != null && judgment.judgment > 0) {
333                 dcg += (Math.pow(2, judgment.judgment) - 1.0) / Math.log(1 + document.rank);
334             }
335         }
336 
337         return dcg / normalizer;
338     }
339 
340     protected double normalizationTermNDCG(int documentsRetrieved) {
341         TreeMap<Integer, Integer> relevanceCounts = new TreeMap();
342 
343         // the normalization term represents the highest possible DCG score
344         // that could possibly be attained.  we compute this by taking the relevance
345         // judgments, ordering them by relevance value (highly relevant documents first)
346         // then calling that the ranked list, and computing its DCG value.
347 
348         // we use negative judgment values so they come out of the map
349         // in order from highest to lowest relevance
350         for (Judgment judgment : _judgments.values()) {
351             if (judgment.judgment == 0) {
352                 continue;
353             }
354             if (!relevanceCounts.containsKey(-judgment.judgment)) {
355                 relevanceCounts.put(-judgment.judgment, 0);
356             }
357 
358             relevanceCounts.put(-judgment.judgment, relevanceCounts.get(-judgment.judgment) + 1);
359         }
360 
361         double normalizer = 0;
362         int countsSoFar = 0;
363         int documentsProcessed = 0;
364 
365         for (Integer negativeRelevanceValue : relevanceCounts.keySet()) {
366             int relevanceCount = (int) relevanceCounts.get(negativeRelevanceValue);
367             int relevanceValue = -negativeRelevanceValue;
368             relevanceCount = Math.min(relevanceCount, documentsRetrieved - documentsProcessed);
369 
370             for (int i = 1; i <= relevanceCount; i++) {
371                 normalizer += (Math.pow(2, relevanceValue) - 1.0) / Math.log(1 + i + countsSoFar);
372             }
373 
374             documentsProcessed += relevanceCount;
375             if (documentsProcessed >= documentsRetrieved) {
376                 break;
377             }
378         }
379 
380         return normalizer;
381     }
382 
383     /***
384      * The number of relevant documents retrieved at a particular
385      * rank.  This is equivalent to <tt>n * precision(n)</tt>.
386      */
387     public int relevantRetrieved(int documentsRetrieved) {
388         int low = 0;
389         int high = _relevantRetrieved.size() - 1;
390 
391         // if we didn't retrieve anything relevant, we know the answer
392         if (_relevantRetrieved.size() == 0) {
393             return 0;        // is this after the last relevant document?
394         }
395         Document lastRelevant = _relevantRetrieved.get(high);
396         if (lastRelevant.rank <= documentsRetrieved) {
397             return _relevantRetrieved.size();        // is this before the first relevant document?
398         }
399         Document firstRelevant = _relevantRetrieved.get(low);
400         if (firstRelevant.rank > documentsRetrieved) {
401             return 0;
402         }
403         while ((high - low) >= 2) {
404             int middle = low + (high - low) / 2;
405             Document middleDocument = _relevantRetrieved.get(middle);
406 
407             if (middleDocument.rank == documentsRetrieved) {
408                 return middle + 1;
409             } else if (middleDocument.rank > documentsRetrieved) {
410                 high = middle;
411             } else {
412                 low = middle;
413             }
414         }
415 
416         // if the high document had rank == documentsRetrieved, we would
417         // have already returned, either at the top, or at the return middle statement
418         assert _relevantRetrieved.get(low).rank <= documentsRetrieved &&
419                 _relevantRetrieved.get(high).rank > documentsRetrieved;
420 
421         return low + 1;
422     }
423 
424     /***
425      * @return The list of retrieved documents.
426      */
427     public ArrayList<Document> retrievedDocuments() {
428         return _retrieved;
429     }
430 
431     /***
432      * @return The list of all documents retrieved that were explicitly judged irrelevant.
433      */
434     public ArrayList<Document> judgedIrrelevantRetrievedDocuments() {
435         return _judgedIrrelevantRetrieved;
436     }
437 
438     /***
439      * This method returns a list of all documents that were retrieved
440      * but assumed to be irrelevant.  This includes both documents that were
441      * judged to be irrelevant and those that were not judged at all.
442      * The list is returned in retrieval order.
443      */
444     public ArrayList<Document> irrelevantRetrievedDocuments() {
445         return _irrelevantRetrieved;
446     }
447 
448     /***
449      * Returns a list of retrieved documents that were judged relevant,
450      * in the order that they were retrieved.
451      */
452     public ArrayList<Document> relevantRetrievedDocuments() {
453         return _relevantRetrieved;
454     }
455 
456     /***
457      * Returns a list of all documents judged relevant, whether they were
458      * retrieved or not.  Documents are listed in the order they were retrieved,
459      * with those not retrieved coming last.
460      */
461     public ArrayList<Document> relevantDocuments() {
462         return _relevant;
463     }
464 
465     /***
466      * Returns a list of documents that were judged relevant that
467      * were not retrieved.
468      */
469     public ArrayList<Document> relevantMissedDocuments() {
470         return _relevantMissed;
471     }
472 }