1
2
3 package org.galagosearch.core.eval;
4
5 import java.util.ArrayList;
6 import java.util.Collection;
7 import java.util.HashMap;
8 import java.util.List;
9 import java.util.TreeMap;
10
11 /***
12 * <p>A retrieval evaluator object computes a variety of standard
13 * information retrieval metrics commonly used in TREC, including
14 * binary preference (BPREF), geometric mean average precision (GMAP),
15 * mean average precision (MAP), and standard precision and recall.
16 * In addition, the object gives access to the relevant documents that
17 * were found, and the relevant documents that were missed.</p>
18 *
19 * <p>BPREF is defined in Buckley and Voorhees, "Retrieval Evaluation
20 * with Incomplete Information", SIGIR 2004.</p>
21 *
22 * @author Trevor Strohman
23 */
24 public class RetrievalEvaluator {
25 /***
26 * This class represents a document returned by a retrieval
27 * system. It can be subclassed if you want to add system-dependent
28 * information to the document representation.
29 */
30 public static class Document {
31 /***
32 * Constructs a new Document object.
33 *
34 * @param documentNumber The document identifier.
35 * @param rank The rank of the document in a retrieved ranked list.
36 * @param score The score given to this document by the retrieval system.
37 */
38 public Document(String documentNumber, int rank, double score) {
39 this.documentNumber = documentNumber;
40 this.rank = rank;
41 this.score = score;
42 }
43
44 /***
45 * Constructs a new Document object.
46 *
47 * @param documentNumber The document identifier.
48 */
49 public Document(String documentNumber) {
50 this.documentNumber = documentNumber;
51 this.rank = Integer.MAX_VALUE;
52 this.score = Double.NEGATIVE_INFINITY;
53 }
54 /*** The rank of the document in a retrieved ranked list. */
55 public int rank;
56 /*** The document identifier. */
57 public String documentNumber;
58 /*** The score given to this document by the retrieval system. */
59 public double score;
60 }
61
62 /***
63 * This class represents a relevance judgment of a particular document
64 * for a specific query.
65 */
66 public static class Judgment {
67 /***
68 * Constructs a new Judgment instance.
69 *
70 * @param documentNumber The document identifier.
71 * @param judgment The relevance judgment for this document, where positive values mean relevant, and zero means not relevant.
72 */
73 public Judgment(String documentNumber, int judgment) {
74 this.documentNumber = documentNumber;
75 this.judgment = judgment;
76 }
77 /*** The document identifier. */
78 public String documentNumber;
79 /*** The relevance judgment for this document, where positive values mean relevant, and zero means not relevant. */
80 public int judgment;
81 }
82 private String _queryName;
83 private ArrayList<Document> _retrieved;
84 private ArrayList<Document> _judgedMissed;
85 private ArrayList<Document> _relevant;
86 private ArrayList<Document> _relevantRetrieved;
87 private ArrayList<Document> _judgedIrrelevantRetrieved;
88 private ArrayList<Document> _irrelevantRetrieved;
89 private ArrayList<Document> _relevantMissed;
90 private HashMap<String, Judgment> _judgments;
91
92 /***
93 * Creates a new instance of RetrievalEvaluator
94 *
95 * @param retrieved A ranked list of retrieved documents.
96 * @param judgments A collection of relevance judgments.
97 */
98 public RetrievalEvaluator(String queryName, List<Document> retrieved, Collection<Judgment> judgments) {
99 _queryName = queryName;
100 _retrieved = new ArrayList<Document>(retrieved);
101
102 _buildJudgments(judgments);
103 _judgeRetrievedDocuments();
104 _findMissedDocuments();
105 _findRelevantDocuments();
106 }
107
108 private void _buildJudgments(Collection<Judgment> judgments) {
109 _judgments = new HashMap<String, Judgment>();
110
111 for (Judgment judgment : judgments) {
112 _judgments.put(judgment.documentNumber, judgment);
113 }
114 }
115
116 private void _judgeRetrievedDocuments() {
117 _irrelevantRetrieved = new ArrayList<Document>();
118 _relevantRetrieved = new ArrayList<Document>();
119 _judgedIrrelevantRetrieved = new ArrayList<Document>();
120
121 for (Document document : _retrieved) {
122 boolean relevant = false;
123 boolean judgedIrrelevant = false;
124 Judgment judgment = _judgments.get(document.documentNumber);
125
126 relevant = (judgment != null) && (judgment.judgment > 0);
127 judgedIrrelevant = (judgment != null) && (judgment.judgment <= 0);
128
129 if (relevant) {
130 _relevantRetrieved.add(document);
131 } else {
132 _irrelevantRetrieved.add(document);
133
134 if (judgedIrrelevant) {
135 _judgedIrrelevantRetrieved.add(document);
136 }
137 }
138 }
139 }
140
141 private void _findMissedDocuments() {
142 HashMap<String, Judgment> missedDocuments = new HashMap<String, Judgment>(_judgments);
143
144 for (Document document : _relevantRetrieved) {
145 missedDocuments.remove(document.documentNumber);
146 }
147
148 for (Document document : _judgedIrrelevantRetrieved) {
149 missedDocuments.remove(document.documentNumber);
150 }
151
152 _judgedMissed = new ArrayList<Document>();
153 _relevantMissed = new ArrayList<Document>();
154
155 for (Judgment judgment : missedDocuments.values()) {
156 Document document = new Document(judgment.documentNumber);
157 _judgedMissed.add(document);
158
159 if (judgment.judgment > 0) {
160 _relevantMissed.add(document);
161 }
162 }
163 }
164
165 private void _findRelevantDocuments() {
166 _relevant = new ArrayList<Document>();
167 _relevant.addAll(_relevantRetrieved);
168 _relevant.addAll(_relevantMissed);
169 }
170
171 /***
172 * Returns the name of the query represented by this evaluator.
173 */
174 public String queryName() {
175 return _queryName;
176 }
177
178 /***
179 * Returns the precision of the retrieval at a given number of documents retrieved.
180 * The precision is the number of relevant documents retrieved
181 * divided by the total number of documents retrieved.
182 *
183 * @param documentsRetrieved The evaluation rank.
184 */
185 public double precision(int documentsRetrieved) {
186 return (double) relevantRetrieved(documentsRetrieved) / (double) documentsRetrieved;
187 }
188
189 /***
190 * Returns the recall of the retrieval at a given number of documents retrieved.
191 * The recall is the number of relevant documents retrieved
192 * divided by the total number of relevant documents for the query.
193 *
194 * @param documentsRetrieved The evaluation rank.
195 */
196 public double recall(int documentsRetrieved) {
197 return (double) relevantRetrieved(documentsRetrieved) / (double) _relevant.size();
198 }
199
200 /***
201 * Returns the precision at the rank equal to the total number of
202 * relevant documents retrieved. This method is equivalent to
203 * precision(relevantDocuments().size()).
204 */
205 public double rPrecision() {
206 int relevantCount = _relevant.size();
207 int retrievedCount = _retrieved.size();
208
209 if (relevantCount > retrievedCount) {
210 return 0;
211 }
212
213 return precision(relevantCount);
214 }
215
216 /***
217 * Returns the reciprocal of the rank of the first relevant document
218 * retrieved, or zero if no relevant documents were retrieved.
219 */
220 public double reciprocalRank() {
221 if (_relevantRetrieved.size() == 0) {
222 return 0;
223 }
224 return 1.0 / (double) _relevantRetrieved.get(0).rank;
225 }
226
227 /***
228 * Returns the average precision of the query.<p>
229 *
230 * Suppose the precision is evaluated once at the rank of
231 * each relevant document in the retrieval. If a document is
232 * not retrieved, we assume that it was retrieved at rank infinity.
233 * The mean of all these precision values is the average precision.
234 */
235 public double averagePrecision() {
236 double sumPrecision = 0;
237 int relevantCount = 0;
238
239 for (Document document : _relevantRetrieved) {
240 relevantCount++;
241 sumPrecision += relevantCount / (double) document.rank;
242 }
243
244 return (double) sumPrecision / _relevant.size();
245 }
246
247 /***
248 * <p>The binary preference measure, as presented in Buckley, Voorhees
249 * "Retrieval Evaluation with Incomplete Information", SIGIR 2004.
250 * This implemenation is the 'pure' version, which is the one
251 * used in Buckley's trec_eval.</p>
252 *
253 * <p>The formula is:
254 * <tt>1/R \sum_{r} 1 - |n ranked greater than r| / R</tt>
255 * where R is the number of relevant documents for this topic, and
256 * n is a member of the set of first R judged irrelevant documents
257 * retrieved.</p>
258 */
259 public double binaryPreference() {
260
261
262 int totalRelevant = _relevant.size();
263 int i = 0;
264 int j = 0;
265 int irrelevantCount = Math.min(totalRelevant, _judgedIrrelevantRetrieved.size());
266 List<Document> irrelevant = _judgedIrrelevantRetrieved.subList(0, irrelevantCount);
267 double sum = 0;
268
269 while (i < _relevantRetrieved.size() && j < irrelevant.size()) {
270 Document rel = _relevantRetrieved.get(i);
271 Document irr = irrelevant.get(j);
272
273 if (rel.rank < irr.rank) {
274
275
276 assert j <= totalRelevant;
277 sum += 1.0 - ((double) j / (double) irrelevantCount);
278 i++;
279 } else {
280 j++;
281 }
282 }
283
284 return sum / (double) totalRelevant;
285 }
286
287 /***
288 * <p>Normalized Discounted Cumulative Gain </p>
289 *
290 * This measure was introduced in Jarvelin, Kekalainen, "IR Evaluation Methods
291 * for Retrieving Highly Relevant Documents" SIGIR 2001. I copied the formula
292 * from Vassilvitskii, "Using Web-Graph Distance for Relevance Feedback in Web
293 * Search", SIGIR 2006.
294 *
295 * Score = N \sum_i (2^{r(i)} - 1) / \log(1 + i)
296 *
297 * Where N is such that the score cannot be greater than 1. We compute this
298 * by computing the DCG (unnormalized) of a perfect ranking.
299 */
300 public double normalizedDiscountedCumulativeGain() {
301 return normalizedDiscountedCumulativeGain(Math.max(_retrieved.size(), _judgments.size()));
302 }
303
304 /***
305 * <p>Normalized Discounted Cumulative Gain </p>
306 *
307 * This measure was introduced in Jarvelin, Kekalainen, "IR Evaluation Methods
308 * for Retrieving Highly Relevant Documents" SIGIR 2001. I copied the formula
309 * from Vassilvitskii, "Using Web-Graph Distance for Relevance Feedback in Web
310 * Search", SIGIR 2006.
311 *
312 * Score = N \sum_i (2^{r(i)} - 1) / \log(1 + i)
313 *
314 * Where N is such that the score cannot be greater than 1. We compute this
315 * by computing the DCG (unnormalized) of a perfect ranking.
316 */
317 public double normalizedDiscountedCumulativeGain(int documentsRetrieved) {
318
319 double normalizer = normalizationTermNDCG(documentsRetrieved);
320
321
322 double dcg = 0;
323 List<Document> truncated = _retrieved;
324
325 if (_retrieved.size() > documentsRetrieved) {
326 truncated = _retrieved.subList(0,
327 documentsRetrieved);
328 }
329 for (Document document : truncated) {
330 Judgment judgment = _judgments.get(document.documentNumber);
331
332 if (judgment != null && judgment.judgment > 0) {
333 dcg += (Math.pow(2, judgment.judgment) - 1.0) / Math.log(1 + document.rank);
334 }
335 }
336
337 return dcg / normalizer;
338 }
339
340 protected double normalizationTermNDCG(int documentsRetrieved) {
341 TreeMap<Integer, Integer> relevanceCounts = new TreeMap();
342
343
344
345
346
347
348
349
350 for (Judgment judgment : _judgments.values()) {
351 if (judgment.judgment == 0) {
352 continue;
353 }
354 if (!relevanceCounts.containsKey(-judgment.judgment)) {
355 relevanceCounts.put(-judgment.judgment, 0);
356 }
357
358 relevanceCounts.put(-judgment.judgment, relevanceCounts.get(-judgment.judgment) + 1);
359 }
360
361 double normalizer = 0;
362 int countsSoFar = 0;
363 int documentsProcessed = 0;
364
365 for (Integer negativeRelevanceValue : relevanceCounts.keySet()) {
366 int relevanceCount = (int) relevanceCounts.get(negativeRelevanceValue);
367 int relevanceValue = -negativeRelevanceValue;
368 relevanceCount = Math.min(relevanceCount, documentsRetrieved - documentsProcessed);
369
370 for (int i = 1; i <= relevanceCount; i++) {
371 normalizer += (Math.pow(2, relevanceValue) - 1.0) / Math.log(1 + i + countsSoFar);
372 }
373
374 documentsProcessed += relevanceCount;
375 if (documentsProcessed >= documentsRetrieved) {
376 break;
377 }
378 }
379
380 return normalizer;
381 }
382
383 /***
384 * The number of relevant documents retrieved at a particular
385 * rank. This is equivalent to <tt>n * precision(n)</tt>.
386 */
387 public int relevantRetrieved(int documentsRetrieved) {
388 int low = 0;
389 int high = _relevantRetrieved.size() - 1;
390
391
392 if (_relevantRetrieved.size() == 0) {
393 return 0;
394 }
395 Document lastRelevant = _relevantRetrieved.get(high);
396 if (lastRelevant.rank <= documentsRetrieved) {
397 return _relevantRetrieved.size();
398 }
399 Document firstRelevant = _relevantRetrieved.get(low);
400 if (firstRelevant.rank > documentsRetrieved) {
401 return 0;
402 }
403 while ((high - low) >= 2) {
404 int middle = low + (high - low) / 2;
405 Document middleDocument = _relevantRetrieved.get(middle);
406
407 if (middleDocument.rank == documentsRetrieved) {
408 return middle + 1;
409 } else if (middleDocument.rank > documentsRetrieved) {
410 high = middle;
411 } else {
412 low = middle;
413 }
414 }
415
416
417
418 assert _relevantRetrieved.get(low).rank <= documentsRetrieved &&
419 _relevantRetrieved.get(high).rank > documentsRetrieved;
420
421 return low + 1;
422 }
423
424 /***
425 * @return The list of retrieved documents.
426 */
427 public ArrayList<Document> retrievedDocuments() {
428 return _retrieved;
429 }
430
431 /***
432 * @return The list of all documents retrieved that were explicitly judged irrelevant.
433 */
434 public ArrayList<Document> judgedIrrelevantRetrievedDocuments() {
435 return _judgedIrrelevantRetrieved;
436 }
437
438 /***
439 * This method returns a list of all documents that were retrieved
440 * but assumed to be irrelevant. This includes both documents that were
441 * judged to be irrelevant and those that were not judged at all.
442 * The list is returned in retrieval order.
443 */
444 public ArrayList<Document> irrelevantRetrievedDocuments() {
445 return _irrelevantRetrieved;
446 }
447
448 /***
449 * Returns a list of retrieved documents that were judged relevant,
450 * in the order that they were retrieved.
451 */
452 public ArrayList<Document> relevantRetrievedDocuments() {
453 return _relevantRetrieved;
454 }
455
456 /***
457 * Returns a list of all documents judged relevant, whether they were
458 * retrieved or not. Documents are listed in the order they were retrieved,
459 * with those not retrieved coming last.
460 */
461 public ArrayList<Document> relevantDocuments() {
462 return _relevant;
463 }
464
465 /***
466 * Returns a list of documents that were judged relevant that
467 * were not retrieved.
468 */
469 public ArrayList<Document> relevantMissedDocuments() {
470 return _relevantMissed;
471 }
472 }