1
2
3 package org.galagosearch.core.parse;
4
5 import java.util.HashMap;
6
7 /***
8 * The point of this class is to replace strings in document objects with
9 * already-used copies. This can greatly reduce the amount of memory used
10 * by the system.
11 *
12 * @author trevor
13 */
14 public class StringPooler {
15 HashMap<String, String> pool = new HashMap<String, String>();
16
17 /***
18 * Replaces the strings within this document with strings in a
19 * string pool.
20 *
21 * @param document
22 */
23 public void transform(Document document) {
24 for (int i = 0; i < document.terms.size(); i++) {
25 String term = document.terms.get(i);
26
27 if (term == null) {
28 continue;
29 }
30 String cached = pool.get(term);
31
32 if (cached == null) {
33 term = new String(term);
34 pool.put(term, term);
35 } else {
36 term = cached;
37 }
38
39 document.terms.set(i, term);
40 }
41
42
43 if (pool.size() > 10000) {
44 pool.clear();
45 }
46 }
47 }