View Javadoc

1   // BSD License (http://www.galagosearch.org/license)
2   
3   package org.galagosearch.core.parse;
4   
5   import java.util.HashMap;
6   
7   /***
8    * The point of this class is to replace strings in document objects with
9    * already-used copies.  This can greatly reduce the amount of memory used
10   * by the system.
11   *
12   * @author trevor
13   */
14  public class StringPooler {
15      HashMap<String, String> pool = new HashMap<String, String>();
16  
17      /***
18       * Replaces the strings within this document with strings in a
19       * string pool.
20       * 
21       * @param document
22       */
23      public void transform(Document document) {
24          for (int i = 0; i < document.terms.size(); i++) {
25              String term = document.terms.get(i);
26  
27              if (term == null) {
28                  continue;
29              }
30              String cached = pool.get(term);
31  
32              if (cached == null) {
33                  term = new String(term);
34                  pool.put(term, term);
35              } else {
36                  term = cached;
37              }
38  
39              document.terms.set(i, term);
40          }
41  
42          // The choice of 10000 is arbitrary; it seemed big enough to make a difference.
43          if (pool.size() > 10000) {
44              pool.clear();
45          }
46      }
47  }