View Javadoc

1   // BSD License (http://www.galagosearch.org/license)
2   
3   package org.galagosearch.core.index;
4   
5   import org.galagosearch.core.retrieval.structured.*;
6   import org.galagosearch.core.retrieval.query.Node;
7   import org.galagosearch.core.retrieval.query.NodeType;
8   import org.galagosearch.tupleflow.Parameters;
9   import java.io.File;
10  import java.io.IOException;
11  import java.lang.reflect.Constructor;
12  import java.util.HashMap;
13  import java.util.HashSet;
14  import java.util.Map;
15  import java.util.Map.Entry;
16  
17  /***
18   *
19   * @author trevor
20   */
21  public class StructuredIndex {
22      DocumentLengthsReader documentLengths;
23      DocumentNameReader documentNames;
24      Map<String, StructuredIndexPartReader> parts;
25      Parameters manifest;
26  
27      HashMap<String, String> defaultIndexOperators = new HashMap<String, String>();
28      HashSet<String> knownIndexOperators = new HashSet<String>();
29  
30      public StructuredIndex(String filename) throws IOException {
31          manifest = new Parameters();
32          manifest.parse(filename + File.separator + "manifest");
33          documentLengths = new DocumentLengthsReader(filename + File.separator + "documentLengths");
34          documentNames = new DocumentNameReader(filename + File.separator + "documentNames");
35  
36          File partsDirectory = new File(filename + File.separator + "parts");
37          parts = new HashMap<String, StructuredIndexPartReader>();
38          for (File part : partsDirectory.listFiles()) {
39              StructuredIndexPartReader reader = openIndexPart(part.getAbsolutePath());
40              if (reader == null) {
41                  continue;
42              }
43              parts.put(part.getName(), reader);
44          }
45          
46          initializeIndexOperators();
47      }
48  
49      public static StructuredIndexPartReader openIndexPart(String path) throws IOException {
50          if (!IndexReader.isIndexFile(path)) {
51              return null;
52          }
53          IndexReader reader = new IndexReader(path);
54          if (!reader.getManifest().containsKey("readerClass")) {
55              throw new IOException("Tried to open an index part at " + path + ", but the " +
56                                    "file has no readerClass specified in its manifest. " +
57                                    "(the readerClass is the class that knows how to decode the " +
58                                    "contents of the file)");
59          }
60  
61          String className = reader.getManifest().get("readerClass", (String) null);
62          Class readerClass;
63          try {
64              readerClass = Class.forName(className);
65          } catch (ClassNotFoundException e) {
66              throw new IOException("Class " + className + ", which was specified as the readerClass " +
67                                    "in " + path + ", could not be found.");
68          }
69  
70          if (!StructuredIndexPartReader.class.isAssignableFrom(readerClass)) {
71              throw new IOException(className + " is not a StructuredIndexPartReader subclass.");
72          }
73  
74          Constructor c;
75          try {
76              c = readerClass.getConstructor(IndexReader.class);
77          } catch (NoSuchMethodException ex) {
78              throw new IOException(className + " has no constructor that takes a single " +
79                                    "IndexReader argument.");
80          } catch (SecurityException ex) {
81              throw new IOException(className + " doesn't have a suitable constructor that " +
82                                    "this code has access to (SecurityException)");
83          }
84  
85          StructuredIndexPartReader partReader;
86          try {
87              partReader = (StructuredIndexPartReader) c.newInstance(reader);
88          } catch (Exception ex) {
89              IOException e = new IOException("Caught an exception while instantiating " +
90                                              "a StructuredIndexPartReader: ");
91              e.initCause(ex);
92              throw e;
93          }
94          return partReader;
95      }
96  
97      /***
98       * Tests to see if a named index part exists.
99       * 
100      * @param partName The name of the index part to check.
101      * @return true, if this index has a part called partName, or false otherwise.
102      */
103     public boolean containsPart(String partName) {
104         return parts.containsKey(partName);
105     }
106 
107     void initializeIndexOperators() {
108         for (Entry<String, StructuredIndexPartReader> entry : parts.entrySet()) {
109             String partName = entry.getKey();
110             StructuredIndexPartReader part = entry.getValue();
111 
112             for (String name : part.getNodeTypes().keySet()) {
113                 knownIndexOperators.add(name);
114 
115                 if (!defaultIndexOperators.containsKey(name)) {
116                     defaultIndexOperators.put(name, partName);
117                 } else if (name.startsWith("default")) {
118                     if (defaultIndexOperators.get(name).startsWith("default")) {
119                         defaultIndexOperators.remove(name);
120                     } else {
121                         defaultIndexOperators.put(name, partName);
122                     }
123                 } else {
124                     defaultIndexOperators.remove(name);
125                 }
126             }
127         }
128     }
129     
130     private StructuredIndexPartReader getIndexPart(Node node) throws IOException {
131         String operator = node.getOperator();
132         StructuredIndexPartReader part = null;
133         
134         if (node.getParameters().containsKey("part")) {
135             String partName = node.getParameters().get("part");
136             if (!parts.containsKey(partName)) {
137                 throw new IOException("The index has no part named '" + partName + "'");
138             }
139             part = parts.get(partName);
140         } else if (knownIndexOperators.contains(operator)) {
141             if (!defaultIndexOperators.containsKey(operator)) {
142                 throw new IOException("More than one index part supplies the operator '" +
143                                       operator + "', but no part name was specified.");
144             } else {
145                 String partName = defaultIndexOperators.get(operator);
146                 part = parts.get(partName);
147             }
148         }
149         return part;
150     }
151     
152     public StructuredIterator getIterator(Node node) throws IOException {
153         StructuredIterator result = null;
154         StructuredIndexPartReader part = getIndexPart(node);
155         if (part != null) {
156             result = part.getIterator(node);
157             if (result == null) {
158                 result = new NullExtentIterator();
159             }
160         }
161         return result;
162     }
163     
164     public NodeType getNodeType(Node node) throws IOException {
165         NodeType result = null;
166         StructuredIndexPartReader part = getIndexPart(node);
167         if (part != null) {
168             final String operator = node.getOperator();
169             final Map<String, NodeType> nodeTypes = part.getNodeTypes();
170             result = nodeTypes.get(operator);
171         }
172         return result;
173     }
174 
175     public long getCollectionLength() {
176         return manifest.get("collectionLength", (long) 0);
177     }
178 
179     public long getDocumentCount() {
180         return manifest.get("documentCount", (long) 0);
181     }
182 
183     public void close() throws IOException {
184         for (StructuredIndexPartReader part : parts.values()) {
185             part.close();
186         }
187         parts.clear();
188         documentLengths.close();
189     }
190 
191     public int getLength(int document) {
192         return documentLengths.getLength(document);
193     }
194 
195     public String getDocumentName(int document) {
196         return documentNames.get(document);
197     }
198 }