1
2
3 package org.galagosearch.core.index;
4
5 import org.galagosearch.core.retrieval.structured.*;
6 import org.galagosearch.core.retrieval.query.Node;
7 import org.galagosearch.core.retrieval.query.NodeType;
8 import org.galagosearch.tupleflow.Parameters;
9 import java.io.File;
10 import java.io.IOException;
11 import java.lang.reflect.Constructor;
12 import java.util.HashMap;
13 import java.util.HashSet;
14 import java.util.Map;
15 import java.util.Map.Entry;
16
17 /***
18 *
19 * @author trevor
20 */
21 public class StructuredIndex {
22 DocumentLengthsReader documentLengths;
23 DocumentNameReader documentNames;
24 Map<String, StructuredIndexPartReader> parts;
25 Parameters manifest;
26
27 HashMap<String, String> defaultIndexOperators = new HashMap<String, String>();
28 HashSet<String> knownIndexOperators = new HashSet<String>();
29
30 public StructuredIndex(String filename) throws IOException {
31 manifest = new Parameters();
32 manifest.parse(filename + File.separator + "manifest");
33 documentLengths = new DocumentLengthsReader(filename + File.separator + "documentLengths");
34 documentNames = new DocumentNameReader(filename + File.separator + "documentNames");
35
36 File partsDirectory = new File(filename + File.separator + "parts");
37 parts = new HashMap<String, StructuredIndexPartReader>();
38 for (File part : partsDirectory.listFiles()) {
39 StructuredIndexPartReader reader = openIndexPart(part.getAbsolutePath());
40 if (reader == null) {
41 continue;
42 }
43 parts.put(part.getName(), reader);
44 }
45
46 initializeIndexOperators();
47 }
48
49 public static StructuredIndexPartReader openIndexPart(String path) throws IOException {
50 if (!IndexReader.isIndexFile(path)) {
51 return null;
52 }
53 IndexReader reader = new IndexReader(path);
54 if (!reader.getManifest().containsKey("readerClass")) {
55 throw new IOException("Tried to open an index part at " + path + ", but the " +
56 "file has no readerClass specified in its manifest. " +
57 "(the readerClass is the class that knows how to decode the " +
58 "contents of the file)");
59 }
60
61 String className = reader.getManifest().get("readerClass", (String) null);
62 Class readerClass;
63 try {
64 readerClass = Class.forName(className);
65 } catch (ClassNotFoundException e) {
66 throw new IOException("Class " + className + ", which was specified as the readerClass " +
67 "in " + path + ", could not be found.");
68 }
69
70 if (!StructuredIndexPartReader.class.isAssignableFrom(readerClass)) {
71 throw new IOException(className + " is not a StructuredIndexPartReader subclass.");
72 }
73
74 Constructor c;
75 try {
76 c = readerClass.getConstructor(IndexReader.class);
77 } catch (NoSuchMethodException ex) {
78 throw new IOException(className + " has no constructor that takes a single " +
79 "IndexReader argument.");
80 } catch (SecurityException ex) {
81 throw new IOException(className + " doesn't have a suitable constructor that " +
82 "this code has access to (SecurityException)");
83 }
84
85 StructuredIndexPartReader partReader;
86 try {
87 partReader = (StructuredIndexPartReader) c.newInstance(reader);
88 } catch (Exception ex) {
89 IOException e = new IOException("Caught an exception while instantiating " +
90 "a StructuredIndexPartReader: ");
91 e.initCause(ex);
92 throw e;
93 }
94 return partReader;
95 }
96
97 /***
98 * Tests to see if a named index part exists.
99 *
100 * @param partName The name of the index part to check.
101 * @return true, if this index has a part called partName, or false otherwise.
102 */
103 public boolean containsPart(String partName) {
104 return parts.containsKey(partName);
105 }
106
107 void initializeIndexOperators() {
108 for (Entry<String, StructuredIndexPartReader> entry : parts.entrySet()) {
109 String partName = entry.getKey();
110 StructuredIndexPartReader part = entry.getValue();
111
112 for (String name : part.getNodeTypes().keySet()) {
113 knownIndexOperators.add(name);
114
115 if (!defaultIndexOperators.containsKey(name)) {
116 defaultIndexOperators.put(name, partName);
117 } else if (name.startsWith("default")) {
118 if (defaultIndexOperators.get(name).startsWith("default")) {
119 defaultIndexOperators.remove(name);
120 } else {
121 defaultIndexOperators.put(name, partName);
122 }
123 } else {
124 defaultIndexOperators.remove(name);
125 }
126 }
127 }
128 }
129
130 private StructuredIndexPartReader getIndexPart(Node node) throws IOException {
131 String operator = node.getOperator();
132 StructuredIndexPartReader part = null;
133
134 if (node.getParameters().containsKey("part")) {
135 String partName = node.getParameters().get("part");
136 if (!parts.containsKey(partName)) {
137 throw new IOException("The index has no part named '" + partName + "'");
138 }
139 part = parts.get(partName);
140 } else if (knownIndexOperators.contains(operator)) {
141 if (!defaultIndexOperators.containsKey(operator)) {
142 throw new IOException("More than one index part supplies the operator '" +
143 operator + "', but no part name was specified.");
144 } else {
145 String partName = defaultIndexOperators.get(operator);
146 part = parts.get(partName);
147 }
148 }
149 return part;
150 }
151
152 public StructuredIterator getIterator(Node node) throws IOException {
153 StructuredIterator result = null;
154 StructuredIndexPartReader part = getIndexPart(node);
155 if (part != null) {
156 result = part.getIterator(node);
157 if (result == null) {
158 result = new NullExtentIterator();
159 }
160 }
161 return result;
162 }
163
164 public NodeType getNodeType(Node node) throws IOException {
165 NodeType result = null;
166 StructuredIndexPartReader part = getIndexPart(node);
167 if (part != null) {
168 final String operator = node.getOperator();
169 final Map<String, NodeType> nodeTypes = part.getNodeTypes();
170 result = nodeTypes.get(operator);
171 }
172 return result;
173 }
174
175 public long getCollectionLength() {
176 return manifest.get("collectionLength", (long) 0);
177 }
178
179 public long getDocumentCount() {
180 return manifest.get("documentCount", (long) 0);
181 }
182
183 public void close() throws IOException {
184 for (StructuredIndexPartReader part : parts.values()) {
185 part.close();
186 }
187 parts.clear();
188 documentLengths.close();
189 }
190
191 public int getLength(int document) {
192 return documentLengths.getLength(document);
193 }
194
195 public String getDocumentName(int document) {
196 return documentNames.get(document);
197 }
198 }