1
2
3 package org.galagosearch.core.parse;
4
5 import org.galagosearch.tupleflow.InputClass;
6 import org.galagosearch.tupleflow.OutputClass;
7 import org.galagosearch.tupleflow.StandardStep;
8 import org.galagosearch.tupleflow.TupleFlowParameters;
9 import org.galagosearch.tupleflow.TypeReader;
10 import org.galagosearch.tupleflow.execution.ErrorHandler;
11 import org.galagosearch.tupleflow.execution.Verification;
12 import org.galagosearch.core.types.DocumentWordPosition;
13 import org.galagosearch.core.types.NumberWordPosition;
14 import java.io.IOException;
15 import java.util.HashMap;
16 import org.galagosearch.core.types.NumberedDocumentData;
17
18 /***
19 *
20 * @author trevor
21 */
22 @InputClass(className = "org.galagosearch.core.types.DocumentWordPosition")
23 @OutputClass(className = "org.galagosearch.core.types.NumberWordPosition")
24 public class PositionPostingsNumberer extends StandardStep<DocumentWordPosition, NumberWordPosition>
25 implements DocumentWordPosition.Processor, NumberWordPosition.Source {
26 HashMap<String, Integer> documentNumbers = new HashMap();
27
28 public void process(DocumentWordPosition object) throws IOException {
29 assert documentNumbers.get(object.document) != null : "" + object.document +
30 " has no name, even with " + documentNumbers.size() + " doc names.";
31 processor.process(
32 new NumberWordPosition(documentNumbers.get(object.document),
33 object.word,
34 object.position));
35 }
36
37 public PositionPostingsNumberer(TupleFlowParameters parameters) throws IOException {
38 TypeReader<NumberedDocumentData> reader = parameters.getTypeReader("numberedDocumentData");
39 NumberedDocumentData ndd;
40
41 while ((ndd = reader.read()) != null) {
42 documentNumbers.put(ndd.identifier, ndd.number);
43 }
44 }
45
46 public Class<DocumentWordPosition> getInputClass() {
47 return DocumentWordPosition.class;
48 }
49
50 public Class<NumberWordPosition> getOutputClass() {
51 return NumberWordPosition.class;
52 }
53
54 public static String getInputClass(TupleFlowParameters parameters) {
55 return DocumentWordPosition.class.getName();
56 }
57
58 public static String getOutputClass(TupleFlowParameters parameters) {
59 return NumberWordPosition.class.getName();
60 }
61
62 public static void verify(TupleFlowParameters parameters, ErrorHandler handler) {
63 Verification.verifyTypeReader("numberedDocumentData", NumberedDocumentData.class,
64 parameters, handler);
65 }
66 }