View Javadoc

1   // BSD License (http://www.galagosearch.org/license)
2   
3   package org.galagosearch.core.parse;
4   
5   import org.galagosearch.tupleflow.InputClass;
6   import org.galagosearch.tupleflow.OutputClass;
7   import org.galagosearch.tupleflow.StandardStep;
8   import org.galagosearch.tupleflow.TupleFlowParameters;
9   import org.galagosearch.tupleflow.TypeReader;
10  import org.galagosearch.tupleflow.execution.ErrorHandler;
11  import org.galagosearch.tupleflow.execution.Verification;
12  import org.galagosearch.core.types.DocumentWordPosition;
13  import org.galagosearch.core.types.NumberWordPosition;
14  import java.io.IOException;
15  import java.util.HashMap;
16  import org.galagosearch.core.types.NumberedDocumentData;
17  
18  /***
19   *
20   * @author trevor
21   */
22  @InputClass(className = "org.galagosearch.core.types.DocumentWordPosition")
23  @OutputClass(className = "org.galagosearch.core.types.NumberWordPosition")
24  public class PositionPostingsNumberer extends StandardStep<DocumentWordPosition, NumberWordPosition>
25          implements DocumentWordPosition.Processor, NumberWordPosition.Source {
26      HashMap<String, Integer> documentNumbers = new HashMap();
27  
28      public void process(DocumentWordPosition object) throws IOException {
29          assert documentNumbers.get(object.document) != null : "" + object.document +
30                  " has no name, even with " + documentNumbers.size() + " doc names.";
31          processor.process(
32                  new NumberWordPosition(documentNumbers.get(object.document),
33                                         object.word,
34                                         object.position));
35      }
36  
37      public PositionPostingsNumberer(TupleFlowParameters parameters) throws IOException {
38          TypeReader<NumberedDocumentData> reader = parameters.getTypeReader("numberedDocumentData");
39          NumberedDocumentData ndd;
40  
41          while ((ndd = reader.read()) != null) {
42              documentNumbers.put(ndd.identifier, ndd.number);
43          }
44      }
45  
46      public Class<DocumentWordPosition> getInputClass() {
47          return DocumentWordPosition.class;
48      }
49  
50      public Class<NumberWordPosition> getOutputClass() {
51          return NumberWordPosition.class;
52      }
53  
54      public static String getInputClass(TupleFlowParameters parameters) {
55          return DocumentWordPosition.class.getName();
56      }
57  
58      public static String getOutputClass(TupleFlowParameters parameters) {
59          return NumberWordPosition.class.getName();
60      }
61  
62      public static void verify(TupleFlowParameters parameters, ErrorHandler handler) {
63          Verification.verifyTypeReader("numberedDocumentData", NumberedDocumentData.class,
64                  parameters, handler);
65      }
66  }