1
2
3 package org.galagosearch.core.parse;
4
5 import org.galagosearch.tupleflow.InputClass;
6 import org.galagosearch.tupleflow.OutputClass;
7 import org.galagosearch.tupleflow.StandardStep;
8 import org.galagosearch.tupleflow.execution.Verified;
9 import java.io.IOException;
10 import org.galagosearch.core.types.DocumentExtent;
11
12 /***
13 * Converts all tags from a document object into DocumentExtent tuples.
14 * @author trevor
15 */
16 @InputClass(className = "org.galagosearch.core.parse.Document")
17 @OutputClass(className = "org.galagosearch.core.types.DocumentExtent")
18 @Verified
19 public class ExtentExtractor extends StandardStep<Document, DocumentExtent> {
20 public void process(Document document) throws IOException {
21 for (Tag tag : document.tags) {
22 processor.process(new DocumentExtent(tag.name,
23 document.identifier,
24 tag.begin,
25 tag.end));
26 }
27 }
28 }