1
2
3 package org.galagosearch.core.parse;
4
5 import java.io.IOException;
6 import org.galagosearch.core.types.AdditionalDocumentText;
7 import org.galagosearch.core.types.ExtractedLink;
8 import org.galagosearch.tupleflow.InputClass;
9 import org.galagosearch.tupleflow.OutputClass;
10 import org.galagosearch.tupleflow.StandardStep;
11 import org.galagosearch.tupleflow.execution.Verified;
12
13 /***
14 *
15 * @author trevor
16 */
17 @Verified
18 @InputClass(className = "org.galagosearch.core.parse.DocumentLinkData")
19 @OutputClass(className = "org.galagosearch.core.types.AdditionalDocumentText")
20 public class AnchorTextCreator extends StandardStep<DocumentLinkData, AdditionalDocumentText> {
21 @Override
22 public void process(DocumentLinkData object) throws IOException {
23 AdditionalDocumentText additional = new AdditionalDocumentText();
24 StringBuilder extraText = new StringBuilder();
25
26 additional.identifier = object.identifier;
27 for (ExtractedLink link : object.links) {
28 extraText.append("<anchor>");
29 extraText.append(link.anchorText);
30 extraText.append("</anchor>");
31 }
32 additional.text = extraText.toString();
33
34 processor.process(additional);
35 }
36 }