View Javadoc

1   // BSD License (http://www.galagosearch.org/license)
2   
3   package org.galagosearch.core.parse;
4   
5   import java.io.IOException;
6   import org.galagosearch.core.types.AdditionalDocumentText;
7   import org.galagosearch.core.types.ExtractedLink;
8   import org.galagosearch.tupleflow.InputClass;
9   import org.galagosearch.tupleflow.OutputClass;
10  import org.galagosearch.tupleflow.StandardStep;
11  import org.galagosearch.tupleflow.execution.Verified;
12  
13  /***
14   *
15   * @author trevor
16   */
17  @Verified
18  @InputClass(className = "org.galagosearch.core.parse.DocumentLinkData")
19  @OutputClass(className = "org.galagosearch.core.types.AdditionalDocumentText")
20  public class AnchorTextCreator extends StandardStep<DocumentLinkData, AdditionalDocumentText> {
21      @Override
22      public void process(DocumentLinkData object) throws IOException {
23          AdditionalDocumentText additional = new AdditionalDocumentText();
24          StringBuilder extraText = new StringBuilder();
25  
26          additional.identifier = object.identifier;
27          for (ExtractedLink link : object.links) {
28              extraText.append("<anchor>");
29              extraText.append(link.anchorText);
30              extraText.append("</anchor>");
31          }
32          additional.text = extraText.toString();
33  
34          processor.process(additional);
35      }
36  }