Coverage Report - org.galagosearch.core.types.IdentifiedLink
 
Classes in this File Line Coverage Branch Coverage Complexity
IdentifiedLink
0%
0/10
0%
0/2
0
IdentifiedLink$Processor
N/A
N/A
0
IdentifiedLink$Source
N/A
N/A
0
IdentifiedLink$UrlOrder
0%
0/24
0%
0/4
0
IdentifiedLink$UrlOrder$1
0%
0/5
0%
0/2
0
IdentifiedLink$UrlOrder$2
0%
0/5
0%
0/2
0
IdentifiedLink$UrlOrder$DuplicateEliminator
0%
0/19
0%
0/4
0
IdentifiedLink$UrlOrder$OrderedWriterClass
0%
0/14
0%
0/6
0
IdentifiedLink$UrlOrder$ShreddedBuffer
0%
0/78
0%
0/50
0
IdentifiedLink$UrlOrder$ShreddedCombiner
0%
0/55
0%
0/36
0
IdentifiedLink$UrlOrder$ShreddedProcessor
N/A
N/A
0
IdentifiedLink$UrlOrder$ShreddedReader
0%
0/70
0%
0/34
0
IdentifiedLink$UrlOrder$ShreddedSource
N/A
N/A
0
IdentifiedLink$UrlOrder$ShreddedWriter
0%
0/37
0%
0/14
0
IdentifiedLink$UrlOrder$TupleShredder
0%
0/18
0%
0/8
0
IdentifiedLink$UrlOrder$TupleUnshredder
0%
0/21
0%
0/2
0
 
 1  
 // This file was automatically generated with the command: 
 2  
 //     java org.galagosearch.tupleflow.typebuilder.TypeBuilderMojo ...
 3  
 package org.galagosearch.core.types;
 4  
 
 5  
 import org.galagosearch.tupleflow.Utility;
 6  
 import org.galagosearch.tupleflow.ArrayInput;
 7  
 import org.galagosearch.tupleflow.ArrayOutput;
 8  
 import org.galagosearch.tupleflow.Order;   
 9  
 import org.galagosearch.tupleflow.OrderedWriter;
 10  
 import org.galagosearch.tupleflow.Type; 
 11  
 import org.galagosearch.tupleflow.TypeReader;
 12  
 import org.galagosearch.tupleflow.Step; 
 13  
 import org.galagosearch.tupleflow.IncompatibleProcessorException;
 14  
 import org.galagosearch.tupleflow.ReaderSource;
 15  
 import java.io.IOException;             
 16  
 import java.io.EOFException;
 17  
 import java.io.UnsupportedEncodingException;
 18  
 import java.util.ArrayList;
 19  
 import java.util.Arrays;   
 20  
 import java.util.Comparator;
 21  
 import java.util.PriorityQueue;
 22  
 import java.util.Collection;
 23  
 
 24  
 public class IdentifiedLink implements Type<IdentifiedLink> {
 25  
     public String identifier;
 26  
     public String url;
 27  
     public String anchorText; 
 28  
     
 29  0
     public IdentifiedLink() {}
 30  0
     public IdentifiedLink(String identifier, String url, String anchorText) {
 31  0
         this.identifier = identifier;
 32  0
         this.url = url;
 33  0
         this.anchorText = anchorText;
 34  0
     }  
 35  
     
 36  
     public String toString() {
 37  0
             return String.format("%s,%s,%s",
 38  
                                    identifier, url, anchorText);
 39  
     } 
 40  
 
 41  
     public Order<IdentifiedLink> getOrder(String... spec) {
 42  0
         if (Arrays.equals(spec, new String[] { "+url" })) {
 43  0
             return new UrlOrder();
 44  
         }
 45  0
         return null;
 46  
     } 
 47  
       
 48  
     public interface Processor extends Step, org.galagosearch.tupleflow.Processor<IdentifiedLink> {
 49  
         public void process(IdentifiedLink object) throws IOException;
 50  
         public void close() throws IOException;
 51  
     }                        
 52  
     public interface Source extends Step {
 53  
     }
 54  0
     public static class UrlOrder implements Order<IdentifiedLink> {
 55  
         public int hash(IdentifiedLink object) {
 56  0
             int h = 0;
 57  0
             h += Utility.hash(object.url);
 58  0
             return h;
 59  
         } 
 60  
         public Comparator<IdentifiedLink> greaterThan() {
 61  0
             return new Comparator<IdentifiedLink>() {
 62  0
                 public int compare(IdentifiedLink one, IdentifiedLink two) {
 63  0
                     int result = 0;
 64  
                     do {
 65  0
                         result = + Utility.compare(one.url, two.url);
 66  0
                         if(result != 0) break;
 67  
                     } while (false);
 68  0
                     return -result;
 69  
                 }
 70  
             };
 71  
         }     
 72  
         public Comparator<IdentifiedLink> lessThan() {
 73  0
             return new Comparator<IdentifiedLink>() {
 74  0
                 public int compare(IdentifiedLink one, IdentifiedLink two) {
 75  0
                     int result = 0;
 76  
                     do {
 77  0
                         result = + Utility.compare(one.url, two.url);
 78  0
                         if(result != 0) break;
 79  
                     } while (false);
 80  0
                     return result;
 81  
                 }
 82  
             };
 83  
         }     
 84  
         public TypeReader<IdentifiedLink> orderedReader(ArrayInput _input) {
 85  0
             return new ShreddedReader(_input);
 86  
         }    
 87  
 
 88  
         public TypeReader<IdentifiedLink> orderedReader(ArrayInput _input, int bufferSize) {
 89  0
             return new ShreddedReader(_input, bufferSize);
 90  
         }    
 91  
         public OrderedWriter<IdentifiedLink> orderedWriter(ArrayOutput _output) {
 92  0
             ShreddedWriter w = new ShreddedWriter(_output);
 93  0
             return new OrderedWriterClass(w); 
 94  
         }                                    
 95  0
         public static class OrderedWriterClass extends OrderedWriter< IdentifiedLink > {
 96  0
             IdentifiedLink last = null;
 97  0
             ShreddedWriter shreddedWriter = null; 
 98  
             
 99  0
             public OrderedWriterClass(ShreddedWriter s) {
 100  0
                 this.shreddedWriter = s;
 101  0
             }
 102  
             
 103  
             public void process(IdentifiedLink object) throws IOException {
 104  0
                boolean processAll = false;
 105  0
                if (processAll || last == null || 0 != Utility.compare(object.url, last.url)) { processAll = true; shreddedWriter.processUrl(object.url); }
 106  0
                shreddedWriter.processTuple(object.identifier, object.anchorText);
 107  0
                last = object;
 108  0
             }           
 109  
                  
 110  
             public void close() throws IOException {
 111  0
                 shreddedWriter.close();
 112  0
             }
 113  
             
 114  
             public Class<IdentifiedLink> getInputClass() {
 115  0
                 return IdentifiedLink.class;
 116  
             }
 117  
         } 
 118  
         public ReaderSource<IdentifiedLink> orderedCombiner(Collection<TypeReader<IdentifiedLink>> readers, boolean closeOnExit) {
 119  0
             ArrayList<ShreddedReader> shreddedReaders = new ArrayList();
 120  
             
 121  0
             for (TypeReader<IdentifiedLink> reader : readers) {
 122  0
                 shreddedReaders.add((ShreddedReader)reader);
 123  
             }
 124  
             
 125  0
             return new ShreddedCombiner(shreddedReaders, closeOnExit);
 126  
         }                  
 127  
         public IdentifiedLink clone(IdentifiedLink object) {
 128  0
             IdentifiedLink result = new IdentifiedLink();
 129  0
             if (object == null) return result;
 130  0
             result.identifier = object.identifier; 
 131  0
             result.url = object.url; 
 132  0
             result.anchorText = object.anchorText; 
 133  0
             return result;
 134  
         }                 
 135  
         public Class<IdentifiedLink> getOrderedClass() {
 136  0
             return IdentifiedLink.class;
 137  
         }                           
 138  
         public String[] getOrderSpec() {
 139  0
             return new String[] {"+url"};
 140  
         }
 141  
 
 142  
         public static String getSpecString() {
 143  0
             return "+url";
 144  
         }
 145  
                            
 146  
         public interface ShreddedProcessor extends Step {
 147  
             public void processUrl(String url) throws IOException;
 148  
             public void processTuple(String identifier, String anchorText) throws IOException;
 149  
             public void close() throws IOException;
 150  
         }    
 151  
         public interface ShreddedSource extends Step {
 152  
         }                                              
 153  
         
 154  0
         public static class ShreddedWriter implements ShreddedProcessor {
 155  
             ArrayOutput output;
 156  0
             ShreddedBuffer buffer = new ShreddedBuffer();
 157  
             String lastUrl;
 158  0
             boolean lastFlush = false;
 159  
             
 160  0
             public ShreddedWriter(ArrayOutput output) {
 161  0
                 this.output = output;
 162  0
             }                        
 163  
             
 164  
             public void close() throws IOException {
 165  0
                 flush();
 166  0
             }
 167  
             
 168  
             public void processUrl(String url) {
 169  0
                 lastUrl = url;
 170  0
                 buffer.processUrl(url);
 171  0
             }
 172  
             public final void processTuple(String identifier, String anchorText) throws IOException {
 173  0
                 if (lastFlush) {
 174  0
                     if(buffer.urls.size() == 0) buffer.processUrl(lastUrl);
 175  0
                     lastFlush = false;
 176  
                 }
 177  0
                 buffer.processTuple(identifier, anchorText);
 178  0
                 if (buffer.isFull())
 179  0
                     flush();
 180  0
             }
 181  
             public final void flushTuples(int pauseIndex) throws IOException {
 182  
                 
 183  0
                 while (buffer.getReadIndex() < pauseIndex) {
 184  
                            
 185  0
                     output.writeString(buffer.getIdentifier());
 186  0
                     output.writeString(buffer.getAnchorText());
 187  0
                     buffer.incrementTuple();
 188  
                 }
 189  0
             }  
 190  
             public final void flushUrl(int pauseIndex) throws IOException {
 191  0
                 while (buffer.getReadIndex() < pauseIndex) {
 192  0
                     int nextPause = buffer.getUrlEndIndex();
 193  0
                     int count = nextPause - buffer.getReadIndex();
 194  
                     
 195  0
                     output.writeString(buffer.getUrl());
 196  0
                     output.writeInt(count);
 197  0
                     buffer.incrementUrl();
 198  
                       
 199  0
                     flushTuples(nextPause);
 200  0
                     assert nextPause == buffer.getReadIndex();
 201  0
                 }
 202  0
             }
 203  
             public void flush() throws IOException { 
 204  0
                 flushUrl(buffer.getWriteIndex());
 205  0
                 buffer.reset(); 
 206  0
                 lastFlush = true;
 207  0
             }                           
 208  
         }
 209  0
         public static class ShreddedBuffer {
 210  0
             ArrayList<String> urls = new ArrayList();
 211  0
             ArrayList<Integer> urlTupleIdx = new ArrayList();
 212  0
             int urlReadIdx = 0;
 213  
                             
 214  
             String[] identifiers;
 215  
             String[] anchorTexts;
 216  0
             int writeTupleIndex = 0;
 217  0
             int readTupleIndex = 0;
 218  
             int batchSize;
 219  
 
 220  0
             public ShreddedBuffer(int batchSize) {
 221  0
                 this.batchSize = batchSize;
 222  
 
 223  0
                 identifiers = new String[batchSize];
 224  0
                 anchorTexts = new String[batchSize];
 225  0
             }                              
 226  
 
 227  
             public ShreddedBuffer() {    
 228  0
                 this(10000);
 229  0
             }                                                                                                                    
 230  
             
 231  
             public void processUrl(String url) {
 232  0
                 urls.add(url);
 233  0
                 urlTupleIdx.add(writeTupleIndex);
 234  0
             }                                      
 235  
             public void processTuple(String identifier, String anchorText) {
 236  0
                 assert urls.size() > 0;
 237  0
                 identifiers[writeTupleIndex] = identifier;
 238  0
                 anchorTexts[writeTupleIndex] = anchorText;
 239  0
                 writeTupleIndex++;
 240  0
             }
 241  
             public void resetData() {
 242  0
                 urls.clear();
 243  0
                 urlTupleIdx.clear();
 244  0
                 writeTupleIndex = 0;
 245  0
             }                  
 246  
                                  
 247  
             public void resetRead() {
 248  0
                 readTupleIndex = 0;
 249  0
                 urlReadIdx = 0;
 250  0
             } 
 251  
 
 252  
             public void reset() {
 253  0
                 resetData();
 254  0
                 resetRead();
 255  0
             } 
 256  
             public boolean isFull() {
 257  0
                 return writeTupleIndex >= batchSize;
 258  
             }
 259  
 
 260  
             public boolean isEmpty() {
 261  0
                 return writeTupleIndex == 0;
 262  
             }                          
 263  
 
 264  
             public boolean isAtEnd() {
 265  0
                 return readTupleIndex >= writeTupleIndex;
 266  
             }           
 267  
             public void incrementUrl() {
 268  0
                 urlReadIdx++;  
 269  0
             }                                                                                              
 270  
 
 271  
             public void autoIncrementUrl() {
 272  0
                 while (readTupleIndex >= getUrlEndIndex() && readTupleIndex < writeTupleIndex)
 273  0
                     urlReadIdx++;
 274  0
             }                 
 275  
             public void incrementTuple() {
 276  0
                 readTupleIndex++;
 277  0
             }                    
 278  
             public int getUrlEndIndex() {
 279  0
                 if ((urlReadIdx+1) >= urlTupleIdx.size())
 280  0
                     return writeTupleIndex;
 281  0
                 return urlTupleIdx.get(urlReadIdx+1);
 282  
             }
 283  
             public int getReadIndex() {
 284  0
                 return readTupleIndex;
 285  
             }   
 286  
 
 287  
             public int getWriteIndex() {
 288  0
                 return writeTupleIndex;
 289  
             } 
 290  
             public String getUrl() {
 291  0
                 assert readTupleIndex < writeTupleIndex;
 292  0
                 assert urlReadIdx < urls.size();
 293  
                 
 294  0
                 return urls.get(urlReadIdx);
 295  
             }
 296  
             public String getIdentifier() {
 297  0
                 assert readTupleIndex < writeTupleIndex;
 298  0
                 return identifiers[readTupleIndex];
 299  
             }                                         
 300  
             public String getAnchorText() {
 301  0
                 assert readTupleIndex < writeTupleIndex;
 302  0
                 return anchorTexts[readTupleIndex];
 303  
             }                                         
 304  
             public void copyTuples(int endIndex, ShreddedProcessor output) throws IOException {
 305  0
                 while (getReadIndex() < endIndex) {
 306  0
                    output.processTuple(getIdentifier(), getAnchorText());
 307  0
                    incrementTuple();
 308  
                 }
 309  0
             }                                                                           
 310  
             public void copyUntilIndexUrl(int endIndex, ShreddedProcessor output) throws IOException {
 311  0
                 while (getReadIndex() < endIndex) {
 312  0
                     output.processUrl(getUrl());
 313  0
                     assert getUrlEndIndex() <= endIndex;
 314  0
                     copyTuples(getUrlEndIndex(), output);
 315  0
                     incrementUrl();
 316  
                 }
 317  0
             }  
 318  
             public void copyUntilUrl(ShreddedBuffer other, ShreddedProcessor output) throws IOException {
 319  0
                 while (!isAtEnd()) {
 320  0
                     if (other != null) {   
 321  0
                         assert !other.isAtEnd();
 322  0
                         int c = + Utility.compare(getUrl(), other.getUrl());
 323  
                     
 324  0
                         if (c > 0) {
 325  0
                             break;   
 326  
                         }
 327  
                         
 328  0
                         output.processUrl(getUrl());
 329  
                                       
 330  0
                         copyTuples(getUrlEndIndex(), output);
 331  0
                     } else {
 332  0
                         output.processUrl(getUrl());
 333  0
                         copyTuples(getUrlEndIndex(), output);
 334  
                     }
 335  0
                     incrementUrl();  
 336  
                     
 337  
                
 338  
                 }
 339  0
             }
 340  
             public void copyUntil(ShreddedBuffer other, ShreddedProcessor output) throws IOException {
 341  0
                 copyUntilUrl(other, output);
 342  0
             }
 343  
             
 344  
         }                         
 345  0
         public static class ShreddedCombiner implements ReaderSource<IdentifiedLink>, ShreddedSource {   
 346  
             public ShreddedProcessor processor;
 347  
             Collection<ShreddedReader> readers;       
 348  0
             boolean closeOnExit = false;
 349  0
             boolean uninitialized = true;
 350  0
             PriorityQueue<ShreddedReader> queue = new PriorityQueue<ShreddedReader>();
 351  
             
 352  0
             public ShreddedCombiner(Collection<ShreddedReader> readers, boolean closeOnExit) {
 353  0
                 this.readers = readers;                                                       
 354  0
                 this.closeOnExit = closeOnExit;
 355  0
             }
 356  
                                   
 357  
             public void setProcessor(Step processor) throws IncompatibleProcessorException {  
 358  0
                 if (processor instanceof ShreddedProcessor) {
 359  0
                     this.processor = new DuplicateEliminator((ShreddedProcessor) processor);
 360  0
                 } else if (processor instanceof IdentifiedLink.Processor) {
 361  0
                     this.processor = new DuplicateEliminator(new TupleUnshredder((IdentifiedLink.Processor) processor));
 362  0
                 } else if (processor instanceof org.galagosearch.tupleflow.Processor) {
 363  0
                     this.processor = new DuplicateEliminator(new TupleUnshredder((org.galagosearch.tupleflow.Processor<IdentifiedLink>) processor));
 364  
                 } else {
 365  0
                     throw new IncompatibleProcessorException(processor.getClass().getName() + " is not supported by " + this.getClass().getName());                                                                       
 366  
                 }
 367  0
             }                                
 368  
             
 369  
             public Class<IdentifiedLink> getOutputClass() {
 370  0
                 return IdentifiedLink.class;
 371  
             }
 372  
             
 373  
             public void initialize() throws IOException {
 374  0
                 for (ShreddedReader reader : readers) {
 375  0
                     reader.fill();                                        
 376  
                     
 377  0
                     if (!reader.getBuffer().isAtEnd())
 378  0
                         queue.add(reader);
 379  
                 }   
 380  
 
 381  0
                 uninitialized = false;
 382  0
             }
 383  
 
 384  
             public void run() throws IOException {
 385  0
                 initialize();
 386  
                
 387  0
                 while (queue.size() > 0) {
 388  0
                     ShreddedReader top = queue.poll();
 389  0
                     ShreddedReader next = null;
 390  0
                     ShreddedBuffer nextBuffer = null; 
 391  
                     
 392  0
                     assert !top.getBuffer().isAtEnd();
 393  
                                                   
 394  0
                     if (queue.size() > 0) {
 395  0
                         next = queue.peek();
 396  0
                         nextBuffer = next.getBuffer();
 397  0
                         assert !nextBuffer.isAtEnd();
 398  
                     }
 399  
                     
 400  0
                     top.getBuffer().copyUntil(nextBuffer, processor);
 401  0
                     if (top.getBuffer().isAtEnd())
 402  0
                         top.fill();                 
 403  
                         
 404  0
                     if (!top.getBuffer().isAtEnd())
 405  0
                         queue.add(top);
 406  0
                 }              
 407  
                 
 408  0
                 if (closeOnExit)
 409  0
                     processor.close();
 410  0
             }
 411  
 
 412  
             public IdentifiedLink read() throws IOException {
 413  0
                 if (uninitialized)
 414  0
                     initialize();
 415  
 
 416  0
                 IdentifiedLink result = null;
 417  
 
 418  0
                 while (queue.size() > 0) {
 419  0
                     ShreddedReader top = queue.poll();
 420  0
                     result = top.read();
 421  
 
 422  0
                     if (result != null) {
 423  0
                         if (top.getBuffer().isAtEnd())
 424  0
                             top.fill();
 425  
 
 426  0
                         queue.offer(top);
 427  0
                         break;
 428  
                     } 
 429  0
                 }
 430  
 
 431  0
                 return result;
 432  
             }
 433  
         } 
 434  0
         public static class ShreddedReader implements Step, Comparable<ShreddedReader>, TypeReader<IdentifiedLink>, ShreddedSource {      
 435  
             public ShreddedProcessor processor;
 436  
             ShreddedBuffer buffer;
 437  0
             IdentifiedLink last = new IdentifiedLink();         
 438  0
             long updateUrlCount = -1;
 439  0
             long tupleCount = 0;
 440  0
             long bufferStartCount = 0;  
 441  
             ArrayInput input;
 442  
             
 443  0
             public ShreddedReader(ArrayInput input) {
 444  0
                 this.input = input; 
 445  0
                 this.buffer = new ShreddedBuffer();
 446  0
             }                               
 447  
             
 448  0
             public ShreddedReader(ArrayInput input, int bufferSize) { 
 449  0
                 this.input = input;
 450  0
                 this.buffer = new ShreddedBuffer(bufferSize);
 451  0
             }
 452  
                  
 453  
             public final int compareTo(ShreddedReader other) {
 454  0
                 ShreddedBuffer otherBuffer = other.getBuffer();
 455  
                 
 456  0
                 if (buffer.isAtEnd() && otherBuffer.isAtEnd()) {
 457  0
                     return 0;                 
 458  0
                 } else if (buffer.isAtEnd()) {
 459  0
                     return -1;
 460  0
                 } else if (otherBuffer.isAtEnd()) {
 461  0
                     return 1;
 462  
                 }
 463  
                                    
 464  0
                 int result = 0;
 465  
                 do {
 466  0
                     result = + Utility.compare(buffer.getUrl(), otherBuffer.getUrl());
 467  0
                     if(result != 0) break;
 468  
                 } while (false);                                             
 469  
                 
 470  0
                 return result;
 471  
             }
 472  
             
 473  
             public final ShreddedBuffer getBuffer() {
 474  0
                 return buffer;
 475  
             }                
 476  
             
 477  
             public final IdentifiedLink read() throws IOException {
 478  0
                 if (buffer.isAtEnd()) {
 479  0
                     fill();             
 480  
                 
 481  0
                     if (buffer.isAtEnd()) {
 482  0
                         return null;
 483  
                     }
 484  
                 }
 485  
                       
 486  0
                 assert !buffer.isAtEnd();
 487  0
                 IdentifiedLink result = new IdentifiedLink();
 488  
                 
 489  0
                 result.url = buffer.getUrl();
 490  0
                 result.identifier = buffer.getIdentifier();
 491  0
                 result.anchorText = buffer.getAnchorText();
 492  
                 
 493  0
                 buffer.incrementTuple();
 494  0
                 buffer.autoIncrementUrl();
 495  
                 
 496  0
                 return result;
 497  
             }           
 498  
             
 499  
             public final void fill() throws IOException {
 500  
                 try {   
 501  0
                     buffer.reset();
 502  
                     
 503  0
                     if (tupleCount != 0) {
 504  
                                                       
 505  0
                         if(updateUrlCount - tupleCount > 0) {
 506  0
                             buffer.urls.add(last.url);
 507  0
                             buffer.urlTupleIdx.add((int) (updateUrlCount - tupleCount));
 508  
                         }
 509  0
                         bufferStartCount = tupleCount;
 510  
                     }
 511  
                     
 512  0
                     while (!buffer.isFull()) {
 513  0
                         updateUrl();
 514  0
                         buffer.processTuple(input.readString(), input.readString());
 515  0
                         tupleCount++;
 516  
                     }
 517  0
                 } catch(EOFException e) {}
 518  0
             }
 519  
 
 520  
             public final void updateUrl() throws IOException {
 521  0
                 if (updateUrlCount > tupleCount)
 522  0
                     return;
 523  
                      
 524  0
                 last.url = input.readString();
 525  0
                 updateUrlCount = tupleCount + input.readInt();
 526  
                                       
 527  0
                 buffer.processUrl(last.url);
 528  0
             }
 529  
 
 530  
             public void run() throws IOException {
 531  
                 while (true) {
 532  0
                     fill();
 533  
                     
 534  0
                     if (buffer.isAtEnd())
 535  0
                         break;
 536  
                     
 537  0
                     buffer.copyUntil(null, processor);
 538  
                 }      
 539  0
                 processor.close();
 540  0
             }
 541  
             
 542  
             public void setProcessor(Step processor) throws IncompatibleProcessorException {  
 543  0
                 if (processor instanceof ShreddedProcessor) {
 544  0
                     this.processor = new DuplicateEliminator((ShreddedProcessor) processor);
 545  0
                 } else if (processor instanceof IdentifiedLink.Processor) {
 546  0
                     this.processor = new DuplicateEliminator(new TupleUnshredder((IdentifiedLink.Processor) processor));
 547  0
                 } else if (processor instanceof org.galagosearch.tupleflow.Processor) {
 548  0
                     this.processor = new DuplicateEliminator(new TupleUnshredder((org.galagosearch.tupleflow.Processor<IdentifiedLink>) processor));
 549  
                 } else {
 550  0
                     throw new IncompatibleProcessorException(processor.getClass().getName() + " is not supported by " + this.getClass().getName());                                                                       
 551  
                 }
 552  0
             }                                
 553  
             
 554  
             public Class<IdentifiedLink> getOutputClass() {
 555  0
                 return IdentifiedLink.class;
 556  
             }                
 557  
         }
 558  
         
 559  
         public static class DuplicateEliminator implements ShreddedProcessor {
 560  
             public ShreddedProcessor processor;
 561  0
             IdentifiedLink last = new IdentifiedLink();
 562  0
             boolean urlProcess = true;
 563  
                                            
 564  0
             public DuplicateEliminator() {}
 565  0
             public DuplicateEliminator(ShreddedProcessor processor) {
 566  0
                 this.processor = processor;
 567  0
             }
 568  
             
 569  
             public void setShreddedProcessor(ShreddedProcessor processor) {
 570  0
                 this.processor = processor;
 571  0
             }
 572  
 
 573  
             public void processUrl(String url) throws IOException {  
 574  0
                 if (urlProcess || Utility.compare(url, last.url) != 0) {
 575  0
                     last.url = url;
 576  0
                     processor.processUrl(url);
 577  0
                     urlProcess = false;
 578  
                 }
 579  0
             }  
 580  
             
 581  
             public void resetUrl() {
 582  0
                  urlProcess = true;
 583  0
             }                                                
 584  
                                
 585  
             public void processTuple(String identifier, String anchorText) throws IOException {
 586  0
                 processor.processTuple(identifier, anchorText);
 587  0
             } 
 588  
             
 589  
             public void close() throws IOException {
 590  0
                 processor.close();
 591  0
             }                    
 592  
         }
 593  
         public static class TupleUnshredder implements ShreddedProcessor {
 594  0
             IdentifiedLink last = new IdentifiedLink();
 595  
             public org.galagosearch.tupleflow.Processor<IdentifiedLink> processor;                               
 596  
             
 597  0
             public TupleUnshredder(IdentifiedLink.Processor processor) {
 598  0
                 this.processor = processor;
 599  0
             }         
 600  
             
 601  0
             public TupleUnshredder(org.galagosearch.tupleflow.Processor<IdentifiedLink> processor) {
 602  0
                 this.processor = processor;
 603  0
             }
 604  
             
 605  
             public IdentifiedLink clone(IdentifiedLink object) {
 606  0
                 IdentifiedLink result = new IdentifiedLink();
 607  0
                 if (object == null) return result;
 608  0
                 result.identifier = object.identifier; 
 609  0
                 result.url = object.url; 
 610  0
                 result.anchorText = object.anchorText; 
 611  0
                 return result;
 612  
             }                 
 613  
             
 614  
             public void processUrl(String url) throws IOException {
 615  0
                 last.url = url;
 616  0
             }   
 617  
                 
 618  
             
 619  
             public void processTuple(String identifier, String anchorText) throws IOException {
 620  0
                 last.identifier = identifier;
 621  0
                 last.anchorText = anchorText;
 622  0
                 processor.process(clone(last));
 623  0
             }               
 624  
             
 625  
             public void close() throws IOException {
 626  0
                 processor.close();
 627  0
             }
 628  
         }     
 629  0
         public static class TupleShredder implements Processor {
 630  0
             IdentifiedLink last = new IdentifiedLink();
 631  
             public ShreddedProcessor processor;
 632  
             
 633  0
             public TupleShredder(ShreddedProcessor processor) {
 634  0
                 this.processor = processor;
 635  0
             }                              
 636  
             
 637  
             public IdentifiedLink clone(IdentifiedLink object) {
 638  0
                 IdentifiedLink result = new IdentifiedLink();
 639  0
                 if (object == null) return result;
 640  0
                 result.identifier = object.identifier; 
 641  0
                 result.url = object.url; 
 642  0
                 result.anchorText = object.anchorText; 
 643  0
                 return result;
 644  
             }                 
 645  
             
 646  
             public void process(IdentifiedLink object) throws IOException {                                                                                                                                                   
 647  0
                 boolean processAll = false;
 648  0
                 if(last == null || Utility.compare(last.url, object.url) != 0 || processAll) { processor.processUrl(object.url); processAll = true; }
 649  0
                 processor.processTuple(object.identifier, object.anchorText);                                         
 650  0
             }
 651  
                           
 652  
             public Class<IdentifiedLink> getInputClass() {
 653  0
                 return IdentifiedLink.class;
 654  
             }
 655  
             
 656  
             public void close() throws IOException {
 657  0
                 processor.close();
 658  0
             }                     
 659  
         }
 660  
     } 
 661  
 }