Coverage Report - org.galagosearch.core.types.DocumentNumberWordInteger
 
Classes in this File Line Coverage Branch Coverage Complexity
DocumentNumberWordInteger
0%
0/18
0%
0/8
0
DocumentNumberWordInteger$DocumentOrder
0%
0/24
0%
0/4
0
DocumentNumberWordInteger$DocumentOrder$1
0%
0/5
0%
0/2
0
DocumentNumberWordInteger$DocumentOrder$2
0%
0/5
0%
0/2
0
DocumentNumberWordInteger$DocumentOrder$DuplicateEliminator
0%
0/19
0%
0/4
0
DocumentNumberWordInteger$DocumentOrder$OrderedWriterClass
0%
0/14
0%
0/6
0
DocumentNumberWordInteger$DocumentOrder$ShreddedBuffer
0%
0/78
0%
0/50
0
DocumentNumberWordInteger$DocumentOrder$ShreddedCombiner
0%
0/55
0%
0/36
0
DocumentNumberWordInteger$DocumentOrder$ShreddedProcessor
N/A
N/A
0
DocumentNumberWordInteger$DocumentOrder$ShreddedReader
0%
0/70
0%
0/34
0
DocumentNumberWordInteger$DocumentOrder$ShreddedSource
N/A
N/A
0
DocumentNumberWordInteger$DocumentOrder$ShreddedWriter
0%
0/37
0%
0/14
0
DocumentNumberWordInteger$DocumentOrder$TupleShredder
0%
0/18
0%
0/8
0
DocumentNumberWordInteger$DocumentOrder$TupleUnshredder
0%
0/21
0%
0/2
0
DocumentNumberWordInteger$Processor
N/A
N/A
0
DocumentNumberWordInteger$Source
N/A
N/A
0
DocumentNumberWordInteger$Unordered
0%
0/23
0%
0/4
0
DocumentNumberWordInteger$Unordered$1
0%
0/3
N/A
0
DocumentNumberWordInteger$Unordered$2
0%
0/3
N/A
0
DocumentNumberWordInteger$Unordered$DuplicateEliminator
0%
0/11
N/A
0
DocumentNumberWordInteger$Unordered$OrderedWriterClass
0%
0/13
N/A
0
DocumentNumberWordInteger$Unordered$ShreddedBuffer
0%
0/41
0%
0/20
0
DocumentNumberWordInteger$Unordered$ShreddedCombiner
0%
0/55
0%
0/36
0
DocumentNumberWordInteger$Unordered$ShreddedProcessor
N/A
N/A
0
DocumentNumberWordInteger$Unordered$ShreddedReader
0%
0/56
0%
0/28
0
DocumentNumberWordInteger$Unordered$ShreddedSource
N/A
N/A
0
DocumentNumberWordInteger$Unordered$ShreddedWriter
0%
0/23
0%
0/6
0
DocumentNumberWordInteger$Unordered$TupleShredder
0%
0/17
0%
0/2
0
DocumentNumberWordInteger$Unordered$TupleUnshredder
0%
0/20
0%
0/2
0
DocumentNumberWordInteger$ValueOrder
0%
0/24
0%
0/4
0
DocumentNumberWordInteger$ValueOrder$1
0%
0/5
0%
0/2
0
DocumentNumberWordInteger$ValueOrder$2
0%
0/5
0%
0/2
0
DocumentNumberWordInteger$ValueOrder$DuplicateEliminator
0%
0/19
0%
0/4
0
DocumentNumberWordInteger$ValueOrder$OrderedWriterClass
0%
0/14
0%
0/6
0
DocumentNumberWordInteger$ValueOrder$ShreddedBuffer
0%
0/78
0%
0/50
0
DocumentNumberWordInteger$ValueOrder$ShreddedCombiner
0%
0/55
0%
0/36
0
DocumentNumberWordInteger$ValueOrder$ShreddedProcessor
N/A
N/A
0
DocumentNumberWordInteger$ValueOrder$ShreddedReader
0%
0/70
0%
0/34
0
DocumentNumberWordInteger$ValueOrder$ShreddedSource
N/A
N/A
0
DocumentNumberWordInteger$ValueOrder$ShreddedWriter
0%
0/37
0%
0/14
0
DocumentNumberWordInteger$ValueOrder$TupleShredder
0%
0/18
0%
0/8
0
DocumentNumberWordInteger$ValueOrder$TupleUnshredder
0%
0/21
0%
0/2
0
DocumentNumberWordInteger$WordDocumentOrder
0%
0/25
0%
0/4
0
DocumentNumberWordInteger$WordDocumentOrder$1
0%
0/7
0%
0/4
0
DocumentNumberWordInteger$WordDocumentOrder$2
0%
0/7
0%
0/4
0
DocumentNumberWordInteger$WordDocumentOrder$DuplicateEliminator
0%
0/29
0%
0/8
0
DocumentNumberWordInteger$WordDocumentOrder$OrderedWriterClass
0%
0/15
0%
0/12
0
DocumentNumberWordInteger$WordDocumentOrder$ShreddedBuffer
0%
0/121
0%
0/86
0
DocumentNumberWordInteger$WordDocumentOrder$ShreddedCombiner
0%
0/55
0%
0/36
0
DocumentNumberWordInteger$WordDocumentOrder$ShreddedProcessor
N/A
N/A
0
DocumentNumberWordInteger$WordDocumentOrder$ShreddedReader
0%
0/84
0%
0/40
0
DocumentNumberWordInteger$WordDocumentOrder$ShreddedSource
N/A
N/A
0
DocumentNumberWordInteger$WordDocumentOrder$ShreddedWriter
0%
0/50
0%
0/22
0
DocumentNumberWordInteger$WordDocumentOrder$TupleShredder
0%
0/19
0%
0/14
0
DocumentNumberWordInteger$WordDocumentOrder$TupleUnshredder
0%
0/22
0%
0/2
0
 
 1  
 // This file was automatically generated with the command: 
 2  
 //     java org.galagosearch.tupleflow.typebuilder.TypeBuilderMojo ...
 3  
 package org.galagosearch.core.types;
 4  
 
 5  
 import org.galagosearch.tupleflow.Utility;
 6  
 import org.galagosearch.tupleflow.ArrayInput;
 7  
 import org.galagosearch.tupleflow.ArrayOutput;
 8  
 import org.galagosearch.tupleflow.Order;   
 9  
 import org.galagosearch.tupleflow.OrderedWriter;
 10  
 import org.galagosearch.tupleflow.Type; 
 11  
 import org.galagosearch.tupleflow.TypeReader;
 12  
 import org.galagosearch.tupleflow.Step; 
 13  
 import org.galagosearch.tupleflow.IncompatibleProcessorException;
 14  
 import org.galagosearch.tupleflow.ReaderSource;
 15  
 import java.io.IOException;             
 16  
 import java.io.EOFException;
 17  
 import java.io.UnsupportedEncodingException;
 18  
 import java.util.ArrayList;
 19  
 import java.util.Arrays;   
 20  
 import java.util.Comparator;
 21  
 import java.util.PriorityQueue;
 22  
 import java.util.Collection;
 23  
 
 24  
 public class DocumentNumberWordInteger implements Type<DocumentNumberWordInteger> {
 25  
     public byte[] word;
 26  
     public int document;
 27  
     public int value; 
 28  
     
 29  0
     public DocumentNumberWordInteger() {}
 30  0
     public DocumentNumberWordInteger(byte[] word, int document, int value) {
 31  0
         this.word = word;
 32  0
         this.document = document;
 33  0
         this.value = value;
 34  0
     }  
 35  
     
 36  
     public String toString() {
 37  
         try {
 38  0
             return String.format("%s,%d,%d",
 39  
                                    new String(word, "UTF-8"), document, value);
 40  0
         } catch(UnsupportedEncodingException e) {
 41  0
             throw new RuntimeException("Couldn't convert string to UTF-8.");
 42  
         }
 43  
     } 
 44  
 
 45  
     public Order<DocumentNumberWordInteger> getOrder(String... spec) {
 46  0
         if (Arrays.equals(spec, new String[] {  })) {
 47  0
             return new Unordered();
 48  
         }
 49  0
         if (Arrays.equals(spec, new String[] { "+word", "+document" })) {
 50  0
             return new WordDocumentOrder();
 51  
         }
 52  0
         if (Arrays.equals(spec, new String[] { "+document" })) {
 53  0
             return new DocumentOrder();
 54  
         }
 55  0
         if (Arrays.equals(spec, new String[] { "+value" })) {
 56  0
             return new ValueOrder();
 57  
         }
 58  0
         return null;
 59  
     } 
 60  
       
 61  
     public interface Processor extends Step, org.galagosearch.tupleflow.Processor<DocumentNumberWordInteger> {
 62  
         public void process(DocumentNumberWordInteger object) throws IOException;
 63  
         public void close() throws IOException;
 64  
     }                        
 65  
     public interface Source extends Step {
 66  
     }
 67  0
     public static class Unordered implements Order<DocumentNumberWordInteger> {
 68  
         public int hash(DocumentNumberWordInteger object) {
 69  0
             int h = 0;
 70  0
             return h;
 71  
         } 
 72  
         public Comparator<DocumentNumberWordInteger> greaterThan() {
 73  0
             return new Comparator<DocumentNumberWordInteger>() {
 74  0
                 public int compare(DocumentNumberWordInteger one, DocumentNumberWordInteger two) {
 75  0
                     int result = 0;
 76  
                     do {
 77  
                     } while (false);
 78  0
                     return -result;
 79  
                 }
 80  
             };
 81  
         }     
 82  
         public Comparator<DocumentNumberWordInteger> lessThan() {
 83  0
             return new Comparator<DocumentNumberWordInteger>() {
 84  0
                 public int compare(DocumentNumberWordInteger one, DocumentNumberWordInteger two) {
 85  0
                     int result = 0;
 86  
                     do {
 87  
                     } while (false);
 88  0
                     return result;
 89  
                 }
 90  
             };
 91  
         }     
 92  
         public TypeReader<DocumentNumberWordInteger> orderedReader(ArrayInput _input) {
 93  0
             return new ShreddedReader(_input);
 94  
         }    
 95  
 
 96  
         public TypeReader<DocumentNumberWordInteger> orderedReader(ArrayInput _input, int bufferSize) {
 97  0
             return new ShreddedReader(_input, bufferSize);
 98  
         }    
 99  
         public OrderedWriter<DocumentNumberWordInteger> orderedWriter(ArrayOutput _output) {
 100  0
             ShreddedWriter w = new ShreddedWriter(_output);
 101  0
             return new OrderedWriterClass(w); 
 102  
         }                                    
 103  0
         public static class OrderedWriterClass extends OrderedWriter< DocumentNumberWordInteger > {
 104  0
             DocumentNumberWordInteger last = null;
 105  0
             ShreddedWriter shreddedWriter = null; 
 106  
             
 107  0
             public OrderedWriterClass(ShreddedWriter s) {
 108  0
                 this.shreddedWriter = s;
 109  0
             }
 110  
             
 111  
             public void process(DocumentNumberWordInteger object) throws IOException {
 112  0
                boolean processAll = false;
 113  0
                shreddedWriter.processTuple(object.word, object.document, object.value);
 114  0
                last = object;
 115  0
             }           
 116  
                  
 117  
             public void close() throws IOException {
 118  0
                 shreddedWriter.close();
 119  0
             }
 120  
             
 121  
             public Class<DocumentNumberWordInteger> getInputClass() {
 122  0
                 return DocumentNumberWordInteger.class;
 123  
             }
 124  
         } 
 125  
         public ReaderSource<DocumentNumberWordInteger> orderedCombiner(Collection<TypeReader<DocumentNumberWordInteger>> readers, boolean closeOnExit) {
 126  0
             ArrayList<ShreddedReader> shreddedReaders = new ArrayList();
 127  
             
 128  0
             for (TypeReader<DocumentNumberWordInteger> reader : readers) {
 129  0
                 shreddedReaders.add((ShreddedReader)reader);
 130  
             }
 131  
             
 132  0
             return new ShreddedCombiner(shreddedReaders, closeOnExit);
 133  
         }                  
 134  
         public DocumentNumberWordInteger clone(DocumentNumberWordInteger object) {
 135  0
             DocumentNumberWordInteger result = new DocumentNumberWordInteger();
 136  0
             if (object == null) return result;
 137  0
             result.word = object.word; 
 138  0
             result.document = object.document; 
 139  0
             result.value = object.value; 
 140  0
             return result;
 141  
         }                 
 142  
         public Class<DocumentNumberWordInteger> getOrderedClass() {
 143  0
             return DocumentNumberWordInteger.class;
 144  
         }                           
 145  
         public String[] getOrderSpec() {
 146  0
             return new String[] {};
 147  
         }
 148  
 
 149  
         public static String getSpecString() {
 150  0
             return "";
 151  
         }
 152  
                            
 153  
         public interface ShreddedProcessor extends Step {
 154  
             public void processTuple(byte[] word, int document, int value) throws IOException;
 155  
             public void close() throws IOException;
 156  
         }    
 157  
         public interface ShreddedSource extends Step {
 158  
         }                                              
 159  
         
 160  
         public static class ShreddedWriter implements ShreddedProcessor {
 161  
             ArrayOutput output;
 162  0
             ShreddedBuffer buffer = new ShreddedBuffer();
 163  0
             boolean lastFlush = false;
 164  
             
 165  0
             public ShreddedWriter(ArrayOutput output) {
 166  0
                 this.output = output;
 167  0
             }                        
 168  
             
 169  
             public void close() throws IOException {
 170  0
                 flush();
 171  0
             }
 172  
             
 173  
             public final void processTuple(byte[] word, int document, int value) throws IOException {
 174  0
                 if (lastFlush) {
 175  0
                     lastFlush = false;
 176  
                 }
 177  0
                 buffer.processTuple(word, document, value);
 178  0
                 if (buffer.isFull())
 179  0
                     flush();
 180  0
             }
 181  
             public final void flushTuples(int pauseIndex) throws IOException {
 182  
                 
 183  0
                 while (buffer.getReadIndex() < pauseIndex) {
 184  
                            
 185  0
                     output.writeBytes(buffer.getWord());
 186  0
                     output.writeInt(buffer.getDocument());
 187  0
                     output.writeInt(buffer.getValue());
 188  0
                     buffer.incrementTuple();
 189  
                 }
 190  0
             }  
 191  
             public void flush() throws IOException { 
 192  0
                 flushTuples(buffer.getWriteIndex());
 193  0
                 buffer.reset(); 
 194  0
                 lastFlush = true;
 195  0
             }                           
 196  
         }
 197  0
         public static class ShreddedBuffer {
 198  
                             
 199  
             byte[][] words;
 200  
             int[] documents;
 201  
             int[] values;
 202  0
             int writeTupleIndex = 0;
 203  0
             int readTupleIndex = 0;
 204  
             int batchSize;
 205  
 
 206  0
             public ShreddedBuffer(int batchSize) {
 207  0
                 this.batchSize = batchSize;
 208  
 
 209  0
                 words = new byte[batchSize][];
 210  0
                 documents = new int[batchSize];
 211  0
                 values = new int[batchSize];
 212  0
             }                              
 213  
 
 214  
             public ShreddedBuffer() {    
 215  0
                 this(10000);
 216  0
             }                                                                                                                    
 217  
             
 218  
             public void processTuple(byte[] word, int document, int value) {
 219  0
                 words[writeTupleIndex] = word;
 220  0
                 documents[writeTupleIndex] = document;
 221  0
                 values[writeTupleIndex] = value;
 222  0
                 writeTupleIndex++;
 223  0
             }
 224  
             public void resetData() {
 225  0
                 writeTupleIndex = 0;
 226  0
             }                  
 227  
                                  
 228  
             public void resetRead() {
 229  0
                 readTupleIndex = 0;
 230  0
             } 
 231  
 
 232  
             public void reset() {
 233  0
                 resetData();
 234  0
                 resetRead();
 235  0
             } 
 236  
             public boolean isFull() {
 237  0
                 return writeTupleIndex >= batchSize;
 238  
             }
 239  
 
 240  
             public boolean isEmpty() {
 241  0
                 return writeTupleIndex == 0;
 242  
             }                          
 243  
 
 244  
             public boolean isAtEnd() {
 245  0
                 return readTupleIndex >= writeTupleIndex;
 246  
             }           
 247  
             public void incrementTuple() {
 248  0
                 readTupleIndex++;
 249  0
             }                    
 250  
             public int getReadIndex() {
 251  0
                 return readTupleIndex;
 252  
             }   
 253  
 
 254  
             public int getWriteIndex() {
 255  0
                 return writeTupleIndex;
 256  
             } 
 257  
             public byte[] getWord() {
 258  0
                 assert readTupleIndex < writeTupleIndex;
 259  0
                 return words[readTupleIndex];
 260  
             }                                         
 261  
             public int getDocument() {
 262  0
                 assert readTupleIndex < writeTupleIndex;
 263  0
                 return documents[readTupleIndex];
 264  
             }                                         
 265  
             public int getValue() {
 266  0
                 assert readTupleIndex < writeTupleIndex;
 267  0
                 return values[readTupleIndex];
 268  
             }                                         
 269  
             public void copyTuples(int endIndex, ShreddedProcessor output) throws IOException {
 270  0
                 while (getReadIndex() < endIndex) {
 271  0
                    output.processTuple(getWord(), getDocument(), getValue());
 272  0
                    incrementTuple();
 273  
                 }
 274  0
             }                                                                           
 275  
              
 276  
             public void copyUntil(ShreddedBuffer other, ShreddedProcessor output) throws IOException {
 277  0
             }
 278  
             
 279  
         }                         
 280  0
         public static class ShreddedCombiner implements ReaderSource<DocumentNumberWordInteger>, ShreddedSource {   
 281  
             public ShreddedProcessor processor;
 282  
             Collection<ShreddedReader> readers;       
 283  0
             boolean closeOnExit = false;
 284  0
             boolean uninitialized = true;
 285  0
             PriorityQueue<ShreddedReader> queue = new PriorityQueue<ShreddedReader>();
 286  
             
 287  0
             public ShreddedCombiner(Collection<ShreddedReader> readers, boolean closeOnExit) {
 288  0
                 this.readers = readers;                                                       
 289  0
                 this.closeOnExit = closeOnExit;
 290  0
             }
 291  
                                   
 292  
             public void setProcessor(Step processor) throws IncompatibleProcessorException {  
 293  0
                 if (processor instanceof ShreddedProcessor) {
 294  0
                     this.processor = new DuplicateEliminator((ShreddedProcessor) processor);
 295  0
                 } else if (processor instanceof DocumentNumberWordInteger.Processor) {
 296  0
                     this.processor = new DuplicateEliminator(new TupleUnshredder((DocumentNumberWordInteger.Processor) processor));
 297  0
                 } else if (processor instanceof org.galagosearch.tupleflow.Processor) {
 298  0
                     this.processor = new DuplicateEliminator(new TupleUnshredder((org.galagosearch.tupleflow.Processor<DocumentNumberWordInteger>) processor));
 299  
                 } else {
 300  0
                     throw new IncompatibleProcessorException(processor.getClass().getName() + " is not supported by " + this.getClass().getName());                                                                       
 301  
                 }
 302  0
             }                                
 303  
             
 304  
             public Class<DocumentNumberWordInteger> getOutputClass() {
 305  0
                 return DocumentNumberWordInteger.class;
 306  
             }
 307  
             
 308  
             public void initialize() throws IOException {
 309  0
                 for (ShreddedReader reader : readers) {
 310  0
                     reader.fill();                                        
 311  
                     
 312  0
                     if (!reader.getBuffer().isAtEnd())
 313  0
                         queue.add(reader);
 314  
                 }   
 315  
 
 316  0
                 uninitialized = false;
 317  0
             }
 318  
 
 319  
             public void run() throws IOException {
 320  0
                 initialize();
 321  
                
 322  0
                 while (queue.size() > 0) {
 323  0
                     ShreddedReader top = queue.poll();
 324  0
                     ShreddedReader next = null;
 325  0
                     ShreddedBuffer nextBuffer = null; 
 326  
                     
 327  0
                     assert !top.getBuffer().isAtEnd();
 328  
                                                   
 329  0
                     if (queue.size() > 0) {
 330  0
                         next = queue.peek();
 331  0
                         nextBuffer = next.getBuffer();
 332  0
                         assert !nextBuffer.isAtEnd();
 333  
                     }
 334  
                     
 335  0
                     top.getBuffer().copyUntil(nextBuffer, processor);
 336  0
                     if (top.getBuffer().isAtEnd())
 337  0
                         top.fill();                 
 338  
                         
 339  0
                     if (!top.getBuffer().isAtEnd())
 340  0
                         queue.add(top);
 341  0
                 }              
 342  
                 
 343  0
                 if (closeOnExit)
 344  0
                     processor.close();
 345  0
             }
 346  
 
 347  
             public DocumentNumberWordInteger read() throws IOException {
 348  0
                 if (uninitialized)
 349  0
                     initialize();
 350  
 
 351  0
                 DocumentNumberWordInteger result = null;
 352  
 
 353  0
                 while (queue.size() > 0) {
 354  0
                     ShreddedReader top = queue.poll();
 355  0
                     result = top.read();
 356  
 
 357  0
                     if (result != null) {
 358  0
                         if (top.getBuffer().isAtEnd())
 359  0
                             top.fill();
 360  
 
 361  0
                         queue.offer(top);
 362  0
                         break;
 363  
                     } 
 364  0
                 }
 365  
 
 366  0
                 return result;
 367  
             }
 368  
         } 
 369  0
         public static class ShreddedReader implements Step, Comparable<ShreddedReader>, TypeReader<DocumentNumberWordInteger>, ShreddedSource {      
 370  
             public ShreddedProcessor processor;
 371  
             ShreddedBuffer buffer;
 372  0
             DocumentNumberWordInteger last = new DocumentNumberWordInteger();         
 373  0
             long tupleCount = 0;
 374  0
             long bufferStartCount = 0;  
 375  
             ArrayInput input;
 376  
             
 377  0
             public ShreddedReader(ArrayInput input) {
 378  0
                 this.input = input; 
 379  0
                 this.buffer = new ShreddedBuffer();
 380  0
             }                               
 381  
             
 382  0
             public ShreddedReader(ArrayInput input, int bufferSize) { 
 383  0
                 this.input = input;
 384  0
                 this.buffer = new ShreddedBuffer(bufferSize);
 385  0
             }
 386  
                  
 387  
             public final int compareTo(ShreddedReader other) {
 388  0
                 ShreddedBuffer otherBuffer = other.getBuffer();
 389  
                 
 390  0
                 if (buffer.isAtEnd() && otherBuffer.isAtEnd()) {
 391  0
                     return 0;                 
 392  0
                 } else if (buffer.isAtEnd()) {
 393  0
                     return -1;
 394  0
                 } else if (otherBuffer.isAtEnd()) {
 395  0
                     return 1;
 396  
                 }
 397  
                                    
 398  0
                 int result = 0;
 399  
                 do {
 400  
                 } while (false);                                             
 401  
                 
 402  0
                 return result;
 403  
             }
 404  
             
 405  
             public final ShreddedBuffer getBuffer() {
 406  0
                 return buffer;
 407  
             }                
 408  
             
 409  
             public final DocumentNumberWordInteger read() throws IOException {
 410  0
                 if (buffer.isAtEnd()) {
 411  0
                     fill();             
 412  
                 
 413  0
                     if (buffer.isAtEnd()) {
 414  0
                         return null;
 415  
                     }
 416  
                 }
 417  
                       
 418  0
                 assert !buffer.isAtEnd();
 419  0
                 DocumentNumberWordInteger result = new DocumentNumberWordInteger();
 420  
                 
 421  0
                 result.word = buffer.getWord();
 422  0
                 result.document = buffer.getDocument();
 423  0
                 result.value = buffer.getValue();
 424  
                 
 425  0
                 buffer.incrementTuple();
 426  
                 
 427  0
                 return result;
 428  
             }           
 429  
             
 430  
             public final void fill() throws IOException {
 431  
                 try {   
 432  0
                     buffer.reset();
 433  
                     
 434  0
                     if (tupleCount != 0) {
 435  0
                         bufferStartCount = tupleCount;
 436  
                     }
 437  
                     
 438  0
                     while (!buffer.isFull()) {
 439  0
                         buffer.processTuple(input.readBytes(), input.readInt(), input.readInt());
 440  0
                         tupleCount++;
 441  
                     }
 442  0
                 } catch(EOFException e) {}
 443  0
             }
 444  
 
 445  
 
 446  
             public void run() throws IOException {
 447  
                 while (true) {
 448  0
                     fill();
 449  
                     
 450  0
                     if (buffer.isAtEnd())
 451  0
                         break;
 452  
                     
 453  0
                     buffer.copyUntil(null, processor);
 454  
                 }      
 455  0
                 processor.close();
 456  0
             }
 457  
             
 458  
             public void setProcessor(Step processor) throws IncompatibleProcessorException {  
 459  0
                 if (processor instanceof ShreddedProcessor) {
 460  0
                     this.processor = new DuplicateEliminator((ShreddedProcessor) processor);
 461  0
                 } else if (processor instanceof DocumentNumberWordInteger.Processor) {
 462  0
                     this.processor = new DuplicateEliminator(new TupleUnshredder((DocumentNumberWordInteger.Processor) processor));
 463  0
                 } else if (processor instanceof org.galagosearch.tupleflow.Processor) {
 464  0
                     this.processor = new DuplicateEliminator(new TupleUnshredder((org.galagosearch.tupleflow.Processor<DocumentNumberWordInteger>) processor));
 465  
                 } else {
 466  0
                     throw new IncompatibleProcessorException(processor.getClass().getName() + " is not supported by " + this.getClass().getName());                                                                       
 467  
                 }
 468  0
             }                                
 469  
             
 470  
             public Class<DocumentNumberWordInteger> getOutputClass() {
 471  0
                 return DocumentNumberWordInteger.class;
 472  
             }                
 473  
         }
 474  
         
 475  
         public static class DuplicateEliminator implements ShreddedProcessor {
 476  
             public ShreddedProcessor processor;
 477  0
             DocumentNumberWordInteger last = new DocumentNumberWordInteger();
 478  
                                            
 479  0
             public DuplicateEliminator() {}
 480  0
             public DuplicateEliminator(ShreddedProcessor processor) {
 481  0
                 this.processor = processor;
 482  0
             }
 483  
             
 484  
             public void setShreddedProcessor(ShreddedProcessor processor) {
 485  0
                 this.processor = processor;
 486  0
             }
 487  
 
 488  
           
 489  
             
 490  
                                
 491  
             public void processTuple(byte[] word, int document, int value) throws IOException {
 492  0
                 processor.processTuple(word, document, value);
 493  0
             } 
 494  
             
 495  
             public void close() throws IOException {
 496  0
                 processor.close();
 497  0
             }                    
 498  
         }
 499  
         public static class TupleUnshredder implements ShreddedProcessor {
 500  0
             DocumentNumberWordInteger last = new DocumentNumberWordInteger();
 501  
             public org.galagosearch.tupleflow.Processor<DocumentNumberWordInteger> processor;                               
 502  
             
 503  0
             public TupleUnshredder(DocumentNumberWordInteger.Processor processor) {
 504  0
                 this.processor = processor;
 505  0
             }         
 506  
             
 507  0
             public TupleUnshredder(org.galagosearch.tupleflow.Processor<DocumentNumberWordInteger> processor) {
 508  0
                 this.processor = processor;
 509  0
             }
 510  
             
 511  
             public DocumentNumberWordInteger clone(DocumentNumberWordInteger object) {
 512  0
                 DocumentNumberWordInteger result = new DocumentNumberWordInteger();
 513  0
                 if (object == null) return result;
 514  0
                 result.word = object.word; 
 515  0
                 result.document = object.document; 
 516  0
                 result.value = object.value; 
 517  0
                 return result;
 518  
             }                 
 519  
             
 520  
             
 521  
             public void processTuple(byte[] word, int document, int value) throws IOException {
 522  0
                 last.word = word;
 523  0
                 last.document = document;
 524  0
                 last.value = value;
 525  0
                 processor.process(clone(last));
 526  0
             }               
 527  
             
 528  
             public void close() throws IOException {
 529  0
                 processor.close();
 530  0
             }
 531  
         }     
 532  0
         public static class TupleShredder implements Processor {
 533  0
             DocumentNumberWordInteger last = new DocumentNumberWordInteger();
 534  
             public ShreddedProcessor processor;
 535  
             
 536  0
             public TupleShredder(ShreddedProcessor processor) {
 537  0
                 this.processor = processor;
 538  0
             }                              
 539  
             
 540  
             public DocumentNumberWordInteger clone(DocumentNumberWordInteger object) {
 541  0
                 DocumentNumberWordInteger result = new DocumentNumberWordInteger();
 542  0
                 if (object == null) return result;
 543  0
                 result.word = object.word; 
 544  0
                 result.document = object.document; 
 545  0
                 result.value = object.value; 
 546  0
                 return result;
 547  
             }                 
 548  
             
 549  
             public void process(DocumentNumberWordInteger object) throws IOException {                                                                                                                                                   
 550  0
                 boolean processAll = false;
 551  0
                 processor.processTuple(object.word, object.document, object.value);                                         
 552  0
             }
 553  
                           
 554  
             public Class<DocumentNumberWordInteger> getInputClass() {
 555  0
                 return DocumentNumberWordInteger.class;
 556  
             }
 557  
             
 558  
             public void close() throws IOException {
 559  0
                 processor.close();
 560  0
             }                     
 561  
         }
 562  
     } 
 563  0
     public static class WordDocumentOrder implements Order<DocumentNumberWordInteger> {
 564  
         public int hash(DocumentNumberWordInteger object) {
 565  0
             int h = 0;
 566  0
             h += Utility.hash(object.word);
 567  0
             h += Utility.hash(object.document);
 568  0
             return h;
 569  
         } 
 570  
         public Comparator<DocumentNumberWordInteger> greaterThan() {
 571  0
             return new Comparator<DocumentNumberWordInteger>() {
 572  0
                 public int compare(DocumentNumberWordInteger one, DocumentNumberWordInteger two) {
 573  0
                     int result = 0;
 574  
                     do {
 575  0
                         result = + Utility.compare(one.word, two.word);
 576  0
                         if(result != 0) break;
 577  0
                         result = + Utility.compare(one.document, two.document);
 578  0
                         if(result != 0) break;
 579  
                     } while (false);
 580  0
                     return -result;
 581  
                 }
 582  
             };
 583  
         }     
 584  
         public Comparator<DocumentNumberWordInteger> lessThan() {
 585  0
             return new Comparator<DocumentNumberWordInteger>() {
 586  0
                 public int compare(DocumentNumberWordInteger one, DocumentNumberWordInteger two) {
 587  0
                     int result = 0;
 588  
                     do {
 589  0
                         result = + Utility.compare(one.word, two.word);
 590  0
                         if(result != 0) break;
 591  0
                         result = + Utility.compare(one.document, two.document);
 592  0
                         if(result != 0) break;
 593  
                     } while (false);
 594  0
                     return result;
 595  
                 }
 596  
             };
 597  
         }     
 598  
         public TypeReader<DocumentNumberWordInteger> orderedReader(ArrayInput _input) {
 599  0
             return new ShreddedReader(_input);
 600  
         }    
 601  
 
 602  
         public TypeReader<DocumentNumberWordInteger> orderedReader(ArrayInput _input, int bufferSize) {
 603  0
             return new ShreddedReader(_input, bufferSize);
 604  
         }    
 605  
         public OrderedWriter<DocumentNumberWordInteger> orderedWriter(ArrayOutput _output) {
 606  0
             ShreddedWriter w = new ShreddedWriter(_output);
 607  0
             return new OrderedWriterClass(w); 
 608  
         }                                    
 609  0
         public static class OrderedWriterClass extends OrderedWriter< DocumentNumberWordInteger > {
 610  0
             DocumentNumberWordInteger last = null;
 611  0
             ShreddedWriter shreddedWriter = null; 
 612  
             
 613  0
             public OrderedWriterClass(ShreddedWriter s) {
 614  0
                 this.shreddedWriter = s;
 615  0
             }
 616  
             
 617  
             public void process(DocumentNumberWordInteger object) throws IOException {
 618  0
                boolean processAll = false;
 619  0
                if (processAll || last == null || 0 != Utility.compare(object.word, last.word)) { processAll = true; shreddedWriter.processWord(object.word); }
 620  0
                if (processAll || last == null || 0 != Utility.compare(object.document, last.document)) { processAll = true; shreddedWriter.processDocument(object.document); }
 621  0
                shreddedWriter.processTuple(object.value);
 622  0
                last = object;
 623  0
             }           
 624  
                  
 625  
             public void close() throws IOException {
 626  0
                 shreddedWriter.close();
 627  0
             }
 628  
             
 629  
             public Class<DocumentNumberWordInteger> getInputClass() {
 630  0
                 return DocumentNumberWordInteger.class;
 631  
             }
 632  
         } 
 633  
         public ReaderSource<DocumentNumberWordInteger> orderedCombiner(Collection<TypeReader<DocumentNumberWordInteger>> readers, boolean closeOnExit) {
 634  0
             ArrayList<ShreddedReader> shreddedReaders = new ArrayList();
 635  
             
 636  0
             for (TypeReader<DocumentNumberWordInteger> reader : readers) {
 637  0
                 shreddedReaders.add((ShreddedReader)reader);
 638  
             }
 639  
             
 640  0
             return new ShreddedCombiner(shreddedReaders, closeOnExit);
 641  
         }                  
 642  
         public DocumentNumberWordInteger clone(DocumentNumberWordInteger object) {
 643  0
             DocumentNumberWordInteger result = new DocumentNumberWordInteger();
 644  0
             if (object == null) return result;
 645  0
             result.word = object.word; 
 646  0
             result.document = object.document; 
 647  0
             result.value = object.value; 
 648  0
             return result;
 649  
         }                 
 650  
         public Class<DocumentNumberWordInteger> getOrderedClass() {
 651  0
             return DocumentNumberWordInteger.class;
 652  
         }                           
 653  
         public String[] getOrderSpec() {
 654  0
             return new String[] {"+word", "+document"};
 655  
         }
 656  
 
 657  
         public static String getSpecString() {
 658  0
             return "+word +document";
 659  
         }
 660  
                            
 661  
         public interface ShreddedProcessor extends Step {
 662  
             public void processWord(byte[] word) throws IOException;
 663  
             public void processDocument(int document) throws IOException;
 664  
             public void processTuple(int value) throws IOException;
 665  
             public void close() throws IOException;
 666  
         }    
 667  
         public interface ShreddedSource extends Step {
 668  
         }                                              
 669  
         
 670  0
         public static class ShreddedWriter implements ShreddedProcessor {
 671  
             ArrayOutput output;
 672  0
             ShreddedBuffer buffer = new ShreddedBuffer();
 673  
             byte[] lastWord;
 674  
             int lastDocument;
 675  0
             boolean lastFlush = false;
 676  
             
 677  0
             public ShreddedWriter(ArrayOutput output) {
 678  0
                 this.output = output;
 679  0
             }                        
 680  
             
 681  
             public void close() throws IOException {
 682  0
                 flush();
 683  0
             }
 684  
             
 685  
             public void processWord(byte[] word) {
 686  0
                 lastWord = word;
 687  0
                 buffer.processWord(word);
 688  0
             }
 689  
             public void processDocument(int document) {
 690  0
                 lastDocument = document;
 691  0
                 buffer.processDocument(document);
 692  0
             }
 693  
             public final void processTuple(int value) throws IOException {
 694  0
                 if (lastFlush) {
 695  0
                     if(buffer.words.size() == 0) buffer.processWord(lastWord);
 696  0
                     if(buffer.documents.size() == 0) buffer.processDocument(lastDocument);
 697  0
                     lastFlush = false;
 698  
                 }
 699  0
                 buffer.processTuple(value);
 700  0
                 if (buffer.isFull())
 701  0
                     flush();
 702  0
             }
 703  
             public final void flushTuples(int pauseIndex) throws IOException {
 704  
                 
 705  0
                 while (buffer.getReadIndex() < pauseIndex) {
 706  
                            
 707  0
                     output.writeInt(buffer.getValue());
 708  0
                     buffer.incrementTuple();
 709  
                 }
 710  0
             }  
 711  
             public final void flushWord(int pauseIndex) throws IOException {
 712  0
                 while (buffer.getReadIndex() < pauseIndex) {
 713  0
                     int nextPause = buffer.getWordEndIndex();
 714  0
                     int count = nextPause - buffer.getReadIndex();
 715  
                     
 716  0
                     output.writeBytes(buffer.getWord());
 717  0
                     output.writeInt(count);
 718  0
                     buffer.incrementWord();
 719  
                       
 720  0
                     flushDocument(nextPause);
 721  0
                     assert nextPause == buffer.getReadIndex();
 722  0
                 }
 723  0
             }
 724  
             public final void flushDocument(int pauseIndex) throws IOException {
 725  0
                 while (buffer.getReadIndex() < pauseIndex) {
 726  0
                     int nextPause = buffer.getDocumentEndIndex();
 727  0
                     int count = nextPause - buffer.getReadIndex();
 728  
                     
 729  0
                     output.writeInt(buffer.getDocument());
 730  0
                     output.writeInt(count);
 731  0
                     buffer.incrementDocument();
 732  
                       
 733  0
                     flushTuples(nextPause);
 734  0
                     assert nextPause == buffer.getReadIndex();
 735  0
                 }
 736  0
             }
 737  
             public void flush() throws IOException { 
 738  0
                 flushWord(buffer.getWriteIndex());
 739  0
                 buffer.reset(); 
 740  0
                 lastFlush = true;
 741  0
             }                           
 742  
         }
 743  0
         public static class ShreddedBuffer {
 744  0
             ArrayList<byte[]> words = new ArrayList();
 745  0
             ArrayList<Integer> documents = new ArrayList();
 746  0
             ArrayList<Integer> wordTupleIdx = new ArrayList();
 747  0
             ArrayList<Integer> documentTupleIdx = new ArrayList();
 748  0
             int wordReadIdx = 0;
 749  0
             int documentReadIdx = 0;
 750  
                             
 751  
             int[] values;
 752  0
             int writeTupleIndex = 0;
 753  0
             int readTupleIndex = 0;
 754  
             int batchSize;
 755  
 
 756  0
             public ShreddedBuffer(int batchSize) {
 757  0
                 this.batchSize = batchSize;
 758  
 
 759  0
                 values = new int[batchSize];
 760  0
             }                              
 761  
 
 762  
             public ShreddedBuffer() {    
 763  0
                 this(10000);
 764  0
             }                                                                                                                    
 765  
             
 766  
             public void processWord(byte[] word) {
 767  0
                 words.add(word);
 768  0
                 wordTupleIdx.add(writeTupleIndex);
 769  0
             }                                      
 770  
             public void processDocument(int document) {
 771  0
                 documents.add(document);
 772  0
                 documentTupleIdx.add(writeTupleIndex);
 773  0
             }                                      
 774  
             public void processTuple(int value) {
 775  0
                 assert words.size() > 0;
 776  0
                 assert documents.size() > 0;
 777  0
                 values[writeTupleIndex] = value;
 778  0
                 writeTupleIndex++;
 779  0
             }
 780  
             public void resetData() {
 781  0
                 words.clear();
 782  0
                 documents.clear();
 783  0
                 wordTupleIdx.clear();
 784  0
                 documentTupleIdx.clear();
 785  0
                 writeTupleIndex = 0;
 786  0
             }                  
 787  
                                  
 788  
             public void resetRead() {
 789  0
                 readTupleIndex = 0;
 790  0
                 wordReadIdx = 0;
 791  0
                 documentReadIdx = 0;
 792  0
             } 
 793  
 
 794  
             public void reset() {
 795  0
                 resetData();
 796  0
                 resetRead();
 797  0
             } 
 798  
             public boolean isFull() {
 799  0
                 return writeTupleIndex >= batchSize;
 800  
             }
 801  
 
 802  
             public boolean isEmpty() {
 803  0
                 return writeTupleIndex == 0;
 804  
             }                          
 805  
 
 806  
             public boolean isAtEnd() {
 807  0
                 return readTupleIndex >= writeTupleIndex;
 808  
             }           
 809  
             public void incrementWord() {
 810  0
                 wordReadIdx++;  
 811  0
             }                                                                                              
 812  
 
 813  
             public void autoIncrementWord() {
 814  0
                 while (readTupleIndex >= getWordEndIndex() && readTupleIndex < writeTupleIndex)
 815  0
                     wordReadIdx++;
 816  0
             }                 
 817  
             public void incrementDocument() {
 818  0
                 documentReadIdx++;  
 819  0
             }                                                                                              
 820  
 
 821  
             public void autoIncrementDocument() {
 822  0
                 while (readTupleIndex >= getDocumentEndIndex() && readTupleIndex < writeTupleIndex)
 823  0
                     documentReadIdx++;
 824  0
             }                 
 825  
             public void incrementTuple() {
 826  0
                 readTupleIndex++;
 827  0
             }                    
 828  
             public int getWordEndIndex() {
 829  0
                 if ((wordReadIdx+1) >= wordTupleIdx.size())
 830  0
                     return writeTupleIndex;
 831  0
                 return wordTupleIdx.get(wordReadIdx+1);
 832  
             }
 833  
 
 834  
             public int getDocumentEndIndex() {
 835  0
                 if ((documentReadIdx+1) >= documentTupleIdx.size())
 836  0
                     return writeTupleIndex;
 837  0
                 return documentTupleIdx.get(documentReadIdx+1);
 838  
             }
 839  
             public int getReadIndex() {
 840  0
                 return readTupleIndex;
 841  
             }   
 842  
 
 843  
             public int getWriteIndex() {
 844  0
                 return writeTupleIndex;
 845  
             } 
 846  
             public byte[] getWord() {
 847  0
                 assert readTupleIndex < writeTupleIndex;
 848  0
                 assert wordReadIdx < words.size();
 849  
                 
 850  0
                 return words.get(wordReadIdx);
 851  
             }
 852  
             public int getDocument() {
 853  0
                 assert readTupleIndex < writeTupleIndex;
 854  0
                 assert documentReadIdx < documents.size();
 855  
                 
 856  0
                 return documents.get(documentReadIdx);
 857  
             }
 858  
             public int getValue() {
 859  0
                 assert readTupleIndex < writeTupleIndex;
 860  0
                 return values[readTupleIndex];
 861  
             }                                         
 862  
             public void copyTuples(int endIndex, ShreddedProcessor output) throws IOException {
 863  0
                 while (getReadIndex() < endIndex) {
 864  0
                    output.processTuple(getValue());
 865  0
                    incrementTuple();
 866  
                 }
 867  0
             }                                                                           
 868  
             public void copyUntilIndexWord(int endIndex, ShreddedProcessor output) throws IOException {
 869  0
                 while (getReadIndex() < endIndex) {
 870  0
                     output.processWord(getWord());
 871  0
                     assert getWordEndIndex() <= endIndex;
 872  0
                     copyUntilIndexDocument(getWordEndIndex(), output);
 873  0
                     incrementWord();
 874  
                 }
 875  0
             } 
 876  
             public void copyUntilIndexDocument(int endIndex, ShreddedProcessor output) throws IOException {
 877  0
                 while (getReadIndex() < endIndex) {
 878  0
                     output.processDocument(getDocument());
 879  0
                     assert getDocumentEndIndex() <= endIndex;
 880  0
                     copyTuples(getDocumentEndIndex(), output);
 881  0
                     incrementDocument();
 882  
                 }
 883  0
             }  
 884  
             public void copyUntilWord(ShreddedBuffer other, ShreddedProcessor output) throws IOException {
 885  0
                 while (!isAtEnd()) {
 886  0
                     if (other != null) {   
 887  0
                         assert !other.isAtEnd();
 888  0
                         int c = + Utility.compare(getWord(), other.getWord());
 889  
                     
 890  0
                         if (c > 0) {
 891  0
                             break;   
 892  
                         }
 893  
                         
 894  0
                         output.processWord(getWord());
 895  
                                       
 896  0
                         if (c < 0) {
 897  0
                             copyUntilIndexDocument(getWordEndIndex(), output);
 898  0
                         } else if (c == 0) {
 899  0
                             copyUntilDocument(other, output);
 900  0
                             autoIncrementWord();
 901  0
                             break;
 902  
                         }
 903  0
                     } else {
 904  0
                         output.processWord(getWord());
 905  0
                         copyUntilIndexDocument(getWordEndIndex(), output);
 906  
                     }
 907  0
                     incrementWord();  
 908  
                     
 909  
                
 910  
                 }
 911  0
             }
 912  
             public void copyUntilDocument(ShreddedBuffer other, ShreddedProcessor output) throws IOException {
 913  0
                 while (!isAtEnd()) {
 914  0
                     if (other != null) {   
 915  0
                         assert !other.isAtEnd();
 916  0
                         int c = + Utility.compare(getDocument(), other.getDocument());
 917  
                     
 918  0
                         if (c > 0) {
 919  0
                             break;   
 920  
                         }
 921  
                         
 922  0
                         output.processDocument(getDocument());
 923  
                                       
 924  0
                         copyTuples(getDocumentEndIndex(), output);
 925  0
                     } else {
 926  0
                         output.processDocument(getDocument());
 927  0
                         copyTuples(getDocumentEndIndex(), output);
 928  
                     }
 929  0
                     incrementDocument();  
 930  
                     
 931  0
                     if (getWordEndIndex() <= readTupleIndex)
 932  0
                         break;   
 933  
                 }
 934  0
             }
 935  
             public void copyUntil(ShreddedBuffer other, ShreddedProcessor output) throws IOException {
 936  0
                 copyUntilWord(other, output);
 937  0
             }
 938  
             
 939  
         }                         
 940  0
         public static class ShreddedCombiner implements ReaderSource<DocumentNumberWordInteger>, ShreddedSource {   
 941  
             public ShreddedProcessor processor;
 942  
             Collection<ShreddedReader> readers;       
 943  0
             boolean closeOnExit = false;
 944  0
             boolean uninitialized = true;
 945  0
             PriorityQueue<ShreddedReader> queue = new PriorityQueue<ShreddedReader>();
 946  
             
 947  0
             public ShreddedCombiner(Collection<ShreddedReader> readers, boolean closeOnExit) {
 948  0
                 this.readers = readers;                                                       
 949  0
                 this.closeOnExit = closeOnExit;
 950  0
             }
 951  
                                   
 952  
             public void setProcessor(Step processor) throws IncompatibleProcessorException {  
 953  0
                 if (processor instanceof ShreddedProcessor) {
 954  0
                     this.processor = new DuplicateEliminator((ShreddedProcessor) processor);
 955  0
                 } else if (processor instanceof DocumentNumberWordInteger.Processor) {
 956  0
                     this.processor = new DuplicateEliminator(new TupleUnshredder((DocumentNumberWordInteger.Processor) processor));
 957  0
                 } else if (processor instanceof org.galagosearch.tupleflow.Processor) {
 958  0
                     this.processor = new DuplicateEliminator(new TupleUnshredder((org.galagosearch.tupleflow.Processor<DocumentNumberWordInteger>) processor));
 959  
                 } else {
 960  0
                     throw new IncompatibleProcessorException(processor.getClass().getName() + " is not supported by " + this.getClass().getName());                                                                       
 961  
                 }
 962  0
             }                                
 963  
             
 964  
             public Class<DocumentNumberWordInteger> getOutputClass() {
 965  0
                 return DocumentNumberWordInteger.class;
 966  
             }
 967  
             
 968  
             public void initialize() throws IOException {
 969  0
                 for (ShreddedReader reader : readers) {
 970  0
                     reader.fill();                                        
 971  
                     
 972  0
                     if (!reader.getBuffer().isAtEnd())
 973  0
                         queue.add(reader);
 974  
                 }   
 975  
 
 976  0
                 uninitialized = false;
 977  0
             }
 978  
 
 979  
             public void run() throws IOException {
 980  0
                 initialize();
 981  
                
 982  0
                 while (queue.size() > 0) {
 983  0
                     ShreddedReader top = queue.poll();
 984  0
                     ShreddedReader next = null;
 985  0
                     ShreddedBuffer nextBuffer = null; 
 986  
                     
 987  0
                     assert !top.getBuffer().isAtEnd();
 988  
                                                   
 989  0
                     if (queue.size() > 0) {
 990  0
                         next = queue.peek();
 991  0
                         nextBuffer = next.getBuffer();
 992  0
                         assert !nextBuffer.isAtEnd();
 993  
                     }
 994  
                     
 995  0
                     top.getBuffer().copyUntil(nextBuffer, processor);
 996  0
                     if (top.getBuffer().isAtEnd())
 997  0
                         top.fill();                 
 998  
                         
 999  0
                     if (!top.getBuffer().isAtEnd())
 1000  0
                         queue.add(top);
 1001  0
                 }              
 1002  
                 
 1003  0
                 if (closeOnExit)
 1004  0
                     processor.close();
 1005  0
             }
 1006  
 
 1007  
             public DocumentNumberWordInteger read() throws IOException {
 1008  0
                 if (uninitialized)
 1009  0
                     initialize();
 1010  
 
 1011  0
                 DocumentNumberWordInteger result = null;
 1012  
 
 1013  0
                 while (queue.size() > 0) {
 1014  0
                     ShreddedReader top = queue.poll();
 1015  0
                     result = top.read();
 1016  
 
 1017  0
                     if (result != null) {
 1018  0
                         if (top.getBuffer().isAtEnd())
 1019  0
                             top.fill();
 1020  
 
 1021  0
                         queue.offer(top);
 1022  0
                         break;
 1023  
                     } 
 1024  0
                 }
 1025  
 
 1026  0
                 return result;
 1027  
             }
 1028  
         } 
 1029  0
         public static class ShreddedReader implements Step, Comparable<ShreddedReader>, TypeReader<DocumentNumberWordInteger>, ShreddedSource {      
 1030  
             public ShreddedProcessor processor;
 1031  
             ShreddedBuffer buffer;
 1032  0
             DocumentNumberWordInteger last = new DocumentNumberWordInteger();         
 1033  0
             long updateWordCount = -1;
 1034  0
             long updateDocumentCount = -1;
 1035  0
             long tupleCount = 0;
 1036  0
             long bufferStartCount = 0;  
 1037  
             ArrayInput input;
 1038  
             
 1039  0
             public ShreddedReader(ArrayInput input) {
 1040  0
                 this.input = input; 
 1041  0
                 this.buffer = new ShreddedBuffer();
 1042  0
             }                               
 1043  
             
 1044  0
             public ShreddedReader(ArrayInput input, int bufferSize) { 
 1045  0
                 this.input = input;
 1046  0
                 this.buffer = new ShreddedBuffer(bufferSize);
 1047  0
             }
 1048  
                  
 1049  
             public final int compareTo(ShreddedReader other) {
 1050  0
                 ShreddedBuffer otherBuffer = other.getBuffer();
 1051  
                 
 1052  0
                 if (buffer.isAtEnd() && otherBuffer.isAtEnd()) {
 1053  0
                     return 0;                 
 1054  0
                 } else if (buffer.isAtEnd()) {
 1055  0
                     return -1;
 1056  0
                 } else if (otherBuffer.isAtEnd()) {
 1057  0
                     return 1;
 1058  
                 }
 1059  
                                    
 1060  0
                 int result = 0;
 1061  
                 do {
 1062  0
                     result = + Utility.compare(buffer.getWord(), otherBuffer.getWord());
 1063  0
                     if(result != 0) break;
 1064  0
                     result = + Utility.compare(buffer.getDocument(), otherBuffer.getDocument());
 1065  0
                     if(result != 0) break;
 1066  
                 } while (false);                                             
 1067  
                 
 1068  0
                 return result;
 1069  
             }
 1070  
             
 1071  
             public final ShreddedBuffer getBuffer() {
 1072  0
                 return buffer;
 1073  
             }                
 1074  
             
 1075  
             public final DocumentNumberWordInteger read() throws IOException {
 1076  0
                 if (buffer.isAtEnd()) {
 1077  0
                     fill();             
 1078  
                 
 1079  0
                     if (buffer.isAtEnd()) {
 1080  0
                         return null;
 1081  
                     }
 1082  
                 }
 1083  
                       
 1084  0
                 assert !buffer.isAtEnd();
 1085  0
                 DocumentNumberWordInteger result = new DocumentNumberWordInteger();
 1086  
                 
 1087  0
                 result.word = buffer.getWord();
 1088  0
                 result.document = buffer.getDocument();
 1089  0
                 result.value = buffer.getValue();
 1090  
                 
 1091  0
                 buffer.incrementTuple();
 1092  0
                 buffer.autoIncrementWord();
 1093  0
                 buffer.autoIncrementDocument();
 1094  
                 
 1095  0
                 return result;
 1096  
             }           
 1097  
             
 1098  
             public final void fill() throws IOException {
 1099  
                 try {   
 1100  0
                     buffer.reset();
 1101  
                     
 1102  0
                     if (tupleCount != 0) {
 1103  
                                                       
 1104  0
                         if(updateWordCount - tupleCount > 0) {
 1105  0
                             buffer.words.add(last.word);
 1106  0
                             buffer.wordTupleIdx.add((int) (updateWordCount - tupleCount));
 1107  
                         }                              
 1108  0
                         if(updateDocumentCount - tupleCount > 0) {
 1109  0
                             buffer.documents.add(last.document);
 1110  0
                             buffer.documentTupleIdx.add((int) (updateDocumentCount - tupleCount));
 1111  
                         }
 1112  0
                         bufferStartCount = tupleCount;
 1113  
                     }
 1114  
                     
 1115  0
                     while (!buffer.isFull()) {
 1116  0
                         updateDocument();
 1117  0
                         buffer.processTuple(input.readInt());
 1118  0
                         tupleCount++;
 1119  
                     }
 1120  0
                 } catch(EOFException e) {}
 1121  0
             }
 1122  
 
 1123  
             public final void updateWord() throws IOException {
 1124  0
                 if (updateWordCount > tupleCount)
 1125  0
                     return;
 1126  
                      
 1127  0
                 last.word = input.readBytes();
 1128  0
                 updateWordCount = tupleCount + input.readInt();
 1129  
                                       
 1130  0
                 buffer.processWord(last.word);
 1131  0
             }
 1132  
             public final void updateDocument() throws IOException {
 1133  0
                 if (updateDocumentCount > tupleCount)
 1134  0
                     return;
 1135  
                      
 1136  0
                 updateWord();
 1137  0
                 last.document = input.readInt();
 1138  0
                 updateDocumentCount = tupleCount + input.readInt();
 1139  
                                       
 1140  0
                 buffer.processDocument(last.document);
 1141  0
             }
 1142  
 
 1143  
             public void run() throws IOException {
 1144  
                 while (true) {
 1145  0
                     fill();
 1146  
                     
 1147  0
                     if (buffer.isAtEnd())
 1148  0
                         break;
 1149  
                     
 1150  0
                     buffer.copyUntil(null, processor);
 1151  
                 }      
 1152  0
                 processor.close();
 1153  0
             }
 1154  
             
 1155  
             public void setProcessor(Step processor) throws IncompatibleProcessorException {  
 1156  0
                 if (processor instanceof ShreddedProcessor) {
 1157  0
                     this.processor = new DuplicateEliminator((ShreddedProcessor) processor);
 1158  0
                 } else if (processor instanceof DocumentNumberWordInteger.Processor) {
 1159  0
                     this.processor = new DuplicateEliminator(new TupleUnshredder((DocumentNumberWordInteger.Processor) processor));
 1160  0
                 } else if (processor instanceof org.galagosearch.tupleflow.Processor) {
 1161  0
                     this.processor = new DuplicateEliminator(new TupleUnshredder((org.galagosearch.tupleflow.Processor<DocumentNumberWordInteger>) processor));
 1162  
                 } else {
 1163  0
                     throw new IncompatibleProcessorException(processor.getClass().getName() + " is not supported by " + this.getClass().getName());                                                                       
 1164  
                 }
 1165  0
             }                                
 1166  
             
 1167  
             public Class<DocumentNumberWordInteger> getOutputClass() {
 1168  0
                 return DocumentNumberWordInteger.class;
 1169  
             }                
 1170  
         }
 1171  
         
 1172  
         public static class DuplicateEliminator implements ShreddedProcessor {
 1173  
             public ShreddedProcessor processor;
 1174  0
             DocumentNumberWordInteger last = new DocumentNumberWordInteger();
 1175  0
             boolean wordProcess = true;
 1176  0
             boolean documentProcess = true;
 1177  
                                            
 1178  0
             public DuplicateEliminator() {}
 1179  0
             public DuplicateEliminator(ShreddedProcessor processor) {
 1180  0
                 this.processor = processor;
 1181  0
             }
 1182  
             
 1183  
             public void setShreddedProcessor(ShreddedProcessor processor) {
 1184  0
                 this.processor = processor;
 1185  0
             }
 1186  
 
 1187  
             public void processWord(byte[] word) throws IOException {  
 1188  0
                 if (wordProcess || Utility.compare(word, last.word) != 0) {
 1189  0
                     last.word = word;
 1190  0
                     processor.processWord(word);
 1191  0
             resetDocument();
 1192  0
                     wordProcess = false;
 1193  
                 }
 1194  0
             }
 1195  
             public void processDocument(int document) throws IOException {  
 1196  0
                 if (documentProcess || Utility.compare(document, last.document) != 0) {
 1197  0
                     last.document = document;
 1198  0
                     processor.processDocument(document);
 1199  0
                     documentProcess = false;
 1200  
                 }
 1201  0
             }  
 1202  
             
 1203  
             public void resetWord() {
 1204  0
                  wordProcess = true;
 1205  0
             resetDocument();
 1206  0
             }                                                
 1207  
             public void resetDocument() {
 1208  0
                  documentProcess = true;
 1209  0
             }                                                
 1210  
                                
 1211  
             public void processTuple(int value) throws IOException {
 1212  0
                 processor.processTuple(value);
 1213  0
             } 
 1214  
             
 1215  
             public void close() throws IOException {
 1216  0
                 processor.close();
 1217  0
             }                    
 1218  
         }
 1219  
         public static class TupleUnshredder implements ShreddedProcessor {
 1220  0
             DocumentNumberWordInteger last = new DocumentNumberWordInteger();
 1221  
             public org.galagosearch.tupleflow.Processor<DocumentNumberWordInteger> processor;                               
 1222  
             
 1223  0
             public TupleUnshredder(DocumentNumberWordInteger.Processor processor) {
 1224  0
                 this.processor = processor;
 1225  0
             }         
 1226  
             
 1227  0
             public TupleUnshredder(org.galagosearch.tupleflow.Processor<DocumentNumberWordInteger> processor) {
 1228  0
                 this.processor = processor;
 1229  0
             }
 1230  
             
 1231  
             public DocumentNumberWordInteger clone(DocumentNumberWordInteger object) {
 1232  0
                 DocumentNumberWordInteger result = new DocumentNumberWordInteger();
 1233  0
                 if (object == null) return result;
 1234  0
                 result.word = object.word; 
 1235  0
                 result.document = object.document; 
 1236  0
                 result.value = object.value; 
 1237  0
                 return result;
 1238  
             }                 
 1239  
             
 1240  
             public void processWord(byte[] word) throws IOException {
 1241  0
                 last.word = word;
 1242  0
             }   
 1243  
                 
 1244  
             public void processDocument(int document) throws IOException {
 1245  0
                 last.document = document;
 1246  0
             }   
 1247  
                 
 1248  
             
 1249  
             public void processTuple(int value) throws IOException {
 1250  0
                 last.value = value;
 1251  0
                 processor.process(clone(last));
 1252  0
             }               
 1253  
             
 1254  
             public void close() throws IOException {
 1255  0
                 processor.close();
 1256  0
             }
 1257  
         }     
 1258  0
         public static class TupleShredder implements Processor {
 1259  0
             DocumentNumberWordInteger last = new DocumentNumberWordInteger();
 1260  
             public ShreddedProcessor processor;
 1261  
             
 1262  0
             public TupleShredder(ShreddedProcessor processor) {
 1263  0
                 this.processor = processor;
 1264  0
             }                              
 1265  
             
 1266  
             public DocumentNumberWordInteger clone(DocumentNumberWordInteger object) {
 1267  0
                 DocumentNumberWordInteger result = new DocumentNumberWordInteger();
 1268  0
                 if (object == null) return result;
 1269  0
                 result.word = object.word; 
 1270  0
                 result.document = object.document; 
 1271  0
                 result.value = object.value; 
 1272  0
                 return result;
 1273  
             }                 
 1274  
             
 1275  
             public void process(DocumentNumberWordInteger object) throws IOException {                                                                                                                                                   
 1276  0
                 boolean processAll = false;
 1277  0
                 if(last == null || Utility.compare(last.word, object.word) != 0 || processAll) { processor.processWord(object.word); processAll = true; }
 1278  0
                 if(last == null || Utility.compare(last.document, object.document) != 0 || processAll) { processor.processDocument(object.document); processAll = true; }
 1279  0
                 processor.processTuple(object.value);                                         
 1280  0
             }
 1281  
                           
 1282  
             public Class<DocumentNumberWordInteger> getInputClass() {
 1283  0
                 return DocumentNumberWordInteger.class;
 1284  
             }
 1285  
             
 1286  
             public void close() throws IOException {
 1287  0
                 processor.close();
 1288  0
             }                     
 1289  
         }
 1290  
     } 
 1291  0
     public static class DocumentOrder implements Order<DocumentNumberWordInteger> {
 1292  
         public int hash(DocumentNumberWordInteger object) {
 1293  0
             int h = 0;
 1294  0
             h += Utility.hash(object.document);
 1295  0
             return h;
 1296  
         } 
 1297  
         public Comparator<DocumentNumberWordInteger> greaterThan() {
 1298  0
             return new Comparator<DocumentNumberWordInteger>() {
 1299  0
                 public int compare(DocumentNumberWordInteger one, DocumentNumberWordInteger two) {
 1300  0
                     int result = 0;
 1301  
                     do {
 1302  0
                         result = + Utility.compare(one.document, two.document);
 1303  0
                         if(result != 0) break;
 1304  
                     } while (false);
 1305  0
                     return -result;
 1306  
                 }
 1307  
             };
 1308  
         }     
 1309  
         public Comparator<DocumentNumberWordInteger> lessThan() {
 1310  0
             return new Comparator<DocumentNumberWordInteger>() {
 1311  0
                 public int compare(DocumentNumberWordInteger one, DocumentNumberWordInteger two) {
 1312  0
                     int result = 0;
 1313  
                     do {
 1314  0
                         result = + Utility.compare(one.document, two.document);
 1315  0
                         if(result != 0) break;
 1316  
                     } while (false);
 1317  0
                     return result;
 1318  
                 }
 1319  
             };
 1320  
         }     
 1321  
         public TypeReader<DocumentNumberWordInteger> orderedReader(ArrayInput _input) {
 1322  0
             return new ShreddedReader(_input);
 1323  
         }    
 1324  
 
 1325  
         public TypeReader<DocumentNumberWordInteger> orderedReader(ArrayInput _input, int bufferSize) {
 1326  0
             return new ShreddedReader(_input, bufferSize);
 1327  
         }    
 1328  
         public OrderedWriter<DocumentNumberWordInteger> orderedWriter(ArrayOutput _output) {
 1329  0
             ShreddedWriter w = new ShreddedWriter(_output);
 1330  0
             return new OrderedWriterClass(w); 
 1331  
         }                                    
 1332  0
         public static class OrderedWriterClass extends OrderedWriter< DocumentNumberWordInteger > {
 1333  0
             DocumentNumberWordInteger last = null;
 1334  0
             ShreddedWriter shreddedWriter = null; 
 1335  
             
 1336  0
             public OrderedWriterClass(ShreddedWriter s) {
 1337  0
                 this.shreddedWriter = s;
 1338  0
             }
 1339  
             
 1340  
             public void process(DocumentNumberWordInteger object) throws IOException {
 1341  0
                boolean processAll = false;
 1342  0
                if (processAll || last == null || 0 != Utility.compare(object.document, last.document)) { processAll = true; shreddedWriter.processDocument(object.document); }
 1343  0
                shreddedWriter.processTuple(object.word, object.value);
 1344  0
                last = object;
 1345  0
             }           
 1346  
                  
 1347  
             public void close() throws IOException {
 1348  0
                 shreddedWriter.close();
 1349  0
             }
 1350  
             
 1351  
             public Class<DocumentNumberWordInteger> getInputClass() {
 1352  0
                 return DocumentNumberWordInteger.class;
 1353  
             }
 1354  
         } 
 1355  
         public ReaderSource<DocumentNumberWordInteger> orderedCombiner(Collection<TypeReader<DocumentNumberWordInteger>> readers, boolean closeOnExit) {
 1356  0
             ArrayList<ShreddedReader> shreddedReaders = new ArrayList();
 1357  
             
 1358  0
             for (TypeReader<DocumentNumberWordInteger> reader : readers) {
 1359  0
                 shreddedReaders.add((ShreddedReader)reader);
 1360  
             }
 1361  
             
 1362  0
             return new ShreddedCombiner(shreddedReaders, closeOnExit);
 1363  
         }                  
 1364  
         public DocumentNumberWordInteger clone(DocumentNumberWordInteger object) {
 1365  0
             DocumentNumberWordInteger result = new DocumentNumberWordInteger();
 1366  0
             if (object == null) return result;
 1367  0
             result.word = object.word; 
 1368  0
             result.document = object.document; 
 1369  0
             result.value = object.value; 
 1370  0
             return result;
 1371  
         }                 
 1372  
         public Class<DocumentNumberWordInteger> getOrderedClass() {
 1373  0
             return DocumentNumberWordInteger.class;
 1374  
         }                           
 1375  
         public String[] getOrderSpec() {
 1376  0
             return new String[] {"+document"};
 1377  
         }
 1378  
 
 1379  
         public static String getSpecString() {
 1380  0
             return "+document";
 1381  
         }
 1382  
                            
 1383  
         public interface ShreddedProcessor extends Step {
 1384  
             public void processDocument(int document) throws IOException;
 1385  
             public void processTuple(byte[] word, int value) throws IOException;
 1386  
             public void close() throws IOException;
 1387  
         }    
 1388  
         public interface ShreddedSource extends Step {
 1389  
         }                                              
 1390  
         
 1391  0
         public static class ShreddedWriter implements ShreddedProcessor {
 1392  
             ArrayOutput output;
 1393  0
             ShreddedBuffer buffer = new ShreddedBuffer();
 1394  
             int lastDocument;
 1395  0
             boolean lastFlush = false;
 1396  
             
 1397  0
             public ShreddedWriter(ArrayOutput output) {
 1398  0
                 this.output = output;
 1399  0
             }                        
 1400  
             
 1401  
             public void close() throws IOException {
 1402  0
                 flush();
 1403  0
             }
 1404  
             
 1405  
             public void processDocument(int document) {
 1406  0
                 lastDocument = document;
 1407  0
                 buffer.processDocument(document);
 1408  0
             }
 1409  
             public final void processTuple(byte[] word, int value) throws IOException {
 1410  0
                 if (lastFlush) {
 1411  0
                     if(buffer.documents.size() == 0) buffer.processDocument(lastDocument);
 1412  0
                     lastFlush = false;
 1413  
                 }
 1414  0
                 buffer.processTuple(word, value);
 1415  0
                 if (buffer.isFull())
 1416  0
                     flush();
 1417  0
             }
 1418  
             public final void flushTuples(int pauseIndex) throws IOException {
 1419  
                 
 1420  0
                 while (buffer.getReadIndex() < pauseIndex) {
 1421  
                            
 1422  0
                     output.writeBytes(buffer.getWord());
 1423  0
                     output.writeInt(buffer.getValue());
 1424  0
                     buffer.incrementTuple();
 1425  
                 }
 1426  0
             }  
 1427  
             public final void flushDocument(int pauseIndex) throws IOException {
 1428  0
                 while (buffer.getReadIndex() < pauseIndex) {
 1429  0
                     int nextPause = buffer.getDocumentEndIndex();
 1430  0
                     int count = nextPause - buffer.getReadIndex();
 1431  
                     
 1432  0
                     output.writeInt(buffer.getDocument());
 1433  0
                     output.writeInt(count);
 1434  0
                     buffer.incrementDocument();
 1435  
                       
 1436  0
                     flushTuples(nextPause);
 1437  0
                     assert nextPause == buffer.getReadIndex();
 1438  0
                 }
 1439  0
             }
 1440  
             public void flush() throws IOException { 
 1441  0
                 flushDocument(buffer.getWriteIndex());
 1442  0
                 buffer.reset(); 
 1443  0
                 lastFlush = true;
 1444  0
             }                           
 1445  
         }
 1446  0
         public static class ShreddedBuffer {
 1447  0
             ArrayList<Integer> documents = new ArrayList();
 1448  0
             ArrayList<Integer> documentTupleIdx = new ArrayList();
 1449  0
             int documentReadIdx = 0;
 1450  
                             
 1451  
             byte[][] words;
 1452  
             int[] values;
 1453  0
             int writeTupleIndex = 0;
 1454  0
             int readTupleIndex = 0;
 1455  
             int batchSize;
 1456  
 
 1457  0
             public ShreddedBuffer(int batchSize) {
 1458  0
                 this.batchSize = batchSize;
 1459  
 
 1460  0
                 words = new byte[batchSize][];
 1461  0
                 values = new int[batchSize];
 1462  0
             }                              
 1463  
 
 1464  
             public ShreddedBuffer() {    
 1465  0
                 this(10000);
 1466  0
             }                                                                                                                    
 1467  
             
 1468  
             public void processDocument(int document) {
 1469  0
                 documents.add(document);
 1470  0
                 documentTupleIdx.add(writeTupleIndex);
 1471  0
             }                                      
 1472  
             public void processTuple(byte[] word, int value) {
 1473  0
                 assert documents.size() > 0;
 1474  0
                 words[writeTupleIndex] = word;
 1475  0
                 values[writeTupleIndex] = value;
 1476  0
                 writeTupleIndex++;
 1477  0
             }
 1478  
             public void resetData() {
 1479  0
                 documents.clear();
 1480  0
                 documentTupleIdx.clear();
 1481  0
                 writeTupleIndex = 0;
 1482  0
             }                  
 1483  
                                  
 1484  
             public void resetRead() {
 1485  0
                 readTupleIndex = 0;
 1486  0
                 documentReadIdx = 0;
 1487  0
             } 
 1488  
 
 1489  
             public void reset() {
 1490  0
                 resetData();
 1491  0
                 resetRead();
 1492  0
             } 
 1493  
             public boolean isFull() {
 1494  0
                 return writeTupleIndex >= batchSize;
 1495  
             }
 1496  
 
 1497  
             public boolean isEmpty() {
 1498  0
                 return writeTupleIndex == 0;
 1499  
             }                          
 1500  
 
 1501  
             public boolean isAtEnd() {
 1502  0
                 return readTupleIndex >= writeTupleIndex;
 1503  
             }           
 1504  
             public void incrementDocument() {
 1505  0
                 documentReadIdx++;  
 1506  0
             }                                                                                              
 1507  
 
 1508  
             public void autoIncrementDocument() {
 1509  0
                 while (readTupleIndex >= getDocumentEndIndex() && readTupleIndex < writeTupleIndex)
 1510  0
                     documentReadIdx++;
 1511  0
             }                 
 1512  
             public void incrementTuple() {
 1513  0
                 readTupleIndex++;
 1514  0
             }                    
 1515  
             public int getDocumentEndIndex() {
 1516  0
                 if ((documentReadIdx+1) >= documentTupleIdx.size())
 1517  0
                     return writeTupleIndex;
 1518  0
                 return documentTupleIdx.get(documentReadIdx+1);
 1519  
             }
 1520  
             public int getReadIndex() {
 1521  0
                 return readTupleIndex;
 1522  
             }   
 1523  
 
 1524  
             public int getWriteIndex() {
 1525  0
                 return writeTupleIndex;
 1526  
             } 
 1527  
             public int getDocument() {
 1528  0
                 assert readTupleIndex < writeTupleIndex;
 1529  0
                 assert documentReadIdx < documents.size();
 1530  
                 
 1531  0
                 return documents.get(documentReadIdx);
 1532  
             }
 1533  
             public byte[] getWord() {
 1534  0
                 assert readTupleIndex < writeTupleIndex;
 1535  0
                 return words[readTupleIndex];
 1536  
             }                                         
 1537  
             public int getValue() {
 1538  0
                 assert readTupleIndex < writeTupleIndex;
 1539  0
                 return values[readTupleIndex];
 1540  
             }                                         
 1541  
             public void copyTuples(int endIndex, ShreddedProcessor output) throws IOException {
 1542  0
                 while (getReadIndex() < endIndex) {
 1543  0
                    output.processTuple(getWord(), getValue());
 1544  0
                    incrementTuple();
 1545  
                 }
 1546  0
             }                                                                           
 1547  
             public void copyUntilIndexDocument(int endIndex, ShreddedProcessor output) throws IOException {
 1548  0
                 while (getReadIndex() < endIndex) {
 1549  0
                     output.processDocument(getDocument());
 1550  0
                     assert getDocumentEndIndex() <= endIndex;
 1551  0
                     copyTuples(getDocumentEndIndex(), output);
 1552  0
                     incrementDocument();
 1553  
                 }
 1554  0
             }  
 1555  
             public void copyUntilDocument(ShreddedBuffer other, ShreddedProcessor output) throws IOException {
 1556  0
                 while (!isAtEnd()) {
 1557  0
                     if (other != null) {   
 1558  0
                         assert !other.isAtEnd();
 1559  0
                         int c = + Utility.compare(getDocument(), other.getDocument());
 1560  
                     
 1561  0
                         if (c > 0) {
 1562  0
                             break;   
 1563  
                         }
 1564  
                         
 1565  0
                         output.processDocument(getDocument());
 1566  
                                       
 1567  0
                         copyTuples(getDocumentEndIndex(), output);
 1568  0
                     } else {
 1569  0
                         output.processDocument(getDocument());
 1570  0
                         copyTuples(getDocumentEndIndex(), output);
 1571  
                     }
 1572  0
                     incrementDocument();  
 1573  
                     
 1574  
                
 1575  
                 }
 1576  0
             }
 1577  
             public void copyUntil(ShreddedBuffer other, ShreddedProcessor output) throws IOException {
 1578  0
                 copyUntilDocument(other, output);
 1579  0
             }
 1580  
             
 1581  
         }                         
 1582  0
         public static class ShreddedCombiner implements ReaderSource<DocumentNumberWordInteger>, ShreddedSource {   
 1583  
             public ShreddedProcessor processor;
 1584  
             Collection<ShreddedReader> readers;       
 1585  0
             boolean closeOnExit = false;
 1586  0
             boolean uninitialized = true;
 1587  0
             PriorityQueue<ShreddedReader> queue = new PriorityQueue<ShreddedReader>();
 1588  
             
 1589  0
             public ShreddedCombiner(Collection<ShreddedReader> readers, boolean closeOnExit) {
 1590  0
                 this.readers = readers;                                                       
 1591  0
                 this.closeOnExit = closeOnExit;
 1592  0
             }
 1593  
                                   
 1594  
             public void setProcessor(Step processor) throws IncompatibleProcessorException {  
 1595  0
                 if (processor instanceof ShreddedProcessor) {
 1596  0
                     this.processor = new DuplicateEliminator((ShreddedProcessor) processor);
 1597  0
                 } else if (processor instanceof DocumentNumberWordInteger.Processor) {
 1598  0
                     this.processor = new DuplicateEliminator(new TupleUnshredder((DocumentNumberWordInteger.Processor) processor));
 1599  0
                 } else if (processor instanceof org.galagosearch.tupleflow.Processor) {
 1600  0
                     this.processor = new DuplicateEliminator(new TupleUnshredder((org.galagosearch.tupleflow.Processor<DocumentNumberWordInteger>) processor));
 1601  
                 } else {
 1602  0
                     throw new IncompatibleProcessorException(processor.getClass().getName() + " is not supported by " + this.getClass().getName());                                                                       
 1603  
                 }
 1604  0
             }                                
 1605  
             
 1606  
             public Class<DocumentNumberWordInteger> getOutputClass() {
 1607  0
                 return DocumentNumberWordInteger.class;
 1608  
             }
 1609  
             
 1610  
             public void initialize() throws IOException {
 1611  0
                 for (ShreddedReader reader : readers) {
 1612  0
                     reader.fill();                                        
 1613  
                     
 1614  0
                     if (!reader.getBuffer().isAtEnd())
 1615  0
                         queue.add(reader);
 1616  
                 }   
 1617  
 
 1618  0
                 uninitialized = false;
 1619  0
             }
 1620  
 
 1621  
             public void run() throws IOException {
 1622  0
                 initialize();
 1623  
                
 1624  0
                 while (queue.size() > 0) {
 1625  0
                     ShreddedReader top = queue.poll();
 1626  0
                     ShreddedReader next = null;
 1627  0
                     ShreddedBuffer nextBuffer = null; 
 1628  
                     
 1629  0
                     assert !top.getBuffer().isAtEnd();
 1630  
                                                   
 1631  0
                     if (queue.size() > 0) {
 1632  0
                         next = queue.peek();
 1633  0
                         nextBuffer = next.getBuffer();
 1634  0
                         assert !nextBuffer.isAtEnd();
 1635  
                     }
 1636  
                     
 1637  0
                     top.getBuffer().copyUntil(nextBuffer, processor);
 1638  0
                     if (top.getBuffer().isAtEnd())
 1639  0
                         top.fill();                 
 1640  
                         
 1641  0
                     if (!top.getBuffer().isAtEnd())
 1642  0
                         queue.add(top);
 1643  0
                 }              
 1644  
                 
 1645  0
                 if (closeOnExit)
 1646  0
                     processor.close();
 1647  0
             }
 1648  
 
 1649  
             public DocumentNumberWordInteger read() throws IOException {
 1650  0
                 if (uninitialized)
 1651  0
                     initialize();
 1652  
 
 1653  0
                 DocumentNumberWordInteger result = null;
 1654  
 
 1655  0
                 while (queue.size() > 0) {
 1656  0
                     ShreddedReader top = queue.poll();
 1657  0
                     result = top.read();
 1658  
 
 1659  0
                     if (result != null) {
 1660  0
                         if (top.getBuffer().isAtEnd())
 1661  0
                             top.fill();
 1662  
 
 1663  0
                         queue.offer(top);
 1664  0
                         break;
 1665  
                     } 
 1666  0
                 }
 1667  
 
 1668  0
                 return result;
 1669  
             }
 1670  
         } 
 1671  0
         public static class ShreddedReader implements Step, Comparable<ShreddedReader>, TypeReader<DocumentNumberWordInteger>, ShreddedSource {      
 1672  
             public ShreddedProcessor processor;
 1673  
             ShreddedBuffer buffer;
 1674  0
             DocumentNumberWordInteger last = new DocumentNumberWordInteger();         
 1675  0
             long updateDocumentCount = -1;
 1676  0
             long tupleCount = 0;
 1677  0
             long bufferStartCount = 0;  
 1678  
             ArrayInput input;
 1679  
             
 1680  0
             public ShreddedReader(ArrayInput input) {
 1681  0
                 this.input = input; 
 1682  0
                 this.buffer = new ShreddedBuffer();
 1683  0
             }                               
 1684  
             
 1685  0
             public ShreddedReader(ArrayInput input, int bufferSize) { 
 1686  0
                 this.input = input;
 1687  0
                 this.buffer = new ShreddedBuffer(bufferSize);
 1688  0
             }
 1689  
                  
 1690  
             public final int compareTo(ShreddedReader other) {
 1691  0
                 ShreddedBuffer otherBuffer = other.getBuffer();
 1692  
                 
 1693  0
                 if (buffer.isAtEnd() && otherBuffer.isAtEnd()) {
 1694  0
                     return 0;                 
 1695  0
                 } else if (buffer.isAtEnd()) {
 1696  0
                     return -1;
 1697  0
                 } else if (otherBuffer.isAtEnd()) {
 1698  0
                     return 1;
 1699  
                 }
 1700  
                                    
 1701  0
                 int result = 0;
 1702  
                 do {
 1703  0
                     result = + Utility.compare(buffer.getDocument(), otherBuffer.getDocument());
 1704  0
                     if(result != 0) break;
 1705  
                 } while (false);                                             
 1706  
                 
 1707  0
                 return result;
 1708  
             }
 1709  
             
 1710  
             public final ShreddedBuffer getBuffer() {
 1711  0
                 return buffer;
 1712  
             }                
 1713  
             
 1714  
             public final DocumentNumberWordInteger read() throws IOException {
 1715  0
                 if (buffer.isAtEnd()) {
 1716  0
                     fill();             
 1717  
                 
 1718  0
                     if (buffer.isAtEnd()) {
 1719  0
                         return null;
 1720  
                     }
 1721  
                 }
 1722  
                       
 1723  0
                 assert !buffer.isAtEnd();
 1724  0
                 DocumentNumberWordInteger result = new DocumentNumberWordInteger();
 1725  
                 
 1726  0
                 result.document = buffer.getDocument();
 1727  0
                 result.word = buffer.getWord();
 1728  0
                 result.value = buffer.getValue();
 1729  
                 
 1730  0
                 buffer.incrementTuple();
 1731  0
                 buffer.autoIncrementDocument();
 1732  
                 
 1733  0
                 return result;
 1734  
             }           
 1735  
             
 1736  
             public final void fill() throws IOException {
 1737  
                 try {   
 1738  0
                     buffer.reset();
 1739  
                     
 1740  0
                     if (tupleCount != 0) {
 1741  
                                                       
 1742  0
                         if(updateDocumentCount - tupleCount > 0) {
 1743  0
                             buffer.documents.add(last.document);
 1744  0
                             buffer.documentTupleIdx.add((int) (updateDocumentCount - tupleCount));
 1745  
                         }
 1746  0
                         bufferStartCount = tupleCount;
 1747  
                     }
 1748  
                     
 1749  0
                     while (!buffer.isFull()) {
 1750  0
                         updateDocument();
 1751  0
                         buffer.processTuple(input.readBytes(), input.readInt());
 1752  0
                         tupleCount++;
 1753  
                     }
 1754  0
                 } catch(EOFException e) {}
 1755  0
             }
 1756  
 
 1757  
             public final void updateDocument() throws IOException {
 1758  0
                 if (updateDocumentCount > tupleCount)
 1759  0
                     return;
 1760  
                      
 1761  0
                 last.document = input.readInt();
 1762  0
                 updateDocumentCount = tupleCount + input.readInt();
 1763  
                                       
 1764  0
                 buffer.processDocument(last.document);
 1765  0
             }
 1766  
 
 1767  
             public void run() throws IOException {
 1768  
                 while (true) {
 1769  0
                     fill();
 1770  
                     
 1771  0
                     if (buffer.isAtEnd())
 1772  0
                         break;
 1773  
                     
 1774  0
                     buffer.copyUntil(null, processor);
 1775  
                 }      
 1776  0
                 processor.close();
 1777  0
             }
 1778  
             
 1779  
             public void setProcessor(Step processor) throws IncompatibleProcessorException {  
 1780  0
                 if (processor instanceof ShreddedProcessor) {
 1781  0
                     this.processor = new DuplicateEliminator((ShreddedProcessor) processor);
 1782  0
                 } else if (processor instanceof DocumentNumberWordInteger.Processor) {
 1783  0
                     this.processor = new DuplicateEliminator(new TupleUnshredder((DocumentNumberWordInteger.Processor) processor));
 1784  0
                 } else if (processor instanceof org.galagosearch.tupleflow.Processor) {
 1785  0
                     this.processor = new DuplicateEliminator(new TupleUnshredder((org.galagosearch.tupleflow.Processor<DocumentNumberWordInteger>) processor));
 1786  
                 } else {
 1787  0
                     throw new IncompatibleProcessorException(processor.getClass().getName() + " is not supported by " + this.getClass().getName());                                                                       
 1788  
                 }
 1789  0
             }                                
 1790  
             
 1791  
             public Class<DocumentNumberWordInteger> getOutputClass() {
 1792  0
                 return DocumentNumberWordInteger.class;
 1793  
             }                
 1794  
         }
 1795  
         
 1796  
         public static class DuplicateEliminator implements ShreddedProcessor {
 1797  
             public ShreddedProcessor processor;
 1798  0
             DocumentNumberWordInteger last = new DocumentNumberWordInteger();
 1799  0
             boolean documentProcess = true;
 1800  
                                            
 1801  0
             public DuplicateEliminator() {}
 1802  0
             public DuplicateEliminator(ShreddedProcessor processor) {
 1803  0
                 this.processor = processor;
 1804  0
             }
 1805  
             
 1806  
             public void setShreddedProcessor(ShreddedProcessor processor) {
 1807  0
                 this.processor = processor;
 1808  0
             }
 1809  
 
 1810  
             public void processDocument(int document) throws IOException {  
 1811  0
                 if (documentProcess || Utility.compare(document, last.document) != 0) {
 1812  0
                     last.document = document;
 1813  0
                     processor.processDocument(document);
 1814  0
                     documentProcess = false;
 1815  
                 }
 1816  0
             }  
 1817  
             
 1818  
             public void resetDocument() {
 1819  0
                  documentProcess = true;
 1820  0
             }                                                
 1821  
                                
 1822  
             public void processTuple(byte[] word, int value) throws IOException {
 1823  0
                 processor.processTuple(word, value);
 1824  0
             } 
 1825  
             
 1826  
             public void close() throws IOException {
 1827  0
                 processor.close();
 1828  0
             }                    
 1829  
         }
 1830  
         public static class TupleUnshredder implements ShreddedProcessor {
 1831  0
             DocumentNumberWordInteger last = new DocumentNumberWordInteger();
 1832  
             public org.galagosearch.tupleflow.Processor<DocumentNumberWordInteger> processor;                               
 1833  
             
 1834  0
             public TupleUnshredder(DocumentNumberWordInteger.Processor processor) {
 1835  0
                 this.processor = processor;
 1836  0
             }         
 1837  
             
 1838  0
             public TupleUnshredder(org.galagosearch.tupleflow.Processor<DocumentNumberWordInteger> processor) {
 1839  0
                 this.processor = processor;
 1840  0
             }
 1841  
             
 1842  
             public DocumentNumberWordInteger clone(DocumentNumberWordInteger object) {
 1843  0
                 DocumentNumberWordInteger result = new DocumentNumberWordInteger();
 1844  0
                 if (object == null) return result;
 1845  0
                 result.word = object.word; 
 1846  0
                 result.document = object.document; 
 1847  0
                 result.value = object.value; 
 1848  0
                 return result;
 1849  
             }                 
 1850  
             
 1851  
             public void processDocument(int document) throws IOException {
 1852  0
                 last.document = document;
 1853  0
             }   
 1854  
                 
 1855  
             
 1856  
             public void processTuple(byte[] word, int value) throws IOException {
 1857  0
                 last.word = word;
 1858  0
                 last.value = value;
 1859  0
                 processor.process(clone(last));
 1860  0
             }               
 1861  
             
 1862  
             public void close() throws IOException {
 1863  0
                 processor.close();
 1864  0
             }
 1865  
         }     
 1866  0
         public static class TupleShredder implements Processor {
 1867  0
             DocumentNumberWordInteger last = new DocumentNumberWordInteger();
 1868  
             public ShreddedProcessor processor;
 1869  
             
 1870  0
             public TupleShredder(ShreddedProcessor processor) {
 1871  0
                 this.processor = processor;
 1872  0
             }                              
 1873  
             
 1874  
             public DocumentNumberWordInteger clone(DocumentNumberWordInteger object) {
 1875  0
                 DocumentNumberWordInteger result = new DocumentNumberWordInteger();
 1876  0
                 if (object == null) return result;
 1877  0
                 result.word = object.word; 
 1878  0
                 result.document = object.document; 
 1879  0
                 result.value = object.value; 
 1880  0
                 return result;
 1881  
             }                 
 1882  
             
 1883  
             public void process(DocumentNumberWordInteger object) throws IOException {                                                                                                                                                   
 1884  0
                 boolean processAll = false;
 1885  0
                 if(last == null || Utility.compare(last.document, object.document) != 0 || processAll) { processor.processDocument(object.document); processAll = true; }
 1886  0
                 processor.processTuple(object.word, object.value);                                         
 1887  0
             }
 1888  
                           
 1889  
             public Class<DocumentNumberWordInteger> getInputClass() {
 1890  0
                 return DocumentNumberWordInteger.class;
 1891  
             }
 1892  
             
 1893  
             public void close() throws IOException {
 1894  0
                 processor.close();
 1895  0
             }                     
 1896  
         }
 1897  
     } 
 1898  0
     public static class ValueOrder implements Order<DocumentNumberWordInteger> {
 1899  
         public int hash(DocumentNumberWordInteger object) {
 1900  0
             int h = 0;
 1901  0
             h += Utility.hash(object.value);
 1902  0
             return h;
 1903  
         } 
 1904  
         public Comparator<DocumentNumberWordInteger> greaterThan() {
 1905  0
             return new Comparator<DocumentNumberWordInteger>() {
 1906  0
                 public int compare(DocumentNumberWordInteger one, DocumentNumberWordInteger two) {
 1907  0
                     int result = 0;
 1908  
                     do {
 1909  0
                         result = + Utility.compare(one.value, two.value);
 1910  0
                         if(result != 0) break;
 1911  
                     } while (false);
 1912  0
                     return -result;
 1913  
                 }
 1914  
             };
 1915  
         }     
 1916  
         public Comparator<DocumentNumberWordInteger> lessThan() {
 1917  0
             return new Comparator<DocumentNumberWordInteger>() {
 1918  0
                 public int compare(DocumentNumberWordInteger one, DocumentNumberWordInteger two) {
 1919  0
                     int result = 0;
 1920  
                     do {
 1921  0
                         result = + Utility.compare(one.value, two.value);
 1922  0
                         if(result != 0) break;
 1923  
                     } while (false);
 1924  0
                     return result;
 1925  
                 }
 1926  
             };
 1927  
         }     
 1928  
         public TypeReader<DocumentNumberWordInteger> orderedReader(ArrayInput _input) {
 1929  0
             return new ShreddedReader(_input);
 1930  
         }    
 1931  
 
 1932  
         public TypeReader<DocumentNumberWordInteger> orderedReader(ArrayInput _input, int bufferSize) {
 1933  0
             return new ShreddedReader(_input, bufferSize);
 1934  
         }    
 1935  
         public OrderedWriter<DocumentNumberWordInteger> orderedWriter(ArrayOutput _output) {
 1936  0
             ShreddedWriter w = new ShreddedWriter(_output);
 1937  0
             return new OrderedWriterClass(w); 
 1938  
         }                                    
 1939  0
         public static class OrderedWriterClass extends OrderedWriter< DocumentNumberWordInteger > {
 1940  0
             DocumentNumberWordInteger last = null;
 1941  0
             ShreddedWriter shreddedWriter = null; 
 1942  
             
 1943  0
             public OrderedWriterClass(ShreddedWriter s) {
 1944  0
                 this.shreddedWriter = s;
 1945  0
             }
 1946  
             
 1947  
             public void process(DocumentNumberWordInteger object) throws IOException {
 1948  0
                boolean processAll = false;
 1949  0
                if (processAll || last == null || 0 != Utility.compare(object.value, last.value)) { processAll = true; shreddedWriter.processValue(object.value); }
 1950  0
                shreddedWriter.processTuple(object.word, object.document);
 1951  0
                last = object;
 1952  0
             }           
 1953  
                  
 1954  
             public void close() throws IOException {
 1955  0
                 shreddedWriter.close();
 1956  0
             }
 1957  
             
 1958  
             public Class<DocumentNumberWordInteger> getInputClass() {
 1959  0
                 return DocumentNumberWordInteger.class;
 1960  
             }
 1961  
         } 
 1962  
         public ReaderSource<DocumentNumberWordInteger> orderedCombiner(Collection<TypeReader<DocumentNumberWordInteger>> readers, boolean closeOnExit) {
 1963  0
             ArrayList<ShreddedReader> shreddedReaders = new ArrayList();
 1964  
             
 1965  0
             for (TypeReader<DocumentNumberWordInteger> reader : readers) {
 1966  0
                 shreddedReaders.add((ShreddedReader)reader);
 1967  
             }
 1968  
             
 1969  0
             return new ShreddedCombiner(shreddedReaders, closeOnExit);
 1970  
         }                  
 1971  
         public DocumentNumberWordInteger clone(DocumentNumberWordInteger object) {
 1972  0
             DocumentNumberWordInteger result = new DocumentNumberWordInteger();
 1973  0
             if (object == null) return result;
 1974  0
             result.word = object.word; 
 1975  0
             result.document = object.document; 
 1976  0
             result.value = object.value; 
 1977  0
             return result;
 1978  
         }                 
 1979  
         public Class<DocumentNumberWordInteger> getOrderedClass() {
 1980  0
             return DocumentNumberWordInteger.class;
 1981  
         }                           
 1982  
         public String[] getOrderSpec() {
 1983  0
             return new String[] {"+value"};
 1984  
         }
 1985  
 
 1986  
         public static String getSpecString() {
 1987  0
             return "+value";
 1988  
         }
 1989  
                            
 1990  
         public interface ShreddedProcessor extends Step {
 1991  
             public void processValue(int value) throws IOException;
 1992  
             public void processTuple(byte[] word, int document) throws IOException;
 1993  
             public void close() throws IOException;
 1994  
         }    
 1995  
         public interface ShreddedSource extends Step {
 1996  
         }                                              
 1997  
         
 1998  0
         public static class ShreddedWriter implements ShreddedProcessor {
 1999  
             ArrayOutput output;
 2000  0
             ShreddedBuffer buffer = new ShreddedBuffer();
 2001  
             int lastValue;
 2002  0
             boolean lastFlush = false;
 2003  
             
 2004  0
             public ShreddedWriter(ArrayOutput output) {
 2005  0
                 this.output = output;
 2006  0
             }                        
 2007  
             
 2008  
             public void close() throws IOException {
 2009  0
                 flush();
 2010  0
             }
 2011  
             
 2012  
             public void processValue(int value) {
 2013  0
                 lastValue = value;
 2014  0
                 buffer.processValue(value);
 2015  0
             }
 2016  
             public final void processTuple(byte[] word, int document) throws IOException {
 2017  0
                 if (lastFlush) {
 2018  0
                     if(buffer.values.size() == 0) buffer.processValue(lastValue);
 2019  0
                     lastFlush = false;
 2020  
                 }
 2021  0
                 buffer.processTuple(word, document);
 2022  0
                 if (buffer.isFull())
 2023  0
                     flush();
 2024  0
             }
 2025  
             public final void flushTuples(int pauseIndex) throws IOException {
 2026  
                 
 2027  0
                 while (buffer.getReadIndex() < pauseIndex) {
 2028  
                            
 2029  0
                     output.writeBytes(buffer.getWord());
 2030  0
                     output.writeInt(buffer.getDocument());
 2031  0
                     buffer.incrementTuple();
 2032  
                 }
 2033  0
             }  
 2034  
             public final void flushValue(int pauseIndex) throws IOException {
 2035  0
                 while (buffer.getReadIndex() < pauseIndex) {
 2036  0
                     int nextPause = buffer.getValueEndIndex();
 2037  0
                     int count = nextPause - buffer.getReadIndex();
 2038  
                     
 2039  0
                     output.writeInt(buffer.getValue());
 2040  0
                     output.writeInt(count);
 2041  0
                     buffer.incrementValue();
 2042  
                       
 2043  0
                     flushTuples(nextPause);
 2044  0
                     assert nextPause == buffer.getReadIndex();
 2045  0
                 }
 2046  0
             }
 2047  
             public void flush() throws IOException { 
 2048  0
                 flushValue(buffer.getWriteIndex());
 2049  0
                 buffer.reset(); 
 2050  0
                 lastFlush = true;
 2051  0
             }                           
 2052  
         }
 2053  0
         public static class ShreddedBuffer {
 2054  0
             ArrayList<Integer> values = new ArrayList();
 2055  0
             ArrayList<Integer> valueTupleIdx = new ArrayList();
 2056  0
             int valueReadIdx = 0;
 2057  
                             
 2058  
             byte[][] words;
 2059  
             int[] documents;
 2060  0
             int writeTupleIndex = 0;
 2061  0
             int readTupleIndex = 0;
 2062  
             int batchSize;
 2063  
 
 2064  0
             public ShreddedBuffer(int batchSize) {
 2065  0
                 this.batchSize = batchSize;
 2066  
 
 2067  0
                 words = new byte[batchSize][];
 2068  0
                 documents = new int[batchSize];
 2069  0
             }                              
 2070  
 
 2071  
             public ShreddedBuffer() {    
 2072  0
                 this(10000);
 2073  0
             }                                                                                                                    
 2074  
             
 2075  
             public void processValue(int value) {
 2076  0
                 values.add(value);
 2077  0
                 valueTupleIdx.add(writeTupleIndex);
 2078  0
             }                                      
 2079  
             public void processTuple(byte[] word, int document) {
 2080  0
                 assert values.size() > 0;
 2081  0
                 words[writeTupleIndex] = word;
 2082  0
                 documents[writeTupleIndex] = document;
 2083  0
                 writeTupleIndex++;
 2084  0
             }
 2085  
             public void resetData() {
 2086  0
                 values.clear();
 2087  0
                 valueTupleIdx.clear();
 2088  0
                 writeTupleIndex = 0;
 2089  0
             }                  
 2090  
                                  
 2091  
             public void resetRead() {
 2092  0
                 readTupleIndex = 0;
 2093  0
                 valueReadIdx = 0;
 2094  0
             } 
 2095  
 
 2096  
             public void reset() {
 2097  0
                 resetData();
 2098  0
                 resetRead();
 2099  0
             } 
 2100  
             public boolean isFull() {
 2101  0
                 return writeTupleIndex >= batchSize;
 2102  
             }
 2103  
 
 2104  
             public boolean isEmpty() {
 2105  0
                 return writeTupleIndex == 0;
 2106  
             }                          
 2107  
 
 2108  
             public boolean isAtEnd() {
 2109  0
                 return readTupleIndex >= writeTupleIndex;
 2110  
             }           
 2111  
             public void incrementValue() {
 2112  0
                 valueReadIdx++;  
 2113  0
             }                                                                                              
 2114  
 
 2115  
             public void autoIncrementValue() {
 2116  0
                 while (readTupleIndex >= getValueEndIndex() && readTupleIndex < writeTupleIndex)
 2117  0
                     valueReadIdx++;
 2118  0
             }                 
 2119  
             public void incrementTuple() {
 2120  0
                 readTupleIndex++;
 2121  0
             }                    
 2122  
             public int getValueEndIndex() {
 2123  0
                 if ((valueReadIdx+1) >= valueTupleIdx.size())
 2124  0
                     return writeTupleIndex;
 2125  0
                 return valueTupleIdx.get(valueReadIdx+1);
 2126  
             }
 2127  
             public int getReadIndex() {
 2128  0
                 return readTupleIndex;
 2129  
             }   
 2130  
 
 2131  
             public int getWriteIndex() {
 2132  0
                 return writeTupleIndex;
 2133  
             } 
 2134  
             public int getValue() {
 2135  0
                 assert readTupleIndex < writeTupleIndex;
 2136  0
                 assert valueReadIdx < values.size();
 2137  
                 
 2138  0
                 return values.get(valueReadIdx);
 2139  
             }
 2140  
             public byte[] getWord() {
 2141  0
                 assert readTupleIndex < writeTupleIndex;
 2142  0
                 return words[readTupleIndex];
 2143  
             }                                         
 2144  
             public int getDocument() {
 2145  0
                 assert readTupleIndex < writeTupleIndex;
 2146  0
                 return documents[readTupleIndex];
 2147  
             }                                         
 2148  
             public void copyTuples(int endIndex, ShreddedProcessor output) throws IOException {
 2149  0
                 while (getReadIndex() < endIndex) {
 2150  0
                    output.processTuple(getWord(), getDocument());
 2151  0
                    incrementTuple();
 2152  
                 }
 2153  0
             }                                                                           
 2154  
             public void copyUntilIndexValue(int endIndex, ShreddedProcessor output) throws IOException {
 2155  0
                 while (getReadIndex() < endIndex) {
 2156  0
                     output.processValue(getValue());
 2157  0
                     assert getValueEndIndex() <= endIndex;
 2158  0
                     copyTuples(getValueEndIndex(), output);
 2159  0
                     incrementValue();
 2160  
                 }
 2161  0
             }  
 2162  
             public void copyUntilValue(ShreddedBuffer other, ShreddedProcessor output) throws IOException {
 2163  0
                 while (!isAtEnd()) {
 2164  0
                     if (other != null) {   
 2165  0
                         assert !other.isAtEnd();
 2166  0
                         int c = + Utility.compare(getValue(), other.getValue());
 2167  
                     
 2168  0
                         if (c > 0) {
 2169  0
                             break;   
 2170  
                         }
 2171  
                         
 2172  0
                         output.processValue(getValue());
 2173  
                                       
 2174  0
                         copyTuples(getValueEndIndex(), output);
 2175  0
                     } else {
 2176  0
                         output.processValue(getValue());
 2177  0
                         copyTuples(getValueEndIndex(), output);
 2178  
                     }
 2179  0
                     incrementValue();  
 2180  
                     
 2181  
                
 2182  
                 }
 2183  0
             }
 2184  
             public void copyUntil(ShreddedBuffer other, ShreddedProcessor output) throws IOException {
 2185  0
                 copyUntilValue(other, output);
 2186  0
             }
 2187  
             
 2188  
         }                         
 2189  0
         public static class ShreddedCombiner implements ReaderSource<DocumentNumberWordInteger>, ShreddedSource {   
 2190  
             public ShreddedProcessor processor;
 2191  
             Collection<ShreddedReader> readers;       
 2192  0
             boolean closeOnExit = false;
 2193  0
             boolean uninitialized = true;
 2194  0
             PriorityQueue<ShreddedReader> queue = new PriorityQueue<ShreddedReader>();
 2195  
             
 2196  0
             public ShreddedCombiner(Collection<ShreddedReader> readers, boolean closeOnExit) {
 2197  0
                 this.readers = readers;                                                       
 2198  0
                 this.closeOnExit = closeOnExit;
 2199  0
             }
 2200  
                                   
 2201  
             public void setProcessor(Step processor) throws IncompatibleProcessorException {  
 2202  0
                 if (processor instanceof ShreddedProcessor) {
 2203  0
                     this.processor = new DuplicateEliminator((ShreddedProcessor) processor);
 2204  0
                 } else if (processor instanceof DocumentNumberWordInteger.Processor) {
 2205  0
                     this.processor = new DuplicateEliminator(new TupleUnshredder((DocumentNumberWordInteger.Processor) processor));
 2206  0
                 } else if (processor instanceof org.galagosearch.tupleflow.Processor) {
 2207  0
                     this.processor = new DuplicateEliminator(new TupleUnshredder((org.galagosearch.tupleflow.Processor<DocumentNumberWordInteger>) processor));
 2208  
                 } else {
 2209  0
                     throw new IncompatibleProcessorException(processor.getClass().getName() + " is not supported by " + this.getClass().getName());                                                                       
 2210  
                 }
 2211  0
             }                                
 2212  
             
 2213  
             public Class<DocumentNumberWordInteger> getOutputClass() {
 2214  0
                 return DocumentNumberWordInteger.class;
 2215  
             }
 2216  
             
 2217  
             public void initialize() throws IOException {
 2218  0
                 for (ShreddedReader reader : readers) {
 2219  0
                     reader.fill();                                        
 2220  
                     
 2221  0
                     if (!reader.getBuffer().isAtEnd())
 2222  0
                         queue.add(reader);
 2223  
                 }   
 2224  
 
 2225  0
                 uninitialized = false;
 2226  0
             }
 2227  
 
 2228  
             public void run() throws IOException {
 2229  0
                 initialize();
 2230  
                
 2231  0
                 while (queue.size() > 0) {
 2232  0
                     ShreddedReader top = queue.poll();
 2233  0
                     ShreddedReader next = null;
 2234  0
                     ShreddedBuffer nextBuffer = null; 
 2235  
                     
 2236  0
                     assert !top.getBuffer().isAtEnd();
 2237  
                                                   
 2238  0
                     if (queue.size() > 0) {
 2239  0
                         next = queue.peek();
 2240  0
                         nextBuffer = next.getBuffer();
 2241  0
                         assert !nextBuffer.isAtEnd();
 2242  
                     }
 2243  
                     
 2244  0
                     top.getBuffer().copyUntil(nextBuffer, processor);
 2245  0
                     if (top.getBuffer().isAtEnd())
 2246  0
                         top.fill();                 
 2247  
                         
 2248  0
                     if (!top.getBuffer().isAtEnd())
 2249  0
                         queue.add(top);
 2250  0
                 }              
 2251  
                 
 2252  0
                 if (closeOnExit)
 2253  0
                     processor.close();
 2254  0
             }
 2255  
 
 2256  
             public DocumentNumberWordInteger read() throws IOException {
 2257  0
                 if (uninitialized)
 2258  0
                     initialize();
 2259  
 
 2260  0
                 DocumentNumberWordInteger result = null;
 2261  
 
 2262  0
                 while (queue.size() > 0) {
 2263  0
                     ShreddedReader top = queue.poll();
 2264  0
                     result = top.read();
 2265  
 
 2266  0
                     if (result != null) {
 2267  0
                         if (top.getBuffer().isAtEnd())
 2268  0
                             top.fill();
 2269  
 
 2270  0
                         queue.offer(top);
 2271  0
                         break;
 2272  
                     } 
 2273  0
                 }
 2274  
 
 2275  0
                 return result;
 2276  
             }
 2277  
         } 
 2278  0
         public static class ShreddedReader implements Step, Comparable<ShreddedReader>, TypeReader<DocumentNumberWordInteger>, ShreddedSource {      
 2279  
             public ShreddedProcessor processor;
 2280  
             ShreddedBuffer buffer;
 2281  0
             DocumentNumberWordInteger last = new DocumentNumberWordInteger();         
 2282  0
             long updateValueCount = -1;
 2283  0
             long tupleCount = 0;
 2284  0
             long bufferStartCount = 0;  
 2285  
             ArrayInput input;
 2286  
             
 2287  0
             public ShreddedReader(ArrayInput input) {
 2288  0
                 this.input = input; 
 2289  0
                 this.buffer = new ShreddedBuffer();
 2290  0
             }                               
 2291  
             
 2292  0
             public ShreddedReader(ArrayInput input, int bufferSize) { 
 2293  0
                 this.input = input;
 2294  0
                 this.buffer = new ShreddedBuffer(bufferSize);
 2295  0
             }
 2296  
                  
 2297  
             public final int compareTo(ShreddedReader other) {
 2298  0
                 ShreddedBuffer otherBuffer = other.getBuffer();
 2299  
                 
 2300  0
                 if (buffer.isAtEnd() && otherBuffer.isAtEnd()) {
 2301  0
                     return 0;                 
 2302  0
                 } else if (buffer.isAtEnd()) {
 2303  0
                     return -1;
 2304  0
                 } else if (otherBuffer.isAtEnd()) {
 2305  0
                     return 1;
 2306  
                 }
 2307  
                                    
 2308  0
                 int result = 0;
 2309  
                 do {
 2310  0
                     result = + Utility.compare(buffer.getValue(), otherBuffer.getValue());
 2311  0
                     if(result != 0) break;
 2312  
                 } while (false);                                             
 2313  
                 
 2314  0
                 return result;
 2315  
             }
 2316  
             
 2317  
             public final ShreddedBuffer getBuffer() {
 2318  0
                 return buffer;
 2319  
             }                
 2320  
             
 2321  
             public final DocumentNumberWordInteger read() throws IOException {
 2322  0
                 if (buffer.isAtEnd()) {
 2323  0
                     fill();             
 2324  
                 
 2325  0
                     if (buffer.isAtEnd()) {
 2326  0
                         return null;
 2327  
                     }
 2328  
                 }
 2329  
                       
 2330  0
                 assert !buffer.isAtEnd();
 2331  0
                 DocumentNumberWordInteger result = new DocumentNumberWordInteger();
 2332  
                 
 2333  0
                 result.value = buffer.getValue();
 2334  0
                 result.word = buffer.getWord();
 2335  0
                 result.document = buffer.getDocument();
 2336  
                 
 2337  0
                 buffer.incrementTuple();
 2338  0
                 buffer.autoIncrementValue();
 2339  
                 
 2340  0
                 return result;
 2341  
             }           
 2342  
             
 2343  
             public final void fill() throws IOException {
 2344  
                 try {   
 2345  0
                     buffer.reset();
 2346  
                     
 2347  0
                     if (tupleCount != 0) {
 2348  
                                                       
 2349  0
                         if(updateValueCount - tupleCount > 0) {
 2350  0
                             buffer.values.add(last.value);
 2351  0
                             buffer.valueTupleIdx.add((int) (updateValueCount - tupleCount));
 2352  
                         }
 2353  0
                         bufferStartCount = tupleCount;
 2354  
                     }
 2355  
                     
 2356  0
                     while (!buffer.isFull()) {
 2357  0
                         updateValue();
 2358  0
                         buffer.processTuple(input.readBytes(), input.readInt());
 2359  0
                         tupleCount++;
 2360  
                     }
 2361  0
                 } catch(EOFException e) {}
 2362  0
             }
 2363  
 
 2364  
             public final void updateValue() throws IOException {
 2365  0
                 if (updateValueCount > tupleCount)
 2366  0
                     return;
 2367  
                      
 2368  0
                 last.value = input.readInt();
 2369  0
                 updateValueCount = tupleCount + input.readInt();
 2370  
                                       
 2371  0
                 buffer.processValue(last.value);
 2372  0
             }
 2373  
 
 2374  
             public void run() throws IOException {
 2375  
                 while (true) {
 2376  0
                     fill();
 2377  
                     
 2378  0
                     if (buffer.isAtEnd())
 2379  0
                         break;
 2380  
                     
 2381  0
                     buffer.copyUntil(null, processor);
 2382  
                 }      
 2383  0
                 processor.close();
 2384  0
             }
 2385  
             
 2386  
             public void setProcessor(Step processor) throws IncompatibleProcessorException {  
 2387  0
                 if (processor instanceof ShreddedProcessor) {
 2388  0
                     this.processor = new DuplicateEliminator((ShreddedProcessor) processor);
 2389  0
                 } else if (processor instanceof DocumentNumberWordInteger.Processor) {
 2390  0
                     this.processor = new DuplicateEliminator(new TupleUnshredder((DocumentNumberWordInteger.Processor) processor));
 2391  0
                 } else if (processor instanceof org.galagosearch.tupleflow.Processor) {
 2392  0
                     this.processor = new DuplicateEliminator(new TupleUnshredder((org.galagosearch.tupleflow.Processor<DocumentNumberWordInteger>) processor));
 2393  
                 } else {
 2394  0
                     throw new IncompatibleProcessorException(processor.getClass().getName() + " is not supported by " + this.getClass().getName());                                                                       
 2395  
                 }
 2396  0
             }                                
 2397  
             
 2398  
             public Class<DocumentNumberWordInteger> getOutputClass() {
 2399  0
                 return DocumentNumberWordInteger.class;
 2400  
             }                
 2401  
         }
 2402  
         
 2403  
         public static class DuplicateEliminator implements ShreddedProcessor {
 2404  
             public ShreddedProcessor processor;
 2405  0
             DocumentNumberWordInteger last = new DocumentNumberWordInteger();
 2406  0
             boolean valueProcess = true;
 2407  
                                            
 2408  0
             public DuplicateEliminator() {}
 2409  0
             public DuplicateEliminator(ShreddedProcessor processor) {
 2410  0
                 this.processor = processor;
 2411  0
             }
 2412  
             
 2413  
             public void setShreddedProcessor(ShreddedProcessor processor) {
 2414  0
                 this.processor = processor;
 2415  0
             }
 2416  
 
 2417  
             public void processValue(int value) throws IOException {  
 2418  0
                 if (valueProcess || Utility.compare(value, last.value) != 0) {
 2419  0
                     last.value = value;
 2420  0
                     processor.processValue(value);
 2421  0
                     valueProcess = false;
 2422  
                 }
 2423  0
             }  
 2424  
             
 2425  
             public void resetValue() {
 2426  0
                  valueProcess = true;
 2427  0
             }                                                
 2428  
                                
 2429  
             public void processTuple(byte[] word, int document) throws IOException {
 2430  0
                 processor.processTuple(word, document);
 2431  0
             } 
 2432  
             
 2433  
             public void close() throws IOException {
 2434  0
                 processor.close();
 2435  0
             }                    
 2436  
         }
 2437  
         public static class TupleUnshredder implements ShreddedProcessor {
 2438  0
             DocumentNumberWordInteger last = new DocumentNumberWordInteger();
 2439  
             public org.galagosearch.tupleflow.Processor<DocumentNumberWordInteger> processor;                               
 2440  
             
 2441  0
             public TupleUnshredder(DocumentNumberWordInteger.Processor processor) {
 2442  0
                 this.processor = processor;
 2443  0
             }         
 2444  
             
 2445  0
             public TupleUnshredder(org.galagosearch.tupleflow.Processor<DocumentNumberWordInteger> processor) {
 2446  0
                 this.processor = processor;
 2447  0
             }
 2448  
             
 2449  
             public DocumentNumberWordInteger clone(DocumentNumberWordInteger object) {
 2450  0
                 DocumentNumberWordInteger result = new DocumentNumberWordInteger();
 2451  0
                 if (object == null) return result;
 2452  0
                 result.word = object.word; 
 2453  0
                 result.document = object.document; 
 2454  0
                 result.value = object.value; 
 2455  0
                 return result;
 2456  
             }                 
 2457  
             
 2458  
             public void processValue(int value) throws IOException {
 2459  0
                 last.value = value;
 2460  0
             }   
 2461  
                 
 2462  
             
 2463  
             public void processTuple(byte[] word, int document) throws IOException {
 2464  0
                 last.word = word;
 2465  0
                 last.document = document;
 2466  0
                 processor.process(clone(last));
 2467  0
             }               
 2468  
             
 2469  
             public void close() throws IOException {
 2470  0
                 processor.close();
 2471  0
             }
 2472  
         }     
 2473  0
         public static class TupleShredder implements Processor {
 2474  0
             DocumentNumberWordInteger last = new DocumentNumberWordInteger();
 2475  
             public ShreddedProcessor processor;
 2476  
             
 2477  0
             public TupleShredder(ShreddedProcessor processor) {
 2478  0
                 this.processor = processor;
 2479  0
             }                              
 2480  
             
 2481  
             public DocumentNumberWordInteger clone(DocumentNumberWordInteger object) {
 2482  0
                 DocumentNumberWordInteger result = new DocumentNumberWordInteger();
 2483  0
                 if (object == null) return result;
 2484  0
                 result.word = object.word; 
 2485  0
                 result.document = object.document; 
 2486  0
                 result.value = object.value; 
 2487  0
                 return result;
 2488  
             }                 
 2489  
             
 2490  
             public void process(DocumentNumberWordInteger object) throws IOException {                                                                                                                                                   
 2491  0
                 boolean processAll = false;
 2492  0
                 if(last == null || Utility.compare(last.value, object.value) != 0 || processAll) { processor.processValue(object.value); processAll = true; }
 2493  0
                 processor.processTuple(object.word, object.document);                                         
 2494  0
             }
 2495  
                           
 2496  
             public Class<DocumentNumberWordInteger> getInputClass() {
 2497  0
                 return DocumentNumberWordInteger.class;
 2498  
             }
 2499  
             
 2500  
             public void close() throws IOException {
 2501  0
                 processor.close();
 2502  0
             }                     
 2503  
         }
 2504  
     } 
 2505  
 }