View Javadoc

1   // BSD License (http://www.galagosearch.org/license)
2   
3   package org.galagosearch.core.index;
4   
5   import java.io.BufferedInputStream;
6   import java.io.DataInputStream;
7   import java.io.FileInputStream;
8   import java.io.IOException;
9   import java.util.ArrayList;
10  import org.galagosearch.tupleflow.Utility;
11  /***
12   * Reads a binary file of document names produced by DocumentNameWriter.
13   * The names are loaded into RAM for quick access.
14   *
15   * @author trevor
16   */
17  public class DocumentNameReader {
18      private static class NameSlot {
19          public String prefix;
20          public int offset;
21          public int footerWidth;
22          public int[] footers;
23      }
24  
25      ArrayList<NameSlot> slots;
26      int documentCount;
27      
28      /*** Creates a new instance of DocumentNameReader */
29      public DocumentNameReader(String filename) throws IOException {
30          FileInputStream f = new FileInputStream(filename);
31          DataInputStream input = new DataInputStream(new BufferedInputStream(f));
32          slots = new ArrayList();
33          read(input);
34          input.close();
35      }
36      
37      private String getInSlot(NameSlot slot, int footerIndex) {
38          int footer = slot.footers[footerIndex-slot.offset];
39          String prefix = slot.prefix;
40          String documentName;
41          
42          if(slot.footerWidth == 0) {
43              documentName = slot.prefix;
44          } else {
45              String format = "%s-%0" + slot.footerWidth + "d";
46              documentName = String.format(format, prefix, footer);
47          }
48          
49          return documentName;
50      }
51      
52      public String get(int index) {
53          assert index >= 0;
54          assert index < documentCount;
55          
56          if(index >= documentCount) 
57              return "unknown";
58          
59          if(index < 0)
60              return "unknown";
61          
62          int big = slots.size()-1;
63          int small = 0;
64          
65          while(big-small > 1) {
66              int middle = small + (big-small)/2;
67              
68              if(slots.get(middle).offset >= index)
69                  big = middle;
70              else
71                  small = middle;
72          }
73  
74          NameSlot one = slots.get(small);
75          NameSlot two = slots.get(big);
76          String result = "";
77          
78          if (two.offset <= index)
79              result = getInSlot(two, index);
80          else
81              result = getInSlot(one, index);
82          
83          return result;
84      }
85      
86      public void read(DataInputStream input) throws IOException {
87          int offset = 0;
88          
89          // open a file
90          while(input.available() > 0) {
91              // read the prefix
92              int prefixLength = input.readInt();
93              byte[] prefixData = new byte[prefixLength];
94              input.read(prefixData);
95              
96              // read the footers
97              int footerWidth = input.readInt();
98              int footerCount = input.readInt();
99              NameSlot slot = new NameSlot();
100             
101             slot.footerWidth = footerWidth;
102             slot.offset = offset;
103             slot.prefix = Utility.makeString(prefixData);
104             slot.footers = new int[footerCount];
105             
106             for(int i=0; i<footerCount; i++) {
107                 slot.footers[i] = input.readInt();
108             }
109             
110             slots.add(slot);
111             offset += slot.footers.length;
112         }
113         
114         documentCount = offset;
115     }
116 }