| 1 | |
|
| 2 | |
|
| 3 | |
package org.galagosearch.core.index; |
| 4 | |
|
| 5 | |
import java.io.BufferedInputStream; |
| 6 | |
import java.io.DataInputStream; |
| 7 | |
import java.io.FileInputStream; |
| 8 | |
import java.io.IOException; |
| 9 | |
import java.util.ArrayList; |
| 10 | |
import org.galagosearch.tupleflow.Utility; |
| 11 | |
|
| 12 | |
|
| 13 | |
|
| 14 | |
|
| 15 | |
|
| 16 | |
|
| 17 | 4 | public class DocumentNameReader { |
| 18 | 480 | private static class NameSlot { |
| 19 | |
public String prefix; |
| 20 | |
public int offset; |
| 21 | |
public int footerWidth; |
| 22 | |
public int[] footers; |
| 23 | |
} |
| 24 | |
|
| 25 | |
ArrayList<NameSlot> slots; |
| 26 | |
int documentCount; |
| 27 | |
|
| 28 | |
|
| 29 | 12 | public DocumentNameReader(String filename) throws IOException { |
| 30 | 12 | FileInputStream f = new FileInputStream(filename); |
| 31 | 12 | DataInputStream input = new DataInputStream(new BufferedInputStream(f)); |
| 32 | 12 | slots = new ArrayList(); |
| 33 | 12 | read(input); |
| 34 | 12 | input.close(); |
| 35 | 12 | } |
| 36 | |
|
| 37 | |
private String getInSlot(NameSlot slot, int footerIndex) { |
| 38 | 20 | int footer = slot.footers[footerIndex-slot.offset]; |
| 39 | 20 | String prefix = slot.prefix; |
| 40 | |
String documentName; |
| 41 | |
|
| 42 | 20 | if(slot.footerWidth == 0) { |
| 43 | 20 | documentName = slot.prefix; |
| 44 | |
} else { |
| 45 | 0 | String format = "%s-%0" + slot.footerWidth + "d"; |
| 46 | 0 | documentName = String.format(format, prefix, footer); |
| 47 | |
} |
| 48 | |
|
| 49 | 20 | return documentName; |
| 50 | |
} |
| 51 | |
|
| 52 | |
public String get(int index) { |
| 53 | 20 | assert index >= 0; |
| 54 | 20 | assert index < documentCount; |
| 55 | |
|
| 56 | 20 | if(index >= documentCount) |
| 57 | 0 | return "unknown"; |
| 58 | |
|
| 59 | 20 | if(index < 0) |
| 60 | 0 | return "unknown"; |
| 61 | |
|
| 62 | 20 | int big = slots.size()-1; |
| 63 | 20 | int small = 0; |
| 64 | |
|
| 65 | 100 | while(big-small > 1) { |
| 66 | 80 | int middle = small + (big-small)/2; |
| 67 | |
|
| 68 | 80 | if(slots.get(middle).offset >= index) |
| 69 | 60 | big = middle; |
| 70 | |
else |
| 71 | 20 | small = middle; |
| 72 | 80 | } |
| 73 | |
|
| 74 | 20 | NameSlot one = slots.get(small); |
| 75 | 20 | NameSlot two = slots.get(big); |
| 76 | 20 | String result = ""; |
| 77 | |
|
| 78 | 20 | if (two.offset <= index) |
| 79 | 20 | result = getInSlot(two, index); |
| 80 | |
else |
| 81 | 0 | result = getInSlot(one, index); |
| 82 | |
|
| 83 | 20 | return result; |
| 84 | |
} |
| 85 | |
|
| 86 | |
public void read(DataInputStream input) throws IOException { |
| 87 | 12 | int offset = 0; |
| 88 | |
|
| 89 | |
|
| 90 | 252 | while(input.available() > 0) { |
| 91 | |
|
| 92 | 240 | int prefixLength = input.readInt(); |
| 93 | 240 | byte[] prefixData = new byte[prefixLength]; |
| 94 | 240 | input.read(prefixData); |
| 95 | |
|
| 96 | |
|
| 97 | 240 | int footerWidth = input.readInt(); |
| 98 | 240 | int footerCount = input.readInt(); |
| 99 | 240 | NameSlot slot = new NameSlot(); |
| 100 | |
|
| 101 | 240 | slot.footerWidth = footerWidth; |
| 102 | 240 | slot.offset = offset; |
| 103 | 240 | slot.prefix = Utility.makeString(prefixData); |
| 104 | 240 | slot.footers = new int[footerCount]; |
| 105 | |
|
| 106 | 480 | for(int i=0; i<footerCount; i++) { |
| 107 | 240 | slot.footers[i] = input.readInt(); |
| 108 | |
} |
| 109 | |
|
| 110 | 240 | slots.add(slot); |
| 111 | 240 | offset += slot.footers.length; |
| 112 | 240 | } |
| 113 | |
|
| 114 | 12 | documentCount = offset; |
| 115 | 12 | } |
| 116 | |
} |