1
2
3 package org.galagosearch.core.tools;
4
5 import java.io.IOException;
6 import java.io.PrintWriter;
7 import java.io.UnsupportedEncodingException;
8 import java.net.URLEncoder;
9 import java.util.Arrays;
10 import java.util.HashSet;
11 import java.util.Map.Entry;
12 import java.util.Set;
13 import javax.servlet.ServletException;
14 import javax.servlet.http.HttpServletRequest;
15 import javax.servlet.http.HttpServletResponse;
16 import org.galagosearch.core.parse.Document;
17 import org.galagosearch.core.tools.Search.SearchResult;
18 import org.galagosearch.core.tools.Search.SearchResultItem;
19 import org.mortbay.jetty.handler.AbstractHandler;
20 import org.znerd.xmlenc.XMLOutputter;
21
22 /***
23 * <p>Handles web search requests against a Galago index. Also handles XML requests for
24 * documents, snippets and search results.</p>
25 *
26 * <p>This class is set up to work with an embedded Jetty instance, but it should be
27 * fairly easy to wrap into a Servlet for use with something else (Tomcat, Glassfish, etc.)</p>
28 *
29 * <p>URLs supported:</p>
30 *
31 * <table>
32 * <tr>
33 * <td>/</td>
34 * <td>Main Page</td>
35 * </tr>
36 * <tr>
37 * <td>/search</td>
38 * <td>HTML Search Results (q, start, n)</td>
39 * </tr>
40 * <tr>
41 * <td>/xmlsearch</td>
42 * <td>XML Search Results (q, start, n)</td>
43 * </tr>
44 * <tr>
45 * <td>/snippet</td>
46 * <td>XML Snippet Result (identifier, term+)</td>
47 * </tr>
48 * <tr>
49 * <td>/document</td>
50 * <td>Document Result (identifier)</td>
51 * </tr>
52 * </table>
53 *
54 * @author trevor
55 */
56 public class SearchWebHandler extends AbstractHandler {
57 Search search;
58
59 public SearchWebHandler(Search search) {
60 this.search = search;
61 }
62
63 public String getEscapedString(String text) {
64 StringBuilder builder = new StringBuilder();
65
66 for (int i = 0; i < text.length(); ++i) {
67 char c = text.charAt(i);
68 if (c >= 128) {
69 builder.append("&#" + (int)c + ";");
70 } else {
71 builder.append(c);
72 }
73 }
74
75 return builder.toString();
76 }
77
78 public void handleDocument(HttpServletRequest request, HttpServletResponse response) throws IOException {
79 request.getParameterMap();
80 String identifier = request.getParameter("identifier");
81 Document document = search.getDocument(identifier);
82 response.setContentType("text/html; charset=UTF-8");
83
84 PrintWriter writer = response.getWriter();
85 writer.write(getEscapedString(document.text));
86 writer.close();
87 }
88
89 public void handleSnippet(HttpServletRequest request, HttpServletResponse response) throws IOException {
90 String identifier = request.getParameter("identifier");
91 String[] terms = request.getParameterValues("term");
92 Set<String> queryTerms = new HashSet<String>(Arrays.asList(terms));
93
94 Document document = search.getDocument(identifier);
95
96 if (document == null) {
97 response.setStatus(response.SC_NOT_FOUND);
98 } else {
99 response.setContentType("text/xml");
100 PrintWriter writer = response.getWriter();
101 String snippet = search.getSummary(document, queryTerms);
102 String title = document.metadata.get("title");
103 String url = document.metadata.get("url");
104
105 if (snippet == null) snippet = "";
106
107 response.setContentType("text/xml");
108 writer.append("<response>\n");
109 writer.append(String.format("<snippet>%s</snippet>\n", snippet));
110 writer.append(String.format("<identifier>%s</identifier>\n", identifier));
111 writer.append(String.format("<title>%s</title>\n", scrub(title)));
112 writer.append(String.format("<url>%s</url>\n", scrub(url)));
113 writer.append("</response>");
114 writer.close();
115 }
116 }
117
118 private String scrub(String s) throws UnsupportedEncodingException {
119 if (s == null) return s;
120 return s.replace("<", ">")
121 .replace(">", "<")
122 .replace("&", "&");
123 }
124
125 public void handleSearch(HttpServletRequest request, HttpServletResponse response) throws Exception {
126 SearchResult result = performSearch(request);
127 response.setContentType("text/html");
128 String displayQuery = scrub(request.getParameter("q"));
129 String encodedQuery = URLEncoder.encode(request.getParameter("q"), "UTF-8");
130
131 PrintWriter writer = response.getWriter();
132 writer.append("<html>\n");
133 writer.append("<head>\n");
134 writer.append(String.format("<title>%s - Galago Search</title>\n", displayQuery));
135 writeStyle(writer);
136 writer.append("<script type=\"text/javascript\">\n");
137 writer.append("function toggleDebug() {\n");
138 writer.append(" var object = document.getElementById('debug');\n");
139 writer.append(" if (object.style.display != 'block') {\n");
140 writer.append(" object.style.display = 'block';\n");
141 writer.append(" } else {\n");
142 writer.append(" object.style.display = 'none';\n");
143 writer.append(" }\n");
144 writer.append("}\n");
145 writer.append("</script>\n");
146 writer.append("</head>\n<body>\n");
147
148 writer.append("<div id=\"header\">\n");
149 writer.append("<table><tr>");
150 writer.append("<td><a href=\"http://www.galagosearch.org\">" +
151 "<img src=\"http://www.galagosearch.org/galago.png\"></a></td>");
152 writer.append("<td><br/><form action=\"search\">" +
153 String.format("<input name=\"q\" size=\"40\" value=\"%s\" />", displayQuery) +
154 "<input value=\"Search\" type=\"submit\" /></form></td>");
155 writer.append("</tr>");
156 writer.append("</table>\n");
157 writer.append("</div>\n");
158
159 writer.append("<center>[<a href=\"#\" onClick=\"toggleDebug(); return false;\">debug</a>]</center>");
160 writer.append("<div id=\"debug\">");
161 writer.append("<table>");
162 writer.append(String.format("<tr><td>%s</td><td>%s</td></tr>",
163 "Parsed Query", result.query.toString()));
164 writer.append(String.format("<tr><td>%s</td><td>%s</td></tr>",
165 "Transformed Query", result.transformedQuery.toString()));
166 writer.append("</table>");
167 writer.append("</div>");
168
169 for (SearchResultItem item : result.items) {
170 writer.append("<div id=\"result\">\n");
171 writer.append(String.format("<a href=\"document?identifier=%s\">%s</a><br/>" +
172 "<div id=\"summary\">%s</div>\n" +
173 "<div id=\"meta\">%s - %s</div>\n",
174 item.identifier,
175 item.displayTitle,
176 item.summary,
177 scrub(item.identifier),
178 scrub(item.url)));
179 writer.append("</div>\n");
180 }
181
182 String startAtString = request.getParameter("start");
183 String countString = request.getParameter("n");
184 int startAt = 0;
185 int count = 10;
186
187 if (startAtString != null) {
188 startAt = Integer.parseInt(startAtString);
189 }
190 if (countString != null) {
191 count = Integer.parseInt(countString);
192 }
193
194 writer.append("<center>\n");
195 if (startAt != 0) {
196 writer.append(String.format("<a href=\"search?q=%s&start=%d&n=%d\">Previous</a>",
197 encodedQuery, Math.max(startAt-count,0), count));
198 if (result.items.size() >= count) {
199 writer.append(" | ");
200 }
201 }
202
203 if (result.items.size() >= count) {
204 writer.append(String.format("<a href=\"search?q=%s&start=%d&n=%d\">Next</a>",
205 encodedQuery, startAt+count, count));
206 }
207 writer.append("</center>");
208 writer.append("</body>");
209 writer.append("</html>");
210 writer.close();
211 }
212
213 public void handleSearchXML(HttpServletRequest request, HttpServletResponse response) throws IllegalStateException, IllegalArgumentException, IOException, Exception {
214 SearchResult result = performSearch(request);
215 PrintWriter writer = response.getWriter();
216 XMLOutputter outputter = new XMLOutputter(writer, "UTF-8");
217 response.setContentType("text/xml");
218
219 outputter.startTag("response");
220
221 writer.append("<response>\n");
222 for (SearchResultItem item : result.items) {
223 outputter.startTag("result");
224
225 outputter.startTag("identifier");
226 outputter.pcdata(item.identifier);
227 outputter.endTag();
228
229 outputter.startTag("title");
230 outputter.pcdata(item.displayTitle);
231 outputter.endTag();
232
233 outputter.startTag("url");
234 outputter.pcdata(item.url);
235 outputter.endTag();
236
237 outputter.startTag("snippet");
238 outputter.pcdata(item.summary);
239 outputter.endTag();
240
241 outputter.startTag("rank");
242 outputter.pcdata("" + item.rank);
243 outputter.endTag();
244
245 outputter.startTag("metadata");
246 for (Entry<String, String> entry : item.metadata.entrySet()) {
247 outputter.startTag("item");
248 outputter.startTag("key");
249 outputter.pcdata(entry.getKey());
250 outputter.endTag();
251 outputter.startTag("value");
252 outputter.pcdata(entry.getValue());
253 outputter.endTag();
254 }
255
256 outputter.endTag();
257 }
258 }
259
260 public void writeStyle(PrintWriter writer) {
261 writer.write("<style type=\"text/css\">\n");
262 writer.write("body { font-family: Helvetica, sans-serif; }\n");
263 writer.write("img { border-style: none; }\n");
264 writer.write("#box { border: 1px solid #ccc; margin: 100px auto; width: 500px;" +
265 "background: rgb(210, 233, 217); }\n");
266 writer.write("#box a { font-size: small; text-decoration: none; }\n");
267 writer.write("#box a:link { color: rgb(0, 93, 40); }\n");
268 writer.write("#box a:visited { color: rgb(90, 93, 90); }\n");
269 writer.write("#header { background: rgb(210, 233, 217); border: 1px solid #ccc; }\n");
270 writer.write("#result { padding: 10px 5px; max-width: 550px; }\n");
271 writer.write("#meta { font-size: small; color: rgb(60, 100, 60); }\n");
272 writer.write("#summary { font-size: small; }\n");
273 writer.write("#debug { display: none; }\n");
274 writer.write("</style>");
275 }
276
277 public void handleMainPage(HttpServletRequest request, HttpServletResponse response) throws IOException {
278 PrintWriter writer = response.getWriter();
279 response.setContentType("text/html");
280
281 writer.append("<html>\n");
282 writer.append("<head>\n");
283 writeStyle(writer);
284 writer.append("<title>Galago Search</title></head>");
285 writer.append("<body>");
286 writer.append("<center><br/><br/><div id=\"box\">" +
287 "<a href=\"http://www.galagosearch.org\">" +
288 "<img src=\"http://www.galagosearch.org/galago.png\"/></a><br/>\n");
289 writer.append("<form action=\"search\"><input name=\"q\" size=\"40\">" +
290 "<input value=\"Search\" type=\"submit\" /></form><br/><br/>");
291 writer.append("</div></center></body></html>\n");
292 writer.close();
293 }
294
295 public void handle(String target,
296 HttpServletRequest request,
297 HttpServletResponse response,
298 int dispatch) throws IOException, ServletException {
299 if (request.getPathInfo().equals("/search")) {
300 try {
301 handleSearch(request, response);
302 } catch(Exception e) {
303 throw new ServletException("Caught exception from handleSearch", e);
304 }
305 } else if (request.getPathInfo().equals("/document")) {
306 handleDocument(request, response);
307 } else if (request.getPathInfo().equals("/searchxml")) {
308 try {
309 handleSearchXML(request, response);
310 } catch(Exception e) {
311 throw new ServletException("Caught exception from handleSearchXML", e);
312 }
313 } else if (request.getPathInfo().equals("/snippet")) {
314 handleSnippet(request, response);
315 } else {
316 handleMainPage(request, response);
317 }
318 }
319
320 private SearchResult performSearch(HttpServletRequest request) throws Exception {
321 String query = request.getParameter("q");
322 String startAtString = request.getParameter("start");
323 String countString = request.getParameter("n");
324 int startAt = (startAtString == null) ? 0 : Integer.parseInt(startAtString);
325 int resultCount = (countString == null) ? 10 : Integer.parseInt(countString);
326 SearchResult result = search.runQuery(query, startAt, resultCount, true);
327 return result;
328 }
329 }