| 1 | |
|
| 2 | |
|
| 3 | |
package org.galagosearch.core.tools; |
| 4 | |
|
| 5 | |
import java.io.IOException; |
| 6 | |
import java.io.PrintWriter; |
| 7 | |
import java.io.UnsupportedEncodingException; |
| 8 | |
import java.net.URLEncoder; |
| 9 | |
import java.util.Arrays; |
| 10 | |
import java.util.HashSet; |
| 11 | |
import java.util.Map.Entry; |
| 12 | |
import java.util.Set; |
| 13 | |
import javax.servlet.ServletException; |
| 14 | |
import javax.servlet.http.HttpServletRequest; |
| 15 | |
import javax.servlet.http.HttpServletResponse; |
| 16 | |
import org.galagosearch.core.parse.Document; |
| 17 | |
import org.galagosearch.core.tools.Search.SearchResult; |
| 18 | |
import org.galagosearch.core.tools.Search.SearchResultItem; |
| 19 | |
import org.mortbay.jetty.handler.AbstractHandler; |
| 20 | |
import org.znerd.xmlenc.XMLOutputter; |
| 21 | |
|
| 22 | |
|
| 23 | |
|
| 24 | |
|
| 25 | |
|
| 26 | |
|
| 27 | |
|
| 28 | |
|
| 29 | |
|
| 30 | |
|
| 31 | |
|
| 32 | |
|
| 33 | |
|
| 34 | |
|
| 35 | |
|
| 36 | |
|
| 37 | |
|
| 38 | |
|
| 39 | |
|
| 40 | |
|
| 41 | |
|
| 42 | |
|
| 43 | |
|
| 44 | |
|
| 45 | |
|
| 46 | |
|
| 47 | |
|
| 48 | |
|
| 49 | |
|
| 50 | |
|
| 51 | |
|
| 52 | |
|
| 53 | |
|
| 54 | |
|
| 55 | |
|
| 56 | |
public class SearchWebHandler extends AbstractHandler { |
| 57 | |
Search search; |
| 58 | |
|
/**
 * Creates a handler backed by the given search object.
 *
 * @param search stored in this handler's {@code search} field and used by
 *               the request-handling methods to look up documents and run queries
 */
public SearchWebHandler(Search search) {
    this.search = search;
}
| 62 | |
|
| 63 | |
public String getEscapedString(String text) { |
| 64 | 0 | StringBuilder builder = new StringBuilder(); |
| 65 | |
|
| 66 | 0 | for (int i = 0; i < text.length(); ++i) { |
| 67 | 0 | char c = text.charAt(i); |
| 68 | 0 | if (c >= 128) { |
| 69 | 0 | builder.append("&#" + (int)c + ";"); |
| 70 | |
} else { |
| 71 | 0 | builder.append(c); |
| 72 | |
} |
| 73 | |
} |
| 74 | |
|
| 75 | 0 | return builder.toString(); |
| 76 | |
} |
| 77 | |
|
| 78 | |
public void handleDocument(HttpServletRequest request, HttpServletResponse response) throws IOException { |
| 79 | 0 | request.getParameterMap(); |
| 80 | 0 | String identifier = request.getParameter("identifier"); |
| 81 | 0 | Document document = search.getDocument(identifier); |
| 82 | 0 | response.setContentType("text/html; charset=UTF-8"); |
| 83 | |
|
| 84 | 0 | PrintWriter writer = response.getWriter(); |
| 85 | 0 | writer.write(getEscapedString(document.text)); |
| 86 | 0 | writer.close(); |
| 87 | 0 | } |
| 88 | |
|
| 89 | |
public void handleSnippet(HttpServletRequest request, HttpServletResponse response) throws IOException { |
| 90 | 0 | String identifier = request.getParameter("identifier"); |
| 91 | 0 | String[] terms = request.getParameterValues("term"); |
| 92 | 0 | Set<String> queryTerms = new HashSet<String>(Arrays.asList(terms)); |
| 93 | |
|
| 94 | 0 | Document document = search.getDocument(identifier); |
| 95 | |
|
| 96 | 0 | if (document == null) { |
| 97 | 0 | response.setStatus(response.SC_NOT_FOUND); |
| 98 | |
} else { |
| 99 | 0 | response.setContentType("text/xml"); |
| 100 | 0 | PrintWriter writer = response.getWriter(); |
| 101 | 0 | String snippet = search.getSummary(document, queryTerms); |
| 102 | 0 | String title = document.metadata.get("title"); |
| 103 | 0 | String url = document.metadata.get("url"); |
| 104 | |
|
| 105 | 0 | if (snippet == null) snippet = ""; |
| 106 | |
|
| 107 | 0 | response.setContentType("text/xml"); |
| 108 | 0 | writer.append("<response>\n"); |
| 109 | 0 | writer.append(String.format("<snippet>%s</snippet>\n", snippet)); |
| 110 | 0 | writer.append(String.format("<identifier>%s</identifier>\n", identifier)); |
| 111 | 0 | writer.append(String.format("<title>%s</title>\n", scrub(title))); |
| 112 | 0 | writer.append(String.format("<url>%s</url>\n", scrub(url))); |
| 113 | 0 | writer.append("</response>"); |
| 114 | 0 | writer.close(); |
| 115 | |
} |
| 116 | 0 | } |
| 117 | |
|
| 118 | |
private String scrub(String s) throws UnsupportedEncodingException { |
| 119 | 0 | if (s == null) return s; |
| 120 | 0 | return s.replace("<", ">") |
| 121 | |
.replace(">", "<") |
| 122 | |
.replace("&", "&"); |
| 123 | |
} |
| 124 | |
|
| 125 | |
public void handleSearch(HttpServletRequest request, HttpServletResponse response) throws Exception { |
| 126 | 0 | SearchResult result = performSearch(request); |
| 127 | 0 | response.setContentType("text/html"); |
| 128 | 0 | String displayQuery = scrub(request.getParameter("q")); |
| 129 | 0 | String encodedQuery = URLEncoder.encode(request.getParameter("q"), "UTF-8"); |
| 130 | |
|
| 131 | 0 | PrintWriter writer = response.getWriter(); |
| 132 | 0 | writer.append("<html>\n"); |
| 133 | 0 | writer.append("<head>\n"); |
| 134 | 0 | writer.append(String.format("<title>%s - Galago Search</title>\n", displayQuery)); |
| 135 | 0 | writeStyle(writer); |
| 136 | 0 | writer.append("<script type=\"text/javascript\">\n"); |
| 137 | 0 | writer.append("function toggleDebug() {\n"); |
| 138 | 0 | writer.append(" var object = document.getElementById('debug');\n"); |
| 139 | 0 | writer.append(" if (object.style.display != 'block') {\n"); |
| 140 | 0 | writer.append(" object.style.display = 'block';\n"); |
| 141 | 0 | writer.append(" } else {\n"); |
| 142 | 0 | writer.append(" object.style.display = 'none';\n"); |
| 143 | 0 | writer.append(" }\n"); |
| 144 | 0 | writer.append("}\n"); |
| 145 | 0 | writer.append("</script>\n"); |
| 146 | 0 | writer.append("</head>\n<body>\n"); |
| 147 | |
|
| 148 | 0 | writer.append("<div id=\"header\">\n"); |
| 149 | 0 | writer.append("<table><tr>"); |
| 150 | 0 | writer.append("<td><a href=\"http://www.galagosearch.org\">" + |
| 151 | |
"<img src=\"http://www.galagosearch.org/galago.png\"></a></td>"); |
| 152 | 0 | writer.append("<td><br/><form action=\"search\">" + |
| 153 | |
String.format("<input name=\"q\" size=\"40\" value=\"%s\" />", displayQuery) + |
| 154 | |
"<input value=\"Search\" type=\"submit\" /></form></td>"); |
| 155 | 0 | writer.append("</tr>"); |
| 156 | 0 | writer.append("</table>\n"); |
| 157 | 0 | writer.append("</div>\n"); |
| 158 | |
|
| 159 | 0 | writer.append("<center>[<a href=\"#\" onClick=\"toggleDebug(); return false;\">debug</a>]</center>"); |
| 160 | 0 | writer.append("<div id=\"debug\">"); |
| 161 | 0 | writer.append("<table>"); |
| 162 | 0 | writer.append(String.format("<tr><td>%s</td><td>%s</td></tr>", |
| 163 | |
"Parsed Query", result.query.toString())); |
| 164 | 0 | writer.append(String.format("<tr><td>%s</td><td>%s</td></tr>", |
| 165 | |
"Transformed Query", result.transformedQuery.toString())); |
| 166 | 0 | writer.append("</table>"); |
| 167 | 0 | writer.append("</div>"); |
| 168 | |
|
| 169 | 0 | for (SearchResultItem item : result.items) { |
| 170 | 0 | writer.append("<div id=\"result\">\n"); |
| 171 | 0 | writer.append(String.format("<a href=\"document?identifier=%s\">%s</a><br/>" + |
| 172 | |
"<div id=\"summary\">%s</div>\n" + |
| 173 | |
"<div id=\"meta\">%s - %s</div>\n", |
| 174 | |
item.identifier, |
| 175 | |
item.displayTitle, |
| 176 | |
item.summary, |
| 177 | |
scrub(item.identifier), |
| 178 | |
scrub(item.url))); |
| 179 | 0 | writer.append("</div>\n"); |
| 180 | |
} |
| 181 | |
|
| 182 | 0 | String startAtString = request.getParameter("start"); |
| 183 | 0 | String countString = request.getParameter("n"); |
| 184 | 0 | int startAt = 0; |
| 185 | 0 | int count = 10; |
| 186 | |
|
| 187 | 0 | if (startAtString != null) { |
| 188 | 0 | startAt = Integer.parseInt(startAtString); |
| 189 | |
} |
| 190 | 0 | if (countString != null) { |
| 191 | 0 | count = Integer.parseInt(countString); |
| 192 | |
} |
| 193 | |
|
| 194 | 0 | writer.append("<center>\n"); |
| 195 | 0 | if (startAt != 0) { |
| 196 | 0 | writer.append(String.format("<a href=\"search?q=%s&start=%d&n=%d\">Previous</a>", |
| 197 | |
encodedQuery, Math.max(startAt-count,0), count)); |
| 198 | 0 | if (result.items.size() >= count) { |
| 199 | 0 | writer.append(" | "); |
| 200 | |
} |
| 201 | |
} |
| 202 | |
|
| 203 | 0 | if (result.items.size() >= count) { |
| 204 | 0 | writer.append(String.format("<a href=\"search?q=%s&start=%d&n=%d\">Next</a>", |
| 205 | |
encodedQuery, startAt+count, count)); |
| 206 | |
} |
| 207 | 0 | writer.append("</center>"); |
| 208 | 0 | writer.append("</body>"); |
| 209 | 0 | writer.append("</html>"); |
| 210 | 0 | writer.close(); |
| 211 | 0 | } |
| 212 | |
|
| 213 | |
public void handleSearchXML(HttpServletRequest request, HttpServletResponse response) throws IllegalStateException, IllegalArgumentException, IOException, Exception { |
| 214 | 0 | SearchResult result = performSearch(request); |
| 215 | 0 | PrintWriter writer = response.getWriter(); |
| 216 | 0 | XMLOutputter outputter = new XMLOutputter(writer, "UTF-8"); |
| 217 | 0 | response.setContentType("text/xml"); |
| 218 | |
|
| 219 | 0 | outputter.startTag("response"); |
| 220 | |
|
| 221 | 0 | writer.append("<response>\n"); |
| 222 | 0 | for (SearchResultItem item : result.items) { |
| 223 | 0 | outputter.startTag("result"); |
| 224 | |
|
| 225 | 0 | outputter.startTag("identifier"); |
| 226 | 0 | outputter.pcdata(item.identifier); |
| 227 | 0 | outputter.endTag(); |
| 228 | |
|
| 229 | 0 | outputter.startTag("title"); |
| 230 | 0 | outputter.pcdata(item.displayTitle); |
| 231 | 0 | outputter.endTag(); |
| 232 | |
|
| 233 | 0 | outputter.startTag("url"); |
| 234 | 0 | outputter.pcdata(item.url); |
| 235 | 0 | outputter.endTag(); |
| 236 | |
|
| 237 | 0 | outputter.startTag("snippet"); |
| 238 | 0 | outputter.pcdata(item.summary); |
| 239 | 0 | outputter.endTag(); |
| 240 | |
|
| 241 | 0 | outputter.startTag("rank"); |
| 242 | 0 | outputter.pcdata("" + item.rank); |
| 243 | 0 | outputter.endTag(); |
| 244 | |
|
| 245 | 0 | outputter.startTag("metadata"); |
| 246 | 0 | for (Entry<String, String> entry : item.metadata.entrySet()) { |
| 247 | 0 | outputter.startTag("item"); |
| 248 | 0 | outputter.startTag("key"); |
| 249 | 0 | outputter.pcdata(entry.getKey()); |
| 250 | 0 | outputter.endTag(); |
| 251 | 0 | outputter.startTag("value"); |
| 252 | 0 | outputter.pcdata(entry.getValue()); |
| 253 | 0 | outputter.endTag(); |
| 254 | |
} |
| 255 | |
|
| 256 | 0 | outputter.endTag(); |
| 257 | |
} |
| 258 | 0 | } |
| 259 | |
|
| 260 | |
public void writeStyle(PrintWriter writer) { |
| 261 | 0 | writer.write("<style type=\"text/css\">\n"); |
| 262 | 0 | writer.write("body { font-family: Helvetica, sans-serif; }\n"); |
| 263 | 0 | writer.write("img { border-style: none; }\n"); |
| 264 | 0 | writer.write("#box { border: 1px solid #ccc; margin: 100px auto; width: 500px;" + |
| 265 | |
"background: rgb(210, 233, 217); }\n"); |
| 266 | 0 | writer.write("#box a { font-size: small; text-decoration: none; }\n"); |
| 267 | 0 | writer.write("#box a:link { color: rgb(0, 93, 40); }\n"); |
| 268 | 0 | writer.write("#box a:visited { color: rgb(90, 93, 90); }\n"); |
| 269 | 0 | writer.write("#header { background: rgb(210, 233, 217); border: 1px solid #ccc; }\n"); |
| 270 | 0 | writer.write("#result { padding: 10px 5px; max-width: 550px; }\n"); |
| 271 | 0 | writer.write("#meta { font-size: small; color: rgb(60, 100, 60); }\n"); |
| 272 | 0 | writer.write("#summary { font-size: small; }\n"); |
| 273 | 0 | writer.write("#debug { display: none; }\n"); |
| 274 | 0 | writer.write("</style>"); |
| 275 | 0 | } |
| 276 | |
|
| 277 | |
public void handleMainPage(HttpServletRequest request, HttpServletResponse response) throws IOException { |
| 278 | 0 | PrintWriter writer = response.getWriter(); |
| 279 | 0 | response.setContentType("text/html"); |
| 280 | |
|
| 281 | 0 | writer.append("<html>\n"); |
| 282 | 0 | writer.append("<head>\n"); |
| 283 | 0 | writeStyle(writer); |
| 284 | 0 | writer.append("<title>Galago Search</title></head>"); |
| 285 | 0 | writer.append("<body>"); |
| 286 | 0 | writer.append("<center><br/><br/><div id=\"box\">" + |
| 287 | |
"<a href=\"http://www.galagosearch.org\">" + |
| 288 | |
"<img src=\"http://www.galagosearch.org/galago.png\"/></a><br/>\n"); |
| 289 | 0 | writer.append("<form action=\"search\"><input name=\"q\" size=\"40\">" + |
| 290 | |
"<input value=\"Search\" type=\"submit\" /></form><br/><br/>"); |
| 291 | 0 | writer.append("</div></center></body></html>\n"); |
| 292 | 0 | writer.close(); |
| 293 | 0 | } |
| 294 | |
|
| 295 | |
public void handle(String target, |
| 296 | |
HttpServletRequest request, |
| 297 | |
HttpServletResponse response, |
| 298 | |
int dispatch) throws IOException, ServletException { |
| 299 | 0 | if (request.getPathInfo().equals("/search")) { |
| 300 | |
try { |
| 301 | 0 | handleSearch(request, response); |
| 302 | 0 | } catch(Exception e) { |
| 303 | 0 | throw new ServletException("Caught exception from handleSearch", e); |
| 304 | 0 | } |
| 305 | 0 | } else if (request.getPathInfo().equals("/document")) { |
| 306 | 0 | handleDocument(request, response); |
| 307 | 0 | } else if (request.getPathInfo().equals("/searchxml")) { |
| 308 | |
try { |
| 309 | 0 | handleSearchXML(request, response); |
| 310 | 0 | } catch(Exception e) { |
| 311 | 0 | throw new ServletException("Caught exception from handleSearchXML", e); |
| 312 | 0 | } |
| 313 | 0 | } else if (request.getPathInfo().equals("/snippet")) { |
| 314 | 0 | handleSnippet(request, response); |
| 315 | |
} else { |
| 316 | 0 | handleMainPage(request, response); |
| 317 | |
} |
| 318 | 0 | } |
| 319 | |
|
| 320 | |
private SearchResult performSearch(HttpServletRequest request) throws Exception { |
| 321 | 0 | String query = request.getParameter("q"); |
| 322 | 0 | String startAtString = request.getParameter("start"); |
| 323 | 0 | String countString = request.getParameter("n"); |
| 324 | 0 | int startAt = (startAtString == null) ? 0 : Integer.parseInt(startAtString); |
| 325 | 0 | int resultCount = (countString == null) ? 10 : Integer.parseInt(countString); |
| 326 | 0 | SearchResult result = search.runQuery(query, startAt, resultCount, true); |
| 327 | 0 | return result; |
| 328 | |
} |
| 329 | |
} |