View Javadoc

1   // BSD License (http://www.galagosearch.org/license)
2   
3   package org.galagosearch.core.tools;
4   
5   import java.io.IOException;
6   import java.io.PrintWriter;
7   import java.io.UnsupportedEncodingException;
8   import java.net.URLEncoder;
9   import java.util.Arrays;
10  import java.util.HashSet;
11  import java.util.Map.Entry;
12  import java.util.Set;
13  import javax.servlet.ServletException;
14  import javax.servlet.http.HttpServletRequest;
15  import javax.servlet.http.HttpServletResponse;
16  import org.galagosearch.core.parse.Document;
17  import org.galagosearch.core.tools.Search.SearchResult;
18  import org.galagosearch.core.tools.Search.SearchResultItem;
19  import org.mortbay.jetty.handler.AbstractHandler;
20  import org.znerd.xmlenc.XMLOutputter;
21  
22  /***
23   * <p>Handles web search requests against a Galago index.  Also handles XML requests for
24   * documents, snippets and search results.</p>
25   *
26   * <p>This class is set up to work with an embedded Jetty instance, but it should be
27   * fairly easy to wrap into a Servlet for use with something else (Tomcat, Glassfish, etc.)</p>
28   *
29   * <p>URLs supported:</p>
30   *
31   * <table>
32   *   <tr>
33   *     <td>/</td>
34   *     <td>Main Page</td>
35   *   </tr>
36   *   <tr>
37   *     <td>/search</td>
38   *     <td>HTML Search Results (q, start, n)</td>
39   *   </tr>
40   *   <tr>
41   *     <td>/xmlsearch</td>
42   *     <td>XML Search Results (q, start, n)</td>
43   *   </tr>
44   *   <tr>
45   *     <td>/snippet</td>
46   *     <td>XML Snippet Result (identifier, term+)</td>
47   *   </tr>
48   *   <tr>
49   *     <td>/document</td>
50   *     <td>Document Result (identifier)</td>
51   *   </tr>
52   * </table>
53   *
54   * @author trevor
55   */
56  public class SearchWebHandler extends AbstractHandler {
57      Search search;
58  
59      public SearchWebHandler(Search search) {
60          this.search = search;
61      }
62  
63      public String getEscapedString(String text) {
64          StringBuilder builder = new StringBuilder();
65          
66          for (int i = 0; i < text.length(); ++i) {
67              char c = text.charAt(i);
68              if (c >= 128) {
69                  builder.append("&#" + (int)c + ";");
70              } else {
71                  builder.append(c);
72              }
73          }
74  
75          return builder.toString();
76      }
77  
78      public void handleDocument(HttpServletRequest request, HttpServletResponse response) throws IOException {
79          request.getParameterMap();
80          String identifier = request.getParameter("identifier");
81          Document document = search.getDocument(identifier);
82          response.setContentType("text/html; charset=UTF-8");
83  
84          PrintWriter writer = response.getWriter();
85          writer.write(getEscapedString(document.text));
86          writer.close();
87      }
88  
89      public void handleSnippet(HttpServletRequest request, HttpServletResponse response) throws IOException {
90          String identifier = request.getParameter("identifier");
91          String[] terms = request.getParameterValues("term");
92          Set<String> queryTerms = new HashSet<String>(Arrays.asList(terms));
93  
94          Document document = search.getDocument(identifier);
95  
96          if (document == null) {
97              response.setStatus(response.SC_NOT_FOUND);
98          } else {
99              response.setContentType("text/xml");
100             PrintWriter writer = response.getWriter();
101             String snippet = search.getSummary(document, queryTerms);
102             String title = document.metadata.get("title");
103             String url = document.metadata.get("url");
104 
105             if (snippet == null) snippet = "";
106 
107             response.setContentType("text/xml");
108             writer.append("<response>\n");
109             writer.append(String.format("<snippet>%s</snippet>\n", snippet));
110             writer.append(String.format("<identifier>%s</identifier>\n", identifier));
111             writer.append(String.format("<title>%s</title>\n", scrub(title)));
112             writer.append(String.format("<url>%s</url>\n", scrub(url)));
113             writer.append("</response>");
114             writer.close();
115         }
116     }
117 
118     private String scrub(String s) throws UnsupportedEncodingException {
119         if (s == null) return s;
120         return s.replace("<", "&gt;")
121                 .replace(">", "&lt;")
122                 .replace("&", "&amp;");
123     }
124 
125     public void handleSearch(HttpServletRequest request, HttpServletResponse response) throws Exception {
126         SearchResult result = performSearch(request);
127         response.setContentType("text/html");
128         String displayQuery = scrub(request.getParameter("q"));
129         String encodedQuery = URLEncoder.encode(request.getParameter("q"), "UTF-8");
130 
131         PrintWriter writer = response.getWriter();
132         writer.append("<html>\n");
133         writer.append("<head>\n");
134         writer.append(String.format("<title>%s - Galago Search</title>\n", displayQuery));
135         writeStyle(writer);
136         writer.append("<script type=\"text/javascript\">\n");
137         writer.append("function toggleDebug() {\n");
138         writer.append("   var object = document.getElementById('debug');\n");
139         writer.append("   if (object.style.display != 'block') {\n");
140         writer.append("     object.style.display = 'block';\n");
141         writer.append("  } else {\n");
142         writer.append("     object.style.display = 'none';\n");
143         writer.append("  }\n");
144         writer.append("}\n");
145         writer.append("</script>\n");
146         writer.append("</head>\n<body>\n");
147 
148         writer.append("<div id=\"header\">\n");
149         writer.append("<table><tr>");
150         writer.append("<td><a href=\"http://www.galagosearch.org\">" +
151                       "<img src=\"http://www.galagosearch.org/galago.png\"></a></td>");
152         writer.append("<td><br/><form action=\"search\">" +
153                       String.format("<input name=\"q\" size=\"40\" value=\"%s\" />", displayQuery) +
154                       "<input value=\"Search\" type=\"submit\" /></form></td>");
155         writer.append("</tr>");
156         writer.append("</table>\n");
157         writer.append("</div>\n");
158 
159         writer.append("<center>[<a href=\"#\" onClick=\"toggleDebug(); return false;\">debug</a>]</center>");
160         writer.append("<div id=\"debug\">");
161         writer.append("<table>");
162         writer.append(String.format("<tr><td>%s</td><td>%s</td></tr>",
163                       "Parsed Query", result.query.toString()));
164         writer.append(String.format("<tr><td>%s</td><td>%s</td></tr>",
165                       "Transformed Query", result.transformedQuery.toString()));
166         writer.append("</table>");
167         writer.append("</div>");
168 
169         for (SearchResultItem item : result.items) {
170             writer.append("<div id=\"result\">\n");
171             writer.append(String.format("<a href=\"document?identifier=%s\">%s</a><br/>" +
172                                         "<div id=\"summary\">%s</div>\n" +
173                                         "<div id=\"meta\">%s - %s</div>\n",
174                                         item.identifier,
175                                         item.displayTitle,
176                                         item.summary,
177                                         scrub(item.identifier),
178                                         scrub(item.url)));
179             writer.append("</div>\n");
180         }
181 
182         String startAtString = request.getParameter("start");
183         String countString = request.getParameter("n");
184         int startAt = 0;
185         int count = 10;
186 
187         if (startAtString != null) {
188             startAt = Integer.parseInt(startAtString);
189         }
190         if (countString != null) {
191             count = Integer.parseInt(countString);
192         }
193 
194         writer.append("<center>\n");
195         if (startAt != 0) {
196             writer.append(String.format("<a href=\"search?q=%s&start=%d&n=%d\">Previous</a>",
197                                         encodedQuery, Math.max(startAt-count,0), count));
198             if (result.items.size() >= count) {
199                 writer.append(" | ");
200             }
201         }
202 
203         if (result.items.size() >= count) {
204             writer.append(String.format("<a href=\"search?q=%s&start=%d&n=%d\">Next</a>",
205                                         encodedQuery, startAt+count, count));
206         }
207         writer.append("</center>");
208         writer.append("</body>");
209         writer.append("</html>");
210         writer.close();
211     }
212 
213     public void handleSearchXML(HttpServletRequest request, HttpServletResponse response) throws IllegalStateException, IllegalArgumentException, IOException, Exception {
214         SearchResult result = performSearch(request);
215         PrintWriter writer = response.getWriter();
216         XMLOutputter outputter = new XMLOutputter(writer, "UTF-8");
217         response.setContentType("text/xml");
218 
219         outputter.startTag("response");
220 
221         writer.append("<response>\n");
222         for (SearchResultItem item : result.items) {
223             outputter.startTag("result");
224             
225             outputter.startTag("identifier");
226             outputter.pcdata(item.identifier);
227             outputter.endTag();
228             
229             outputter.startTag("title");
230             outputter.pcdata(item.displayTitle);
231             outputter.endTag();
232 
233             outputter.startTag("url");
234             outputter.pcdata(item.url);
235             outputter.endTag();
236 
237             outputter.startTag("snippet");
238             outputter.pcdata(item.summary);
239             outputter.endTag();
240 
241             outputter.startTag("rank");
242             outputter.pcdata("" + item.rank);
243             outputter.endTag();
244             
245             outputter.startTag("metadata");
246             for (Entry<String, String> entry : item.metadata.entrySet()) {
247                 outputter.startTag("item");
248                 outputter.startTag("key");
249                 outputter.pcdata(entry.getKey());
250                 outputter.endTag();
251                 outputter.startTag("value");
252                 outputter.pcdata(entry.getValue());
253                 outputter.endTag();
254             }
255             
256             outputter.endTag();
257         }
258     }
259 
260     public void writeStyle(PrintWriter writer) {
261         writer.write("<style type=\"text/css\">\n");
262         writer.write("body { font-family: Helvetica, sans-serif; }\n");
263         writer.write("img { border-style: none; }\n");
264         writer.write("#box { border: 1px solid #ccc; margin: 100px auto; width: 500px;" +
265                      "background: rgb(210, 233, 217); }\n");
266         writer.write("#box a { font-size: small; text-decoration: none; }\n");
267         writer.write("#box a:link { color: rgb(0, 93, 40); }\n");
268         writer.write("#box a:visited { color: rgb(90, 93, 90); }\n");
269         writer.write("#header { background: rgb(210, 233, 217); border: 1px solid #ccc; }\n");
270         writer.write("#result { padding: 10px 5px; max-width: 550px; }\n");
271         writer.write("#meta { font-size: small; color: rgb(60, 100, 60); }\n");
272         writer.write("#summary { font-size: small; }\n");
273         writer.write("#debug { display: none; }\n");
274         writer.write("</style>");
275     }
276 
277     public void handleMainPage(HttpServletRequest request, HttpServletResponse response) throws IOException {
278         PrintWriter writer = response.getWriter();
279         response.setContentType("text/html");
280 
281         writer.append("<html>\n");
282         writer.append("<head>\n");
283         writeStyle(writer);
284         writer.append("<title>Galago Search</title></head>");
285         writer.append("<body>");
286         writer.append("<center><br/><br/><div id=\"box\">" +
287                       "<a href=\"http://www.galagosearch.org\">" +
288                       "<img src=\"http://www.galagosearch.org/galago.png\"/></a><br/>\n");
289         writer.append("<form action=\"search\"><input name=\"q\" size=\"40\">" +
290                       "<input value=\"Search\" type=\"submit\" /></form><br/><br/>");
291         writer.append("</div></center></body></html>\n");
292         writer.close();
293     }
294 
295     public void handle(String target,
296             HttpServletRequest request,
297             HttpServletResponse response,
298             int dispatch) throws IOException, ServletException {
299         if (request.getPathInfo().equals("/search")) {
300             try {
301                 handleSearch(request, response);
302             } catch(Exception e) {
303                 throw new ServletException("Caught exception from handleSearch", e);
304             }
305         } else if (request.getPathInfo().equals("/document")) {
306             handleDocument(request, response);
307         } else if (request.getPathInfo().equals("/searchxml")) {
308             try {
309                 handleSearchXML(request, response);
310             } catch(Exception e) {
311                 throw new ServletException("Caught exception from handleSearchXML", e);
312             }
313         } else if (request.getPathInfo().equals("/snippet")) {
314             handleSnippet(request, response);
315         } else {
316             handleMainPage(request, response);
317         }
318     }
319 
320     private SearchResult performSearch(HttpServletRequest request) throws Exception {
321         String query = request.getParameter("q");
322         String startAtString = request.getParameter("start");
323         String countString = request.getParameter("n");
324         int startAt = (startAtString == null) ? 0 : Integer.parseInt(startAtString);
325         int resultCount = (countString == null) ? 10 : Integer.parseInt(countString);
326         SearchResult result = search.runQuery(query, startAt, resultCount, true);
327         return result;
328     }
329 }