View Javadoc

1   // BSD License (http://www.galagosearch.org/license)
2   
3   package org.galagosearch.core.tools;
4   
5   import org.galagosearch.core.retrieval.*;
6   import java.io.IOException;
7   import java.util.ArrayList;
8   import java.util.List;
9   import java.util.Map;
10  import java.util.Set;
11  import org.galagosearch.core.parse.Document;
12  import org.galagosearch.core.retrieval.query.Node;
13  import org.galagosearch.core.retrieval.query.SimpleQuery;
14  import org.galagosearch.core.retrieval.query.StructuredQuery;
15  import org.galagosearch.core.store.DocumentStore;
16  import org.galagosearch.core.store.SnippetGenerator;
17  import org.galagosearch.tupleflow.Parameters;
18  
19  /***
20   *
21   * @author trevor
22   */
23  public class Search {
24      SnippetGenerator generator;
25      DocumentStore store;
26      Retrieval retrieval;
27  
28      public Search(Retrieval retrieval, DocumentStore store) {
29          this.store = store;
30          this.retrieval = retrieval;
31          generator = new SnippetGenerator();
32      }
33  
34      public void close() throws IOException {
35          store.close();
36          retrieval.close();
37      }
38  
39      public static class SearchResult {
40          public Node query;
41          public Node transformedQuery;
42          public List<SearchResultItem> items;
43      }
44  
45      public static class SearchResultItem {
46          public int rank;
47          public String identifier;
48          public String displayTitle;
49          public String url;
50          public Map<String, String> metadata;
51          public String summary;
52      }
53  
54      public String getSummary(Document document, Set<String> query) throws IOException {
55          if (document.metadata.containsKey("description")) {
56              String description = document.metadata.get("description");
57  
58              if (description.length() > 10) {
59                  return generator.highlight(description, query);
60              }
61          }
62  
63          return generator.getSnippet(document.text, query);
64      }
65  
66      public static Node parseQuery(String query, Parameters parameters) {
67          String queryType = parameters.get("queryType", "complex");
68  
69          if (queryType.equals("simple")) {
70              return SimpleQuery.parseTree(query);
71          }
72  
73          return StructuredQuery.parse(query);
74      }
75  
76      public Document getDocument(String identifier) throws IOException {
77          return store.get(identifier);
78      }
79  
80      public SearchResult runQuery(String query, int startAt, int count, boolean summarize) throws Exception {
81          Node tree = parseQuery(query, new Parameters());
82          Node transformed = retrieval.transformQuery(tree);
83          ScoredDocument[] results = retrieval.runQuery(transformed, startAt + count);
84          SearchResult result = new SearchResult();
85          Set<String> queryTerms = StructuredQuery.findQueryTerms(tree);
86          result.query = tree;
87          result.transformedQuery = transformed;
88          result.items = new ArrayList();
89  
90          for (int i = startAt; i < Math.min(startAt + count, results.length); i++) {
91              String identifier = retrieval.getDocumentName(results[i].document);
92              Document document = getDocument(identifier);
93              SearchResultItem item = new SearchResultItem();
94  
95              item.rank = i + 1;
96              item.identifier = identifier;
97              item.displayTitle = identifier;
98  
99              if (document.metadata.containsKey("title")) {
100                 item.displayTitle = document.metadata.get("title");
101             }
102 
103             if (item.displayTitle != null) {
104                 item.displayTitle = generator.highlight(item.displayTitle, queryTerms);
105             }
106 
107             if (document.metadata.containsKey("url")) {
108                 item.url = document.metadata.get("url");
109             }
110 
111             if (summarize) {
112                 item.summary = getSummary(document, queryTerms);
113             }
114 
115             item.metadata = document.metadata;
116             result.items.add(item);
117         }
118 
119         return result;
120     }
121 }