1
2
3 package org.galagosearch.core.retrieval.query;
4
5 import java.util.ArrayList;
6 import java.util.List;
7
8 /***
9 * <p>SimpleQuery parses the kind of queries you might expect for a end-user search engine.
10 * The format is also meant to be similar to Lucene's query format.</p>
11 *
12 * Queries can be single terms:<br/>
13 * <tt>white house</tt><br/>
14 * or phrases:<br/>
15 * <tt>"white house"</tt><br/>
16 * and have fields:
17 * <tt>title:"white house"</tt><br/>
18 * or weights:<br/>
19 * <tt>white^4 house^2</tt><br/>
20 *
21 * <p>A query can be parsed into a list of QueryTerms or translated into a tree of Nodes
22 * which can be used with the StructuredRetrieval code.</p>
23 *
24 * @author trevor
25 */
26 public class SimpleQuery {
27 public static class QueryTerm {
28 public QueryTerm(String text) {
29 this.weight = 1.0;
30 this.field = null;
31 this.text = text;
32 }
33
34 public QueryTerm(String text, String field, double weight) {
35 this.text = text;
36 this.field = field;
37 this.weight = weight;
38 }
39
40 @Override
41 public boolean equals(Object o) {
42 if (!(o instanceof QueryTerm))
43 return false;
44
45 QueryTerm other = (QueryTerm) o;
46 return text.equals(other.text) &&
47 ((field != null) ? field.equals(other.field) : other.field == null) &&
48 weight == other.weight;
49 }
50
51 @Override
52 public int hashCode() {
53 int hash = 5;
54 hash = 41 * hash + (this.text != null ? this.text.hashCode() : 0);
55 hash = 41 * hash + (this.field != null ? this.field.hashCode() : 0);
56 hash = 41 * hash + (int) (Double.doubleToLongBits(this.weight) ^ (Double.
57 doubleToLongBits(this.weight) >>> 32));
58 return hash;
59 }
60
61 @Override
62 public String toString() {
63 String term = text;
64
65
66 if (term.contains(" ")) {
67 term = "\"" + term + "\"";
68
69
70 }
71 if (field != null && weight != 1.0) {
72 return String.format("%s:%s^%f", field, term, weight);
73 }
74 if (field != null) {
75 return String.format("%s:%s", field, term);
76 }
77 if (weight != 1.0) {
78 return String.format("%s^%f", term, weight);
79 }
80 return text;
81 }
82 public String text;
83 public String field;
84 public double weight;
85 }
86
87 /***
88 * The format of the query term is <tt>field:term^weight</tt>.
89 * Both the field and the weight are optional, and the term may
90 * be enclosed in quotes.
91 *
92 * @return A QueryTerm object describing the query term.
93 */
94 public static QueryTerm parseQueryTerm(String term) {
95 double weight = 1.0;
96 String field = null;
97
98 int colon = term.indexOf(':');
99 if (colon >= 0) {
100 field = term.substring(0, colon);
101 term = term.substring(colon + 1);
102 }
103
104 int caret = term.indexOf('^');
105 if (caret >= 0) {
106 weight = Double.parseDouble(term.substring(caret + 1));
107 term = term.substring(0, caret);
108 }
109
110 if (term.startsWith("\"")) {
111 term = term.substring(1);
112 }
113 if (term.endsWith("\"")) {
114 term = term.substring(0, term.length() - 1);
115 }
116 return new QueryTerm(term, field, weight);
117 }
118
119 public static List<String> textQueryTerms(String query) {
120 boolean inQuote = false;
121 int firstNonSpace = query.length() + 1;
122 int i = 0;
123 ArrayList<String> results = new ArrayList<String>();
124
125
126 while (i < query.length()) {
127
128
129
130 for (; i < query.length(); i++) {
131 char c = query.charAt(i);
132
133 if (Character.isSpaceChar(c)) {
134 if (!inQuote) {
135 if (firstNonSpace < i) {
136 String term = query.substring(firstNonSpace, i);
137 results.add(term);
138 }
139 firstNonSpace = query.length() + 1;
140 }
141 } else if (c == '"') {
142 firstNonSpace = Math.min(firstNonSpace, i);
143 inQuote = !inQuote;
144 } else {
145 firstNonSpace = Math.min(firstNonSpace, i);
146 }
147 }
148 }
149
150 if (firstNonSpace < query.length()) {
151 results.add(query.substring(firstNonSpace, query.length()));
152 }
153
154 return results;
155 }
156
157 public static List<QueryTerm> parse(String query) {
158 ArrayList<QueryTerm> results = new ArrayList<QueryTerm>();
159 int position = 0;
160 String term = null;
161
162 List<String> textTerms = textQueryTerms(query);
163 ArrayList<QueryTerm> parsedTerms = new ArrayList<QueryTerm>();
164
165 for (String textTerm : textTerms) {
166 parsedTerms.add(parseQueryTerm(textTerm));
167 }
168
169 return parsedTerms;
170 }
171
172 public static Node parseTree(String query) {
173 List<QueryTerm> terms = parse(query);
174 ArrayList<Node> nodes = new ArrayList<Node>();
175
176 for (QueryTerm term : terms) {
177 Node termNode = new Node("text", term.text);
178
179 if (term.text.contains(" ")) {
180 String[] phraseTerms = term.text.split(" ");
181 ArrayList<Node> children = new ArrayList<Node>();
182 for (String phraseTerm : phraseTerms) {
183 children.add(new Node("text", phraseTerm));
184 }
185 termNode = new Node("ordered", "1", children);
186 }
187
188 if (term.field != null) {
189 ArrayList<Node> children = new ArrayList<Node>();
190 children.add(termNode);
191 children.add(new Node("field", term.field));
192 termNode = new Node("inside", children);
193 }
194
195 if (term.weight != 1.0) {
196 ArrayList<Node> children = new ArrayList<Node>();
197 children.add(termNode);
198 termNode = new Node("scale", Double.toString(term.weight), children);
199 }
200 nodes.add(termNode);
201 }
202
203 if (nodes.size() < 1)
204 return null;
205
206 if (nodes.size() == 1)
207 return nodes.get(0);
208
209 return new Node("combine", nodes);
210 }
211 }