Coverage Report - org.galagosearch.core.retrieval.query.SimpleQuery
 
Classes in this File Line Coverage Branch Coverage Complexity
SimpleQuery
96%
68/71
92%
37/40
0
SimpleQuery$QueryTerm
45%
13/29
27%
7/26
0
 
 1  
 // BSD License (http://www.galagosearch.org/license)
 2  
 
 3  
 package org.galagosearch.core.retrieval.query;
 4  
 
 5  
 import java.util.ArrayList;
 6  
 import java.util.List;
 7  
 
 8  
 /**
 9  
  * <p>SimpleQuery parses the kind of queries you might expect for an end-user search engine.
 10  
  * The format is also meant to be similar to Lucene's query format.</p>
 11  
  * 
 12  
  * Queries can be single terms:<br/>
 13  
  *    <tt>white house</tt><br/>
 14  
  * or phrases:<br/>
 15  
  *    <tt>"white house"</tt><br/>
 16  
  * and have fields:<br/>
 17  
  *    <tt>title:"white house"</tt><br/>
 18  
  * or weights:<br/>
 19  
  *    <tt>white^4 house^2</tt><br/>
 20  
  * 
 21  
  * <p>A query can be parsed into a list of QueryTerms or translated into a tree of Nodes
 22  
  * which can be used with the StructuredRetrieval code.</p>
 23  
  * 
 24  
  * @author trevor
 25  
  */
 26  0
 public class SimpleQuery {
 27  0
     public static class QueryTerm {
 28  28
         public QueryTerm(String text) {
 29  28
             this.weight = 1.0;
 30  28
             this.field = null;
 31  28
             this.text = text;
 32  28
         }
 33  
 
 34  84
         public QueryTerm(String text, String field, double weight) {
 35  84
             this.text = text;
 36  84
             this.field = field;
 37  84
             this.weight = weight;
 38  84
         }
 39  
 
 40  
         @Override
 41  
         public boolean equals(Object o) {
 42  40
             if (!(o instanceof QueryTerm))
 43  0
                 return false;
 44  
 
 45  40
             QueryTerm other = (QueryTerm) o;
 46  40
             return text.equals(other.text) &&
 47  
                     ((field != null) ? field.equals(other.field) : other.field == null) &&
 48  
                     weight == other.weight;
 49  
         }
 50  
 
 51  
         @Override
 52  
         public int hashCode() {
 53  0
             int hash = 5;
 54  0
             hash = 41 * hash + (this.text != null ? this.text.hashCode() : 0);
 55  0
             hash = 41 * hash + (this.field != null ? this.field.hashCode() : 0);
 56  0
             hash = 41 * hash + (int) (Double.doubleToLongBits(this.weight) ^ (Double.
 57  
                     doubleToLongBits(this.weight) >>> 32));
 58  0
             return hash;
 59  
         }
 60  
 
 61  
         @Override
 62  
         public String toString() {
 63  0
             String term = text;
 64  
 
 65  
             // if this is a multi-word query, enclose it in quotes
 66  0
             if (term.contains(" ")) {
 67  0
                 term = "\"" + term + "\"";            // use the minimum amount of syntax necessary to
 68  
             // express the query.  If everything is specified, 
 69  
             // the format is field:term^weight.
 70  
             }
 71  0
             if (field != null && weight != 1.0) {
 72  0
                 return String.format("%s:%s^%f", field, term, weight);
 73  
             }
 74  0
             if (field != null) {
 75  0
                 return String.format("%s:%s", field, term);
 76  
             }
 77  0
             if (weight != 1.0) {
 78  0
                 return String.format("%s^%f", term, weight);
 79  
             }
 80  0
             return text;
 81  
         }
 82  
         public String text;
 83  
         public String field;
 84  
         public double weight;
 85  
     }
 86  
 
 87  
     /** 
 88  
      * The format of the query term is <tt>field:term^weight</tt>.
 89  
      * Both the field and the weight are optional, and the term may
 90  
      * be enclosed in quotes.
 91  
      *
 92  
      * @return A QueryTerm object describing the query term.
 93  
      */
 94  
     public static QueryTerm parseQueryTerm(String term) {
 95  72
         double weight = 1.0;
 96  72
         String field = null;
 97  
 
 98  72
         int colon = term.indexOf(':');
 99  72
         if (colon >= 0) {
 100  16
             field = term.substring(0, colon);
 101  16
             term = term.substring(colon + 1);
 102  
         }
 103  
 
 104  72
         int caret = term.indexOf('^');
 105  72
         if (caret >= 0) {
 106  20
             weight = Double.parseDouble(term.substring(caret + 1));
 107  20
             term = term.substring(0, caret);
 108  
         }
 109  
 
 110  72
         if (term.startsWith("\"")) {
 111  16
             term = term.substring(1);
 112  
         }
 113  72
         if (term.endsWith("\"")) {
 114  16
             term = term.substring(0, term.length() - 1);
 115  
         }
 116  72
         return new QueryTerm(term, field, weight);
 117  
     }
 118  
 
 119  
     public static List<String> textQueryTerms(String query) {
 120  32
         boolean inQuote = false;
 121  32
         int firstNonSpace = query.length() + 1;
 122  32
         int i = 0;
 123  32
         ArrayList<String> results = new ArrayList<String>();
 124  
 
 125  
         // each loop parses a single term
 126  64
         while (i < query.length()) {
 127  
             // parsing goes in two phases; first we're trying to bypass inital
 128  
             // spaces before a query term.  after that point, we parse until the
 129  
             // next space that's not in quotes.
 130  560
             for (; i < query.length(); i++) {
 131  264
                 char c = query.charAt(i);
 132  
 
 133  264
                 if (Character.isSpaceChar(c)) {
 134  56
                     if (!inQuote) {
 135  40
                         if (firstNonSpace < i) {
 136  40
                             String term = query.substring(firstNonSpace, i);
 137  40
                             results.add(term);
 138  
                         }
 139  40
                         firstNonSpace = query.length() + 1;
 140  
                     }
 141  208
                 } else if (c == '"') {
 142  32
                     firstNonSpace = Math.min(firstNonSpace, i);
 143  32
                     inQuote = !inQuote;
 144  
                 } else {
 145  176
                     firstNonSpace = Math.min(firstNonSpace, i);
 146  
                 }
 147  
             }
 148  
         }
 149  
 
 150  32
         if (firstNonSpace < query.length()) {
 151  32
             results.add(query.substring(firstNonSpace, query.length()));
 152  
         }
 153  
 
 154  32
         return results;
 155  
     }
 156  
 
 157  
     public static List<QueryTerm> parse(String query) {
 158  32
         ArrayList<QueryTerm> results = new ArrayList<QueryTerm>();
 159  32
         int position = 0;
 160  32
         String term = null;
 161  
 
 162  32
         List<String> textTerms = textQueryTerms(query);
 163  32
         ArrayList<QueryTerm> parsedTerms = new ArrayList<QueryTerm>();
 164  
 
 165  32
         for (String textTerm : textTerms) {
 166  72
             parsedTerms.add(parseQueryTerm(textTerm));
 167  
         }
 168  
 
 169  32
         return parsedTerms;
 170  
     }
 171  
 
 172  
     public static Node parseTree(String query) {
 173  24
         List<QueryTerm> terms = parse(query);
 174  24
         ArrayList<Node> nodes = new ArrayList<Node>();
 175  
 
 176  24
         for (QueryTerm term : terms) {
 177  32
             Node termNode = new Node("text", term.text);
 178  
             // if this is a phrase, put the terms in a ordered window
 179  32
             if (term.text.contains(" ")) {
 180  8
                 String[] phraseTerms = term.text.split(" ");
 181  8
                 ArrayList<Node> children = new ArrayList<Node>();
 182  24
                 for (String phraseTerm : phraseTerms) {
 183  16
                     children.add(new Node("text", phraseTerm));
 184  
                 }
 185  8
                 termNode = new Node("ordered", "1", children);
 186  
             }
 187  
             // if this is in a field, add the field restriction
 188  32
             if (term.field != null) {
 189  8
                 ArrayList<Node> children = new ArrayList<Node>();
 190  8
                 children.add(termNode);
 191  8
                 children.add(new Node("field", term.field));
 192  8
                 termNode = new Node("inside", children);
 193  
             }
 194  
             // if this is weighted, scale it
 195  32
             if (term.weight != 1.0) {
 196  8
                 ArrayList<Node> children = new ArrayList<Node>();
 197  8
                 children.add(termNode);
 198  8
                 termNode = new Node("scale", Double.toString(term.weight), children);
 199  
             }
 200  32
             nodes.add(termNode);
 201  32
         }
 202  
 
 203  24
         if (nodes.size() < 1)
 204  0
             return null;
 205  
         
 206  24
         if (nodes.size() == 1)
 207  20
             return nodes.get(0);
 208  
 
 209  4
         return new Node("combine", nodes);
 210  
     }
 211  
 }