Search on steroids! Searching now takes complicated queries.

2010-02-22 22:46:35 +02:00 · 2010-02-22 22:46:35 +02:00 · d2bfec7c2e
parent 31fc3695c3
commit d2bfec7c2e
4 changed files with 81 additions and 60 deletions
--- a/scribeengine/controllers/blog.py
+++ b/scribeengine/controllers/blog.py
@@ -22,6 +22,7 @@

 import logging
 from datetime import datetime
+from pprint import pformat

 from sqlalchemy.sql import or_

@@ -118,11 +119,30 @@ class BlogController(BaseController):
        if not c.querystring:
            h.flash.set_message(u'You didn\'t supply anything to search for.', u'error')
            h.redirect_to('/')
-        keywords = [or_(Post.body.contains(kw.strip()), Post.title.contains(kw.strip())) for kw in c.querystring.split(',')]
-        c.page_title = u'Search'
+        kwprocessor = utils.KeywordProcessor(
+            groups=[None, '+', '-'],
+            group=tuple,
+            normalize=lambda s: s.strip(' \"\'')
+        )
+        keywords, ands, nots = kwprocessor.split(c.querystring)
+        or_clauses = []
+        for kw in keywords:
+            or_clauses.append(Post.body.contains(kw.strip()))
+            or_clauses.append(Post.title.contains(kw.strip()))
+        and_clauses = [or_(Post.body.contains(aw.strip()),
+                           Post.title.contains(aw.strip())) for aw in ands]
+        not_clauses = [or_(Post.body.contains(nw.strip()),
+                           Post.title.contains(nw.strip())) for nw in nots]
        c.posts = Session.query(Post)
-        for or_clause in keywords:
-            c.posts = c.posts.filter(or_clause)
-        c.posts = c.posts.all()
+        if len(or_clauses) > 0:
+            c.posts = c.posts.filter(or_(*or_clauses))
+        if len(and_clauses) > 0:
+            for and_clause in and_clauses:
+                c.posts = c.posts.filter(and_clause)
+        if len(not_clauses) > 0:
+            for not_clause in not_clauses:
+                c.posts = c.posts.filter(~not_clause)
+        c.posts = c.posts.order_by(Post.created.desc()).all()
+        c.page_title = u'Search'
        return render(u'/blog/search.mako')

--- a/scribeengine/lib/utils.py
+++ b/scribeengine/lib/utils.py
@@ -33,72 +33,66 @@ from turbomail import Message
 from scribeengine.lib.base import render, h

 class KeywordProcessor(object):
-  """Process user-supplied keywords, tags, or search terms.
-  
-  This tries to be as flexible as possible while being efficient.
-  The vast majority of the work is done in the regular expression."""
-  
-  def __init__(self, separators=' \t', quotes=['"', "'"], groups=[], group=False, normalize=None, sort=False, result=list):
-    """Configure the processor.
-    
-    separators: A list of acceptable separator characters.  The first will be used for joins.
-    quotes: Pass a list or tuple of allowable quotes. E.g. ["\"", "'"] or None to disable.
-    groups: Pass a string, list, or tuple of allowable prefixes.  E.g. '+-' or None to disable.
-    group: Pass in the type you want to group by, e.g. list, tuple, or dict.
-    normalize: Pass a function which will normalize the results.  E.g. lambda s: s.lower().strip(' \"')
-    sort: Sort the resulting list (or lists) alphabeticlly.
-    result: The return type.  One of set, tuple, list.
-    
-    If groups are defined, and group is not, the result will be a list/tuple/set of tuples, e.g. [('+', "foo"), ...]
-    """
-    
-    separators = list(separators)
-    
-    self.pattern = ''.join((
-            ('[\s%s]*' % (''.join(separators), )), # Trap possible leading space or separators.
-            '(',
-                ('[%s]%s' % (''.join([i for i in list(groups) if i is not None]), '?' if None in groups else '')) if groups else '', # Pass groups=('+','-') to handle optional leading + or -.
-                ''.join([(r'%s[^%s]+%s|' % (i, i, i)) for i in quotes]) if quotes else '', # Match any amount of text (that isn't a quote) inside quotes.
-                ('[^%s]+' % (''.join(separators), )), # Match any amount of text that isn't whitespace.
-            ')',
-            ('[%s]*' % (''.join(separators), )), # Match possible separator character.
-      ))
-    self.regex = re.compile(self.pattern)
-    
-    self.groups = list(groups)
-    self.group = dict if group is True else group
-    self.normalize = normalize
-    self.sort = sort
-    self.result = result
-    
+    """Process user-supplied keywords, tags, or search terms.
+
+    This tries to be as flexible as possible while being efficient.
+    The vast majority of the work is done in the regular expression."""
+
+    def __init__(self, separators=' \t', quotes=['"', "'"], groups=[], group=False, normalize=None, sort=False, result=list):
+        """Configure the processor.
+
+        separators: A list of acceptable separator characters.  The first will be used for joins.
+        quotes: Pass a list or tuple of allowable quotes. E.g. ["\"", "'"] or None to disable.
+        groups: Pass a string, list, or tuple of allowable prefixes.  E.g. '+-' or None to disable.
+        group: Pass in the type you want to group by, e.g. list, tuple, or dict.
+        normalize: Pass a function which will normalize the results.  E.g. lambda s: s.lower().strip(' \"')
+        sort: Sort the resulting list (or lists) alphabetically.
+        result: The return type.  One of set, tuple, list.
+
+        If groups are defined, and group is not, the result will be a list/tuple/set of tuples, e.g. [('+', "foo"), ...]
+        """
+        separators = list(separators)
+        self.pattern = ''.join((
+                ('[\s%s]*' % (''.join(separators), )), # Trap possible leading space or separators.
+                '(',
+                    ('[%s]%s' % (''.join([i for i in list(groups) if i is not None]), '?' if None in groups else '')) if groups else '', # Pass groups=('+','-') to handle optional leading + or -.
+                    ''.join([(r'%s[^%s]+%s|' % (i, i, i)) for i in quotes]) if quotes else '', # Match any amount of text (that isn't a quote) inside quotes.
+                    ('[^%s]+' % (''.join(separators), )), # Match any amount of text that isn't whitespace.
+                ')',
+                ('[%s]*' % (''.join(separators), )), # Match possible separator character.
+            ))
+        self.regex = re.compile(self.pattern)
+        self.groups = list(groups)
+        self.group = dict if group is True else group
+        self.normalize = normalize
+        self.sort = sort
+        self.result = result
+
    def split(self, value):
-        if not isinstance(value, basestring): raise TypeError("Invalid type for argument 'value'.")
-      
+        if not isinstance(value, basestring):
+            raise TypeError("Invalid type for argument 'value'.")
        matches = self.regex.findall(value)
-      
-        if callable(self.normalize): matches = [self.normalize(i) for i in matches]
-        if self.sort: matches.sort()
-        if not self.groups: return self.result(matches)
-      
+        if callable(self.normalize):
+            matches = [self.normalize(i) for i in matches]
+        if self.sort:
+            matches.sort()
+        if not self.groups:
+            return self.result(matches)
        groups = dict([(i, list()) for i in self.groups])
-        if None not in groups.iterkeys(): groups[None] = list() # To prevent errors.
-      
+        if None not in groups.iterkeys():
+            groups[None] = list() # To prevent errors.
        for i in matches:
            if i[0] in self.groups:
                groups[i[0]].append(i[1:])
            else:
                groups[None].append(i)
-            
-        if self.group is dict: return groups
-            
+        if self.group is dict:
+            return groups
        if self.group is False or self.group is None:
            results = []
-              
            for group in self.groups:
                results.extend([(group, match) for match in groups[group]])
-                
            return self.result(results)
-                
        return self.group([[match for match in groups[group]] for group in self.groups])


--- a/scribeengine/templates/blog/search.mako
+++ b/scribeengine/templates/blog/search.mako
@@ -23,4 +23,11 @@
 % endfor
 % else:
        <div class="post"><p>Sorry, there seem to be no results for your search query.</p></div>
-% endif
+        <h3>Search tips:</h3>
+        <ul>
+            <li>Words should be space-separated.</li>
+            <li>Use quotes (&quot;) to group phrases.</li>
+            <li>Use plus (+) to make words/phrases mandatory.</li>
+            <li>Use minus (-) to exclude posts with those words/phrases.</li>
+        </ul>
+% endif
--- a/scribeengine/templates/sidebar.mako
+++ b/scribeengine/templates/sidebar.mako
@@ -3,7 +3,7 @@
            <li id="search">
                <form id="searchform" method="get" action="/search">
                    <div>
-                        <input type="text" name="q" id="s" size="15" />
+                        <input type="text" name="q" id="s" size="15" value="${c.querystring}" />
                        <br />
                        <input type="submit" value="Search" />
                    </div>