Search on steroids! Searching now takes complicated queries.

This commit is contained in:
Raoul Snyman 2010-02-22 22:46:35 +02:00
parent 31fc3695c3
commit d2bfec7c2e
4 changed files with 81 additions and 60 deletions

View File

@ -22,6 +22,7 @@
import logging
from datetime import datetime
from pprint import pformat
from sqlalchemy.sql import or_
@ -118,11 +119,30 @@ class BlogController(BaseController):
if not c.querystring:
h.flash.set_message(u'You didn\'t supply anything to search for.', u'error')
h.redirect_to('/')
keywords = [or_(Post.body.contains(kw.strip()), Post.title.contains(kw.strip())) for kw in c.querystring.split(',')]
c.page_title = u'Search'
kwprocessor = utils.KeywordProcessor(
groups=[None, '+', '-'],
group=tuple,
normalize=lambda s: s.strip(' \"\'')
)
keywords, ands, nots = kwprocessor.split(c.querystring)
or_clauses = []
for kw in keywords:
or_clauses.append(Post.body.contains(kw.strip()))
or_clauses.append(Post.title.contains(kw.strip()))
and_clauses = [or_(Post.body.contains(aw.strip()),
Post.title.contains(aw.strip())) for aw in ands]
not_clauses = [or_(Post.body.contains(nw.strip()),
Post.title.contains(nw.strip())) for nw in nots]
c.posts = Session.query(Post)
for or_clause in keywords:
c.posts = c.posts.filter(or_clause)
c.posts = c.posts.all()
if len(or_clauses) > 0:
c.posts = c.posts.filter(or_(*or_clauses))
if len(and_clauses) > 0:
for and_clause in and_clauses:
c.posts = c.posts.filter(and_clause)
if len(not_clauses) > 0:
for not_clause in not_clauses:
c.posts = c.posts.filter(~not_clause)
c.posts = c.posts.order_by(Post.created.desc()).all()
c.page_title = u'Search'
return render(u'/blog/search.mako')

View File

@ -33,72 +33,66 @@ from turbomail import Message
from scribeengine.lib.base import render, h
class KeywordProcessor(object):
    """Process user-supplied keywords, tags, or search terms.

    A single regular expression, built once in ``__init__``, tokenizes the
    input; ``split`` then optionally normalizes, sorts, and groups the
    tokens by their leading prefix character.
    """

    # ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere
    # so the type check in ``split`` works on both major versions.
    try:
        _string_types = (basestring,)
    except NameError:
        _string_types = (str,)

    def __init__(self, separators=' \t', quotes=('"', "'"), groups=(),
                 group=False, normalize=None, sort=False, result=list):
        """Configure the processor.

        separators: A string or list of acceptable separator characters.
        quotes: A list or tuple of allowable quote characters,
            e.g. ["\\"", "'"], or None to disable quoted phrases.
        groups: A string, list, or tuple of allowable single-character
            prefixes, e.g. '+-', or None to disable. Include None in the
            sequence to have un-prefixed terms returned as their own group.
        group: The type to group by, e.g. list, tuple, or dict (True is
            treated as dict).
        normalize: A function applied to every term,
            e.g. lambda s: s.lower().strip(' \\"'). When groups are in use
            the prefix is removed before the function is called.
        sort: Sort the resulting list (or lists) alphabetically.
        result: The return type for flat results. One of set, tuple, list.

        If groups are defined, and group is not, the result will be a
        list/tuple/set of tuples, e.g. [('+', "foo"), ...]
        """
        # Everything below is interpolated into character classes, so it is
        # escaped: an unescaped '-', ']' or '^' would alter or break them.
        separators = ''.join(re.escape(char) for char in separators)
        # Accept None (documented as "disable") and plain strings alike.
        groups = list(groups) if groups else []
        prefixes = ''.join(re.escape(prefix) for prefix in groups
                           if prefix is not None)

        # Quoted phrases are tried first, then a bare run of non-separators.
        alternatives = [r'%s[^%s]+%s' % ((re.escape(quote), ) * 3)
                        for quote in (quotes or ())]
        alternatives.append(r'[^%s]+' % (separators, ))

        self.pattern = ''.join((
            r'[\s%s]*' % (separators, ),  # Leading space or separators.
            '(',
            # Optional prefix; un-prefixed tokens are bucketed by split().
            (r'[%s]?' % (prefixes, )) if prefixes else '',
            # Non-capturing group so the prefix applies to EVERY alternative
            # rather than only to the first quoted form.
            '(?:', '|'.join(alternatives), ')',
            ')',
            r'[%s]*' % (separators, ),  # Trailing separator characters.
        ))
        self.regex = re.compile(self.pattern)
        self.groups = groups
        self.group = dict if group is True else group
        self.normalize = normalize
        self.sort = sort
        self.result = result

    def split(self, value):
        """Split ``value`` into terms according to the configuration.

        Without groups, returns ``self.result`` built from the (optionally
        normalized and sorted) tokens.

        With groups, each token's leading prefix character selects a bucket
        and the remainder (after normalization) is the term; terms that end
        up empty are dropped. The return value is then one of:
          * the bucket dict (always containing a None key) if group is dict;
          * ``self.result`` of (prefix, term) tuples if group is False/None;
          * ``self.group`` of per-prefix lists, in the order of the
            configured groups, otherwise.

        Raises TypeError if ``value`` is not a string.
        """
        if not isinstance(value, self._string_types):
            raise TypeError("Invalid type for argument 'value'.")

        tokens = self.regex.findall(value)
        normalize = self.normalize if callable(self.normalize) else None

        if not self.groups:
            if normalize is not None:
                tokens = [normalize(token) for token in tokens]
            if self.sort:
                tokens.sort()
            return self.result(tokens)

        prefixes = [prefix for prefix in self.groups if prefix is not None]
        buckets = dict((prefix, []) for prefix in self.groups)
        # Un-prefixed tokens always need somewhere to go, even when None was
        # not requested as an output group.
        buckets.setdefault(None, [])

        for token in tokens:
            # The pattern never yields an empty token, so token[0] is safe.
            prefix = token[0] if token[0] in prefixes else None
            term = token if prefix is None else token[1:]
            # Normalize after removing the prefix, otherwise a leading '+'
            # shields the opening quote of +"a phrase" from being stripped.
            if normalize is not None:
                term = normalize(term)
            # A bare prefix, or a token that normalizes to nothing (e.g. a
            # lone quote character), carries no search term.
            if term:
                buckets[prefix].append(term)

        if self.sort:
            for terms in buckets.values():
                terms.sort()

        if self.group is dict:
            return buckets
        if self.group is False or self.group is None:
            return self.result([(prefix, term)
                                for prefix in self.groups
                                for term in buckets[prefix]])
        return self.group([list(buckets[prefix]) for prefix in self.groups])

View File

@ -23,4 +23,11 @@
% endfor
% else:
<div class="post"><p>Sorry, there seem to be no results for your search query.</p></div>
<h3>Search tips:</h3>
<ul>
<li>Words should be space-separated.</li>
<li>Use quotes (&quot;) to group phrases.</li>
<li>Use plus (+) to make words/phrases mandatory.</li>
<li>Use minus (-) to exclude posts with those words/phrases.</li>
</ul>
% endif

View File

@ -3,7 +3,7 @@
<li id="search">
<form id="searchform" method="get" action="/search">
<div>
<input type="text" name="q" id="s" size="15" />
<input type="text" name="q" id="s" size="15" value="${c.querystring}" />
<br />
<input type="submit" value="Search" />
</div>