Search on steroids! Searching now takes complicated queries.

This commit is contained in:
Raoul Snyman 2010-02-22 22:46:35 +02:00
parent 31fc3695c3
commit d2bfec7c2e
4 changed files with 81 additions and 60 deletions

View File

@ -22,6 +22,7 @@
import logging import logging
from datetime import datetime from datetime import datetime
from pprint import pformat
from sqlalchemy.sql import or_ from sqlalchemy.sql import or_
@ -118,11 +119,30 @@ class BlogController(BaseController):
if not c.querystring: if not c.querystring:
h.flash.set_message(u'You didn\'t supply anything to search for.', u'error') h.flash.set_message(u'You didn\'t supply anything to search for.', u'error')
h.redirect_to('/') h.redirect_to('/')
keywords = [or_(Post.body.contains(kw.strip()), Post.title.contains(kw.strip())) for kw in c.querystring.split(',')] kwprocessor = utils.KeywordProcessor(
c.page_title = u'Search' groups=[None, '+', '-'],
group=tuple,
normalize=lambda s: s.strip(' \"\'')
)
keywords, ands, nots = kwprocessor.split(c.querystring)
or_clauses = []
for kw in keywords:
or_clauses.append(Post.body.contains(kw.strip()))
or_clauses.append(Post.title.contains(kw.strip()))
and_clauses = [or_(Post.body.contains(aw.strip()),
Post.title.contains(aw.strip())) for aw in ands]
not_clauses = [or_(Post.body.contains(nw.strip()),
Post.title.contains(nw.strip())) for nw in nots]
c.posts = Session.query(Post) c.posts = Session.query(Post)
for or_clause in keywords: if len(or_clauses) > 0:
c.posts = c.posts.filter(or_clause) c.posts = c.posts.filter(or_(*or_clauses))
c.posts = c.posts.all() if len(and_clauses) > 0:
for and_clause in and_clauses:
c.posts = c.posts.filter(and_clause)
if len(not_clauses) > 0:
for not_clause in not_clauses:
c.posts = c.posts.filter(~not_clause)
c.posts = c.posts.order_by(Post.created.desc()).all()
c.page_title = u'Search'
return render(u'/blog/search.mako') return render(u'/blog/search.mako')

View File

@ -33,72 +33,66 @@ from turbomail import Message
from scribeengine.lib.base import render, h from scribeengine.lib.base import render, h
class KeywordProcessor(object):
    """Process user-supplied keywords, tags, or search terms.

    The input string is tokenised by a single regular expression that is
    built once, in the constructor, from the configured separators, quote
    characters and group prefixes.  ``split`` then optionally sorts the
    tokens into groups keyed by their prefix character.
    """

    def __init__(self, separators=' \t', quotes=('"', "'"), groups=(), group=False, normalize=None, sort=False, result=list):
        """Configure the processor.

        separators: A string or list of single separator characters.
        quotes: A string, list or tuple of single quote characters, e.g.
            ['"', "'"], or None to disable quoted phrases.
        groups: A string, list or tuple of single-character prefixes,
            e.g. '+-' or [None, '+', '-'], or None to disable grouping.
            None inside the list names the bucket for unprefixed terms.
        group: The type to group by, e.g. list, tuple or dict.  True is
            treated as dict.
        normalize: A callable applied to every term, e.g.
            lambda s: s.lower().strip(' "').  When groups are in use it
            receives the term with its prefix already removed.
        sort: Sort the resulting list (or lists) alphabetically.
        result: The return type for flat results.  One of set, tuple,
            list.

        If groups are defined and group is not, the result is a
        list/tuple/set of tuples, e.g. [('+', "foo"), ...].
        """
        separators = list(separators)
        quotes = list(quotes) if quotes else []
        # Accept None (documented as "disabled") and plain strings alike.
        groups = list(groups) if groups else []
        prefixes = [i for i in groups if i is not None]

        # Every configured character ends up inside a regex character
        # class, so escape them: '-', ']', '^' and '\' are special there.
        separator_class = ''.join([re.escape(i) for i in separators])

        parts = [
            r'[\s%s]*' % (separator_class, ),  # Trap possible leading space or separators.
            '(',
        ]
        if prefixes:
            # The prefix is always optional; unprefixed terms are filed
            # under None by split().
            parts.append('[%s]?' % (''.join([re.escape(i) for i in prefixes]), ))
        # The non-capturing group makes the prefix apply to *every*
        # alternative, not only to the first kind of quoted phrase.
        parts.append('(?:')
        for i in quotes:
            quote = re.escape(i)
            # Match any amount of text (that isn't a quote) inside quotes.
            parts.append('%s[^%s]+%s|' % (quote, quote, quote))
        parts.append('[^%s]+' % (separator_class, ))  # Match any run of non-separator text.
        parts.append('))')
        parts.append('[%s]*' % (separator_class, ))  # Match possible trailing separators.

        self.pattern = ''.join(parts)
        self.regex = re.compile(self.pattern)
        self.groups = groups
        self.group = dict if group is True else group
        self.normalize = normalize
        self.sort = sort
        self.result = result

    def split(self, value):
        """Split ``value`` into terms, grouped by prefix when configured.

        Returns ``result(terms)`` when no groups are configured.  With
        groups, returns a dict keyed by prefix (group is dict), a flat
        ``result`` of (prefix, term) tuples (group is False or None), or
        ``group`` applied to one list of terms per configured group, in
        the order the groups were given.

        Terms that are empty, either because the token was a bare prefix
        or because normalisation stripped everything, are dropped.

        Raises TypeError if ``value`` is not a string.
        """
        try:
            string_types = basestring  # Python 2: covers str and unicode.
        except NameError:
            string_types = str
        if not isinstance(value, string_types):
            raise TypeError("Invalid type for argument 'value'.")

        normalize = self.normalize if callable(self.normalize) else None
        matches = self.regex.findall(value)

        if not self.groups:
            if normalize is not None:
                matches = [normalize(i) for i in matches]
            matches = [i for i in matches if i != '']
            if self.sort:
                matches.sort()
            return self.result(matches)

        groups = dict([(i, []) for i in self.groups])
        groups.setdefault(None, [])  # Unprefixed terms always need a home.

        for i in matches:
            # Detect the prefix on the raw token, before normalisation,
            # so that a quote-stripping normaliser sees '"foo bar"'
            # rather than '+"foo bar"'.
            if i[0] in self.groups:
                prefix, term = i[0], i[1:]
            else:
                prefix, term = None, i
            if term == '':
                continue  # A bare prefix such as '+' carries no term.
            if normalize is not None:
                term = normalize(term)
                if term == '':
                    continue
            groups[prefix].append(term)

        if self.sort:
            for terms in groups.values():
                terms.sort()

        if self.group is dict:
            return groups
        if self.group is False or self.group is None:
            results = []
            for group in self.groups:
                results.extend([(group, match) for match in groups[group]])
            return self.result(results)
        return self.group([list(groups[group]) for group in self.groups])

View File

@ -23,4 +23,11 @@
% endfor % endfor
% else: % else:
<div class="post"><p>Sorry, there seem to be no results for your search query.</p></div> <div class="post"><p>Sorry, there seem to be no results for your search query.</p></div>
% endif <h3>Search tips:</h3>
<ul>
<li>Words should be space-separated.</li>
<li>Use quotes (&quot;) to group phrases.</li>
<li>Use plus (+) to make words/phrases mandatory.</li>
<li>Use minus (-) to exclude posts with those words/phrases.</li>
</ul>
% endif

View File

@ -3,7 +3,7 @@
<li id="search"> <li id="search">
<form id="searchform" method="get" action="/search"> <form id="searchform" method="get" action="/search">
<div> <div>
<input type="text" name="q" id="s" size="15" /> <input type="text" name="q" id="s" size="15" value="${c.querystring}" />
<br /> <br />
<input type="submit" value="Search" /> <input type="submit" value="Search" />
</div> </div>