Advanced searching.
This commit is contained in:
commit
35fd519136
@ -22,6 +22,7 @@
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from pprint import pformat
|
||||||
|
|
||||||
from sqlalchemy.sql import or_
|
from sqlalchemy.sql import or_
|
||||||
|
|
||||||
@ -118,11 +119,30 @@ class BlogController(BaseController):
|
|||||||
if not c.querystring:
|
if not c.querystring:
|
||||||
h.flash.set_message(u'You didn\'t supply anything to search for.', u'error')
|
h.flash.set_message(u'You didn\'t supply anything to search for.', u'error')
|
||||||
h.redirect_to('/')
|
h.redirect_to('/')
|
||||||
keywords = [or_(Post.body.contains(kw.strip()), Post.title.contains(kw.strip())) for kw in c.querystring.split(',')]
|
kwprocessor = utils.KeywordProcessor(
|
||||||
c.page_title = u'Search'
|
groups=[None, '+', '-'],
|
||||||
|
group=tuple,
|
||||||
|
normalize=lambda s: s.strip(' \"\'')
|
||||||
|
)
|
||||||
|
keywords, ands, nots = kwprocessor.split(c.querystring)
|
||||||
|
or_clauses = []
|
||||||
|
for kw in keywords:
|
||||||
|
or_clauses.append(Post.body.contains(kw.strip()))
|
||||||
|
or_clauses.append(Post.title.contains(kw.strip()))
|
||||||
|
and_clauses = [or_(Post.body.contains(aw.strip()),
|
||||||
|
Post.title.contains(aw.strip())) for aw in ands]
|
||||||
|
not_clauses = [or_(Post.body.contains(nw.strip()),
|
||||||
|
Post.title.contains(nw.strip())) for nw in nots]
|
||||||
c.posts = Session.query(Post)
|
c.posts = Session.query(Post)
|
||||||
for or_clause in keywords:
|
if len(or_clauses) > 0:
|
||||||
c.posts = c.posts.filter(or_clause)
|
c.posts = c.posts.filter(or_(*or_clauses))
|
||||||
c.posts = c.posts.all()
|
if len(and_clauses) > 0:
|
||||||
|
for and_clause in and_clauses:
|
||||||
|
c.posts = c.posts.filter(and_clause)
|
||||||
|
if len(not_clauses) > 0:
|
||||||
|
for not_clause in not_clauses:
|
||||||
|
c.posts = c.posts.filter(~not_clause)
|
||||||
|
c.posts = c.posts.order_by(Post.created.desc()).all()
|
||||||
|
c.page_title = u'Search'
|
||||||
return render(u'/blog/search.mako')
|
return render(u'/blog/search.mako')
|
||||||
|
|
||||||
|
@ -33,72 +33,66 @@ from turbomail import Message
|
|||||||
from scribeengine.lib.base import render, h
|
from scribeengine.lib.base import render, h
|
||||||
|
|
||||||
class KeywordProcessor(object):
|
class KeywordProcessor(object):
|
||||||
"""Process user-supplied keywords, tags, or search terms.
|
"""Process user-supplied keywords, tags, or search terms.
|
||||||
|
|
||||||
This tries to be as flexible as possible while being efficient.
|
This tries to be as flexible as possible while being efficient.
|
||||||
The vast majority of the work is done in the regular expression."""
|
The vast majority of the work is done in the regular expression."""
|
||||||
|
|
||||||
def __init__(self, separators=' \t', quotes=['"', "'"], groups=[], group=False, normalize=None, sort=False, result=list):
|
def __init__(self, separators=' \t', quotes=['"', "'"], groups=[], group=False, normalize=None, sort=False, result=list):
|
||||||
"""Configure the processor.
|
"""Configure the processor.
|
||||||
|
|
||||||
separators: A list of acceptable separator characters. The first will be used for joins.
|
separators: A list of acceptable separator characters. The first will be used for joins.
|
||||||
quotes: Pass a list or tuple of allowable quotes. E.g. ["\"", "'"] or None to disable.
|
quotes: Pass a list or tuple of allowable quotes. E.g. ["\"", "'"] or None to disable.
|
||||||
groups: Pass a string, list, or tuple of allowable prefixes. E.g. '+-' or None to disable.
|
groups: Pass a string, list, or tuple of allowable prefixes. E.g. '+-' or None to disable.
|
||||||
group: Pass in the type you want to group by, e.g. list, tuple, or dict.
|
group: Pass in the type you want to group by, e.g. list, tuple, or dict.
|
||||||
normalize: Pass a function which will normalize the results. E.g. lambda s: s.lower().strip(' \"')
|
normalize: Pass a function which will normalize the results. E.g. lambda s: s.lower().strip(' \"')
|
||||||
sort: Sort the resulting list (or lists) alphabetically.
|
sort: Sort the resulting list (or lists) alphabetically.
|
||||||
result: The return type. One of set, tuple, list.
|
result: The return type. One of set, tuple, list.
|
||||||
|
|
||||||
If groups are defined, and group is not, the result will be a list/tuple/set of tuples, e.g. [('+', "foo"), ...]
|
If groups are defined, and group is not, the result will be a list/tuple/set of tuples, e.g. [('+', "foo"), ...]
|
||||||
"""
|
"""
|
||||||
|
separators = list(separators)
|
||||||
separators = list(separators)
|
self.pattern = ''.join((
|
||||||
|
('[\s%s]*' % (''.join(separators), )), # Trap possible leading space or separators.
|
||||||
self.pattern = ''.join((
|
'(',
|
||||||
('[\s%s]*' % (''.join(separators), )), # Trap possible leading space or separators.
|
('[%s]%s' % (''.join([i for i in list(groups) if i is not None]), '?' if None in groups else '')) if groups else '', # Pass groups=('+','-') to handle optional leading + or -.
|
||||||
'(',
|
''.join([(r'%s[^%s]+%s|' % (i, i, i)) for i in quotes]) if quotes else '', # Match any amount of text (that isn't a quote) inside quotes.
|
||||||
('[%s]%s' % (''.join([i for i in list(groups) if i is not None]), '?' if None in groups else '')) if groups else '', # Pass groups=('+','-') to handle optional leading + or -.
|
('[^%s]+' % (''.join(separators), )), # Match any amount of text that isn't whitespace.
|
||||||
''.join([(r'%s[^%s]+%s|' % (i, i, i)) for i in quotes]) if quotes else '', # Match any amount of text (that isn't a quote) inside quotes.
|
')',
|
||||||
('[^%s]+' % (''.join(separators), )), # Match any amount of text that isn't whitespace.
|
('[%s]*' % (''.join(separators), )), # Match possible separator character.
|
||||||
')',
|
))
|
||||||
('[%s]*' % (''.join(separators), )), # Match possible separator character.
|
self.regex = re.compile(self.pattern)
|
||||||
))
|
self.groups = list(groups)
|
||||||
self.regex = re.compile(self.pattern)
|
self.group = dict if group is True else group
|
||||||
|
self.normalize = normalize
|
||||||
self.groups = list(groups)
|
self.sort = sort
|
||||||
self.group = dict if group is True else group
|
self.result = result
|
||||||
self.normalize = normalize
|
|
||||||
self.sort = sort
|
|
||||||
self.result = result
|
|
||||||
|
|
||||||
def split(self, value):
|
def split(self, value):
|
||||||
if not isinstance(value, basestring): raise TypeError("Invalid type for argument 'value'.")
|
if not isinstance(value, basestring):
|
||||||
|
raise TypeError("Invalid type for argument 'value'.")
|
||||||
matches = self.regex.findall(value)
|
matches = self.regex.findall(value)
|
||||||
|
if callable(self.normalize):
|
||||||
if callable(self.normalize): matches = [self.normalize(i) for i in matches]
|
matches = [self.normalize(i) for i in matches]
|
||||||
if self.sort: matches.sort()
|
if self.sort:
|
||||||
if not self.groups: return self.result(matches)
|
matches.sort()
|
||||||
|
if not self.groups:
|
||||||
|
return self.result(matches)
|
||||||
groups = dict([(i, list()) for i in self.groups])
|
groups = dict([(i, list()) for i in self.groups])
|
||||||
if None not in groups.iterkeys(): groups[None] = list() # To prevent errors.
|
if None not in groups.iterkeys():
|
||||||
|
groups[None] = list() # To prevent errors.
|
||||||
for i in matches:
|
for i in matches:
|
||||||
if i[0] in self.groups:
|
if i[0] in self.groups:
|
||||||
groups[i[0]].append(i[1:])
|
groups[i[0]].append(i[1:])
|
||||||
else:
|
else:
|
||||||
groups[None].append(i)
|
groups[None].append(i)
|
||||||
|
if self.group is dict:
|
||||||
if self.group is dict: return groups
|
return groups
|
||||||
|
|
||||||
if self.group is False or self.group is None:
|
if self.group is False or self.group is None:
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
for group in self.groups:
|
for group in self.groups:
|
||||||
results.extend([(group, match) for match in groups[group]])
|
results.extend([(group, match) for match in groups[group]])
|
||||||
|
|
||||||
return self.result(results)
|
return self.result(results)
|
||||||
|
|
||||||
return self.group([[match for match in groups[group]] for group in self.groups])
|
return self.group([[match for match in groups[group]] for group in self.groups])
|
||||||
|
|
||||||
|
|
||||||
|
@ -23,4 +23,11 @@
|
|||||||
% endfor
|
% endfor
|
||||||
% else:
|
% else:
|
||||||
<div class="post"><p>Sorry, there seem to be no results for your search query.</p></div>
|
<div class="post"><p>Sorry, there seem to be no results for your search query.</p></div>
|
||||||
% endif
|
<h3>Search tips:</h3>
|
||||||
|
<ul>
|
||||||
|
<li>Words should be space-separated.</li>
|
||||||
|
<li>Use quotes (") to group phrases.</li>
|
||||||
|
<li>Use plus (+) to make words/phrases mandatory.</li>
|
||||||
|
<li>Use minus (-) to exclude posts with those words/phrases.</li>
|
||||||
|
</ul>
|
||||||
|
% endif
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
<li id="search">
|
<li id="search">
|
||||||
<form id="searchform" method="get" action="/search">
|
<form id="searchform" method="get" action="/search">
|
||||||
<div>
|
<div>
|
||||||
<input type="text" name="q" id="s" size="15" />
|
<input type="text" name="q" id="s" size="15" value="${c.querystring}" />
|
||||||
<br />
|
<br />
|
||||||
<input type="submit" value="Search" />
|
<input type="submit" value="Search" />
|
||||||
</div>
|
</div>
|
||||||
|
Reference in New Issue
Block a user