Fix unicode bookname handling on Biblegateway

This commit is contained in:
Armin Köhler 2011-05-30 22:58:30 +02:00
parent 6c870f7f7f
commit 8a8580f359
1 changed files with 3 additions and 4 deletions

View File

@ -72,9 +72,8 @@ class BGExtract(object):
log.debug(u'BGExtract.get_bible_chapter("%s", "%s", "%s")', version, log.debug(u'BGExtract.get_bible_chapter("%s", "%s", "%s")', version,
bookname, chapter) bookname, chapter)
urlbookname = urllib.quote(bookname.encode("utf-8")) urlbookname = urllib.quote(bookname.encode("utf-8"))
url_params = urllib.urlencode( url_params = u'search=%s+%s&version=%s' % (urlbookname, chapter,
{u'search': u'%s %s' % (urlbookname, chapter), version)
u'version': u'%s' % version})
cleaner = [(re.compile('&nbsp;|<br />|\'\+\''), lambda match: '')] cleaner = [(re.compile('&nbsp;|<br />|\'\+\''), lambda match: '')]
soup = get_soup_for_bible_ref( soup = get_soup_for_bible_ref(
u'http://www.biblegateway.com/passage/?%s' % url_params, u'http://www.biblegateway.com/passage/?%s' % url_params,
@ -97,7 +96,7 @@ class BGExtract(object):
verse_list = {} verse_list = {}
# Cater for inconsistent mark up in the first verse of a chapter. # Cater for inconsistent mark up in the first verse of a chapter.
first_verse = verses.find(u'versenum') first_verse = verses.find(u'versenum')
if first_verse: if first_verse and 0 in first_verse.contents:
verse_list[1] = unicode(first_verse.contents[0]) verse_list[1] = unicode(first_verse.contents[0])
for verse in verses(u'sup', u'versenum'): for verse in verses(u'sup', u'versenum'):
raw_verse_num = verse.next raw_verse_num = verse.next