Biblegateway.com has changed its Bible book list layout. Adapt the regex used for importing the book list of a Bible from Biblegateway.

Changed log usage: replaced log.exception calls (and one log.debug call) with log.warn/log.error.
This commit is contained in:
Armin Köhler 2011-06-04 21:34:36 +02:00
parent 937cfd2260
commit 522e68c38c
1 changed files with 15 additions and 12 deletions

View File

@ -109,7 +109,7 @@ class BGExtract(object):
try:
clean_verse_num = int(str(raw_verse_num))
except ValueError:
log.exception(u'Illegal verse number in %s %s %s:%s',
log.warn(u'Illegal verse number in %s %s %s:%s',
version, bookname, chapter, unicode(raw_verse_num))
if clean_verse_num:
verse_text = raw_verse_num.next
@ -139,16 +139,17 @@ class BGExtract(object):
"""
log.debug(u'BGExtract.get_books_from_http("%s")', version)
url_params = urllib.urlencode(
{u'search': 'Bible-List', u'version': u'%s' % version})
reference_url = u'http://www.biblegateway.com/passage/?%s' % url_params
{u'action': 'getVersionInfo', u'vid': u'%s' % version})
reference_url = u'http://www.biblegateway.com/versions/?%s#books' % \
url_params
page = get_web_page(reference_url)
if not page:
send_error_message(u'download')
return None
page_source = page.read()
page_source = unicode(page_source, 'utf8')
page_source_temp = re.search(u'<table id="booklist".*?>.*?</table>', \
page_source, re.DOTALL)
page_source_temp = re.search(u'<table .*?class="infotable".*?>.*?'\
u'</table>', page_source, re.DOTALL)
if page_source_temp:
soup = page_source_temp.group(0)
else:
@ -156,15 +157,17 @@ class BGExtract(object):
try:
soup = BeautifulSoup(soup)
except HTMLParseError:
log.exception(u'BeautifulSoup could not parse the Bible page.')
log.error(u'BeautifulSoup could not parse the Bible page.')
send_error_message(u'parse')
return None
if not soup:
send_error_message(u'parse')
return None
Receiver.send_message(u'openlp_process_events')
content = soup.find(u'table', {u'id': u'booklist'})
content = soup.find(u'table', {u'class': u'infotable'})
content = content.findAll(u'tr')
if not content:
log.exception(u'No books found in the Biblegateway response.')
log.error(u'No books found in the Biblegateway response.')
send_error_message(u'parse')
return None
books = []
@ -210,7 +213,7 @@ class BSExtract(object):
Receiver.send_message(u'openlp_process_events')
content = soup.find(u'div', u'content')
if not content:
log.exception(u'No verses found in the Bibleserver response.')
log.error(u'No verses found in the Bibleserver response.')
send_error_message(u'parse')
return None
content = content.find(u'div').findAll(u'div')
@ -239,7 +242,7 @@ class BSExtract(object):
return None
content = soup.find(u'ul')
if not content:
log.exception(u'No books found in the Bibleserver response.')
log.error(u'No books found in the Bibleserver response.')
send_error_message(u'parse')
return None
content = content.findAll(u'li')
@ -283,7 +286,7 @@ class CWExtract(object):
Receiver.send_message(u'openlp_process_events')
htmlverses = soup.findAll(u'span', u'versetext')
if not htmlverses:
log.debug(u'No verses found in the CrossWalk response.')
log.error(u'No verses found in the CrossWalk response.')
send_error_message(u'parse')
return None
verses = {}
@ -335,7 +338,7 @@ class CWExtract(object):
content = soup.find(u'div', {u'class': u'Body'})
content = content.find(u'ul', {u'class': u'parent'})
if not content:
log.exception(u'No books found in the Crosswalk response.')
log.error(u'No books found in the Crosswalk response.')
send_error_message(u'parse')
return None
content = content.findAll(u'li')