Biblegateway.com has changed its Bible book list layout. Adapt the regex for importing the book list of a Bible from Biblegateway.

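Changed log usage: replaced log.exception calls with log.error (or log.warn for illegal verse numbers), and raised the CrossWalk "No verses found" message from log.debug to log.error.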
This commit is contained in:
Armin Köhler 2011-06-04 21:34:36 +02:00
parent 937cfd2260
commit 522e68c38c
1 changed files with 15 additions and 12 deletions

View File

@ -109,7 +109,7 @@ class BGExtract(object):
try: try:
clean_verse_num = int(str(raw_verse_num)) clean_verse_num = int(str(raw_verse_num))
except ValueError: except ValueError:
log.exception(u'Illegal verse number in %s %s %s:%s', log.warn(u'Illegal verse number in %s %s %s:%s',
version, bookname, chapter, unicode(raw_verse_num)) version, bookname, chapter, unicode(raw_verse_num))
if clean_verse_num: if clean_verse_num:
verse_text = raw_verse_num.next verse_text = raw_verse_num.next
@ -139,16 +139,17 @@ class BGExtract(object):
""" """
log.debug(u'BGExtract.get_books_from_http("%s")', version) log.debug(u'BGExtract.get_books_from_http("%s")', version)
url_params = urllib.urlencode( url_params = urllib.urlencode(
{u'search': 'Bible-List', u'version': u'%s' % version}) {u'action': 'getVersionInfo', u'vid': u'%s' % version})
reference_url = u'http://www.biblegateway.com/passage/?%s' % url_params reference_url = u'http://www.biblegateway.com/versions/?%s#books' % \
url_params
page = get_web_page(reference_url) page = get_web_page(reference_url)
if not page: if not page:
send_error_message(u'download') send_error_message(u'download')
return None return None
page_source = page.read() page_source = page.read()
page_source = unicode(page_source, 'utf8') page_source = unicode(page_source, 'utf8')
page_source_temp = re.search(u'<table id="booklist".*?>.*?</table>', \ page_source_temp = re.search(u'<table .*?class="infotable".*?>.*?'\
page_source, re.DOTALL) u'</table>', page_source, re.DOTALL)
if page_source_temp: if page_source_temp:
soup = page_source_temp.group(0) soup = page_source_temp.group(0)
else: else:
@ -156,15 +157,17 @@ class BGExtract(object):
try: try:
soup = BeautifulSoup(soup) soup = BeautifulSoup(soup)
except HTMLParseError: except HTMLParseError:
log.exception(u'BeautifulSoup could not parse the Bible page.') log.error(u'BeautifulSoup could not parse the Bible page.')
send_error_message(u'parse')
return None
if not soup: if not soup:
send_error_message(u'parse') send_error_message(u'parse')
return None return None
Receiver.send_message(u'openlp_process_events') Receiver.send_message(u'openlp_process_events')
content = soup.find(u'table', {u'id': u'booklist'}) content = soup.find(u'table', {u'class': u'infotable'})
content = content.findAll(u'tr') content = content.findAll(u'tr')
if not content: if not content:
log.exception(u'No books found in the Biblegateway response.') log.error(u'No books found in the Biblegateway response.')
send_error_message(u'parse') send_error_message(u'parse')
return None return None
books = [] books = []
@ -210,7 +213,7 @@ class BSExtract(object):
Receiver.send_message(u'openlp_process_events') Receiver.send_message(u'openlp_process_events')
content = soup.find(u'div', u'content') content = soup.find(u'div', u'content')
if not content: if not content:
log.exception(u'No verses found in the Bibleserver response.') log.error(u'No verses found in the Bibleserver response.')
send_error_message(u'parse') send_error_message(u'parse')
return None return None
content = content.find(u'div').findAll(u'div') content = content.find(u'div').findAll(u'div')
@ -239,7 +242,7 @@ class BSExtract(object):
return None return None
content = soup.find(u'ul') content = soup.find(u'ul')
if not content: if not content:
log.exception(u'No books found in the Bibleserver response.') log.error(u'No books found in the Bibleserver response.')
send_error_message(u'parse') send_error_message(u'parse')
return None return None
content = content.findAll(u'li') content = content.findAll(u'li')
@ -283,7 +286,7 @@ class CWExtract(object):
Receiver.send_message(u'openlp_process_events') Receiver.send_message(u'openlp_process_events')
htmlverses = soup.findAll(u'span', u'versetext') htmlverses = soup.findAll(u'span', u'versetext')
if not htmlverses: if not htmlverses:
log.debug(u'No verses found in the CrossWalk response.') log.error(u'No verses found in the CrossWalk response.')
send_error_message(u'parse') send_error_message(u'parse')
return None return None
verses = {} verses = {}
@ -335,7 +338,7 @@ class CWExtract(object):
content = soup.find(u'div', {u'class': u'Body'}) content = soup.find(u'div', {u'class': u'Body'})
content = content.find(u'ul', {u'class': u'parent'}) content = content.find(u'ul', {u'class': u'parent'})
if not content: if not content:
log.exception(u'No books found in the Crosswalk response.') log.error(u'No books found in the Crosswalk response.')
send_error_message(u'parse') send_error_message(u'parse')
return None return None
content = content.findAll(u'li') content = content.findAll(u'li')