Work around the new BibleGateway layout by fetching pages from legacy.biblegateway.com

This commit is contained in:
Tomas Groth 2014-06-26 10:23:16 +02:00
parent 882ad4b23a
commit a37231dfd9

View File

@@ -238,7 +238,7 @@ class BGExtract(object):
version) version)
cleaner = [(re.compile('&nbsp;|<br />|\'\+\''), lambda match: '')] cleaner = [(re.compile('&nbsp;|<br />|\'\+\''), lambda match: '')]
soup = get_soup_for_bible_ref( soup = get_soup_for_bible_ref(
u'http://www.biblegateway.com/passage/?%s' % url_params, u'http://legacy.biblegateway.com/passage/?%s' % url_params,
pre_parse_regex=r'<meta name.*?/>', pre_parse_substitute='', pre_parse_regex=r'<meta name.*?/>', pre_parse_substitute='',
cleaner=cleaner) cleaner=cleaner)
if not soup: if not soup:
@@ -271,7 +271,7 @@ class BGExtract(object):
log.debug(u'BGExtract.get_books_from_http("%s")', version) log.debug(u'BGExtract.get_books_from_http("%s")', version)
url_params = urllib.urlencode( url_params = urllib.urlencode(
{u'action': 'getVersionInfo', u'vid': u'%s' % version}) {u'action': 'getVersionInfo', u'vid': u'%s' % version})
reference_url = u'http://www.biblegateway.com/versions/?%s#books' % \ reference_url = u'http://legacy.biblegateway.com/versions/?%s#books' % \
url_params url_params
page = get_web_page(reference_url) page = get_web_page(reference_url)
if not page: if not page: