Fix BibleServer downloads (Bug #701651)

This commit is contained in:
Jon Tibble 2011-01-11 23:43:27 +00:00
parent cf0608cc45
commit e0a2976fa9
2 changed files with 15 additions and 5 deletions

View File

@@ -282,13 +282,16 @@ def split_filename(path):
else: else:
return os.path.split(path) return os.path.split(path)
def get_web_page(url, update_openlp=False): def get_web_page(url, header=None, update_openlp=False):
""" """
Attempts to download the webpage at url and returns that page or None. Attempts to download the webpage at url and returns that page or None.
``url`` ``url``
The URL to be downloaded. The URL to be downloaded.
``header``
An optional HTTP header to pass in the request to the web server.
``update_openlp`` ``update_openlp``
Tells OpenLP to update itself if the page is successfully downloaded. Tells OpenLP to update itself if the page is successfully downloaded.
Defaults to False. Defaults to False.
@@ -298,10 +301,13 @@ def get_web_page(url, update_openlp=False):
# http://docs.python.org/library/urllib2.html # http://docs.python.org/library/urllib2.html
if not url: if not url:
return None return None
req = urllib2.Request(url)
if header:
req.add_header(header[0], header[1])
page = None page = None
log.debug(u'Downloading URL = %s' % url) log.debug(u'Downloading URL = %s' % url)
try: try:
page = urllib2.urlopen(url) page = urllib2.urlopen(req)
log.debug(u'Downloaded URL = %s' % page.geturl()) log.debug(u'Downloaded URL = %s' % page.geturl())
except urllib2.URLError: except urllib2.URLError:
log.exception(u'The web page could not be downloaded') log.exception(u'The web page could not be downloaded')

View File

@@ -264,7 +264,8 @@ class BSExtract(object):
log.debug(u'get_bible_chapter %s,%s,%s', version, bookname, chapter) log.debug(u'get_bible_chapter %s,%s,%s', version, bookname, chapter)
chapter_url = u'http://m.bibleserver.com/text/%s/%s%s' % \ chapter_url = u'http://m.bibleserver.com/text/%s/%s%s' % \
(version, bookname, chapter) (version, bookname, chapter)
soup = get_soup_for_bible_ref(chapter_url) header = (u'Accept-Language', u'en')
soup = get_soup_for_bible_ref(chapter_url, header)
if not soup: if not soup:
return None return None
Receiver.send_message(u'openlp_process_events') Receiver.send_message(u'openlp_process_events')
@@ -496,19 +497,22 @@ class HTTPBible(BibleDB):
""" """
return HTTPBooks.get_verse_count(book, chapter) return HTTPBooks.get_verse_count(book, chapter)
def get_soup_for_bible_ref(reference_url, cleaner=None): def get_soup_for_bible_ref(reference_url, header=None, cleaner=None):
""" """
Gets a webpage and returns a parsed and optionally cleaned soup or None. Gets a webpage and returns a parsed and optionally cleaned soup or None.
``reference_url`` ``reference_url``
The URL to obtain the soup from. The URL to obtain the soup from.
``header``
An optional HTTP header to pass to the bible web server.
``cleaner`` ``cleaner``
An optional regex to use during webpage parsing. An optional regex to use during webpage parsing.
""" """
if not reference_url: if not reference_url:
return None return None
page = get_web_page(reference_url, True) page = get_web_page(reference_url, header, True)
if not page: if not page:
send_error_message(u'download') send_error_message(u'download')
return None return None