Refactor web bibles

This commit is contained in:
Jon Tibble 2011-01-10 01:46:47 +00:00
parent 94b033d33b
commit 5edde179a4
3 changed files with 88 additions and 81 deletions

View File

@ -230,7 +230,7 @@ def open(filename):
return _open(filename) return _open(filename)
def _fix_addersses(**kwargs): def _fix_addresses(**kwargs):
for headername in (u'address', u'to', u'cc', u'bcc'): for headername in (u'address', u'to', u'cc', u'bcc'):
try: try:
headervalue = kwargs[headername] headervalue = kwargs[headername]
@ -260,7 +260,7 @@ def mailto_format(**kwargs):
""" """
# @TODO: implement utf8 option # @TODO: implement utf8 option
kwargs = _fix_addersses(**kwargs) kwargs = _fix_addresses(**kwargs)
parts = [] parts = []
for headername in (u'to', u'cc', u'bcc', u'subject', u'body', u'attach'): for headername in (u'to', u'cc', u'bcc', u'subject', u'body', u'attach'):
if kwargs.has_key(headername): if kwargs.has_key(headername):

View File

@ -282,8 +282,38 @@ def split_filename(path):
else: else:
return os.path.split(path) return os.path.split(path)
def get_web_page(url, update_openlp=False):
"""
Attempts to download the webpage at url and returns that page or None.
``url``
The URL to be downloaded.
``update_openlp``
Tells OpenLP to update itself if the page is successfully downloaded.
Defaults to False.
"""
# TODO: Add proxy usage. Get proxy info from OpenLP settings, add to a
# proxy_handler, build into an opener and install the opener into urllib2.
# http://docs.python.org/library/urllib2.html
if not url:
return None
page = None
log.debug(u'Downloading URL = %s' % url)
try:
page = urllib2.urlopen(url)
log.debug(u'Downloaded URL = %s' % page.geturl())
except urllib2.URLError:
log.exception(u'The web page could not be downloaded')
if not page:
return None
if update_openlp:
Receiver.send_message(u'openlp_process_events')
return page
from languagemanager import LanguageManager from languagemanager import LanguageManager
from actions import ActionList from actions import ActionList
__all__ = [u'AppLocation', u'check_latest_version', u'add_actions', __all__ = [u'AppLocation', u'check_latest_version', u'add_actions',
u'get_filesystem_encoding', u'LanguageManager', u'ActionList'] u'get_filesystem_encoding', u'LanguageManager', u'ActionList',
u'get_web_page']

View File

@ -23,20 +23,22 @@
# with this program; if not, write to the Free Software Foundation, Inc., 59 # # with this program; if not, write to the Free Software Foundation, Inc., 59 #
# Temple Place, Suite 330, Boston, MA 02111-1307 USA # # Temple Place, Suite 330, Boston, MA 02111-1307 USA #
############################################################################### ###############################################################################
"""
The :mod:`http` module enables OpenLP to retrieve scripture from bible
websites.
"""
import logging import logging
import os import os
import re import re
import sqlite3 import sqlite3
import socket import socket
import urllib import urllib
import urllib2
from HTMLParser import HTMLParseError from HTMLParser import HTMLParseError
from BeautifulSoup import BeautifulSoup, NavigableString from BeautifulSoup import BeautifulSoup, NavigableString
from openlp.core.lib import Receiver, translate from openlp.core.lib import Receiver, translate
from openlp.core.utils import AppLocation from openlp.core.utils import AppLocation, get_web_page
from openlp.plugins.bibles.lib import SearchResults from openlp.plugins.bibles.lib import SearchResults
from openlp.plugins.bibles.lib.db import BibleDB, Book from openlp.plugins.bibles.lib.db import BibleDB, Book
@ -204,29 +206,11 @@ class BGExtract(object):
url_params = urllib.urlencode( url_params = urllib.urlencode(
{u'search': u'%s %s' % (bookname, chapter), {u'search': u'%s %s' % (bookname, chapter),
u'version': u'%s' % version}) u'version': u'%s' % version})
page = None
try:
page = urllib2.urlopen(
u'http://www.biblegateway.com/passage/?%s' % url_params)
log.debug(u'BibleGateway url = %s' % page.geturl())
Receiver.send_message(u'openlp_process_events')
except urllib2.URLError:
log.exception(u'The web bible page could not be downloaded.')
send_error_message(u'download')
finally:
if not page:
return None
cleaner = [(re.compile('&nbsp;|<br />|\'\+\''), lambda match: '')] cleaner = [(re.compile('&nbsp;|<br />|\'\+\''), lambda match: '')]
soup = None soup = get_soup_for_bible_ref(
try: u'http://www.biblegateway.com/passage/?%s' % url_params, cleaner)
soup = BeautifulSoup(page, markupMassage=cleaner) if not soup:
except HTMLParseError: return None
log.exception(u'BeautifulSoup could not parse the bible page.')
Receiver.send_message(u'bibles_download_error')
send_error_message(u'parse')
finally:
if not soup:
return None
Receiver.send_message(u'openlp_process_events') Receiver.send_message(u'openlp_process_events')
footnotes = soup.findAll(u'sup', u'footnote') footnotes = soup.findAll(u'sup', u'footnote')
if footnotes: if footnotes:
@ -280,35 +264,15 @@ class BSExtract(object):
log.debug(u'get_bible_chapter %s,%s,%s', version, bookname, chapter) log.debug(u'get_bible_chapter %s,%s,%s', version, bookname, chapter)
chapter_url = u'http://m.bibleserver.com/text/%s/%s%s' % \ chapter_url = u'http://m.bibleserver.com/text/%s/%s%s' % \
(version, bookname, chapter) (version, bookname, chapter)
soup = get_soup_for_bible_ref(chapter_url)
log.debug(u'URL: %s', chapter_url) if not soup:
page = None
try:
page = urllib2.urlopen(chapter_url)
Receiver.send_message(u'openlp_process_events')
except urllib2.URLError:
log.exception(u'The web bible page could not be downloaded.')
send_error_message(u'download')
finally:
if not page:
return None
soup = None
try:
soup = BeautifulSoup(page)
except HTMLParseError:
log.exception(u'BeautifulSoup could not parse the bible page.')
send_error_message(u'parse')
return None return None
Receiver.send_message(u'openlp_process_events') Receiver.send_message(u'openlp_process_events')
content = None content = soup.find(u'div', u'content').find(u'div').findAll(u'div')
try: if not content:
content = soup.find(u'div', u'content').find(u'div').findAll(u'div')
except:
log.exception(u'No verses found in the Bibleserver response.') log.exception(u'No verses found in the Bibleserver response.')
send_error_message(u'parse') send_error_message(u'parse')
finally: return None
if not content:
return None
verse_number = re.compile(r'v(\d{2})(\d{3})(\d{3}) verse') verse_number = re.compile(r'v(\d{2})(\d{3})(\d{3}) verse')
verses = {} verses = {}
for verse in content: for verse in content:
@ -344,21 +308,8 @@ class CWExtract(object):
urlbookname = bookname.replace(u' ', u'-') urlbookname = bookname.replace(u' ', u'-')
chapter_url = u'http://www.biblestudytools.com/%s/%s/%s.html' % \ chapter_url = u'http://www.biblestudytools.com/%s/%s/%s.html' % \
(version, urlbookname.lower(), chapter) (version, urlbookname.lower(), chapter)
log.debug(u'URL: %s', chapter_url) soup = get_soup_for_bible_ref(chapter_url)
page = None if not soup:
try:
page = urllib2.urlopen(chapter_url)
Receiver.send_message(u'openlp_process_events')
except urllib2.URLError:
log.exception(u'The web bible page could not be downloaded.')
send_error_message(u'download')
return None
soup = None
try:
soup = BeautifulSoup(page)
except HTMLParseError:
log.exception(u'BeautifulSoup could not parse the bible page.')
send_error_message(u'parse')
return None return None
Receiver.send_message(u'openlp_process_events') Receiver.send_message(u'openlp_process_events')
htmlverses = soup.findAll(u'span', u'versetext') htmlverses = soup.findAll(u'span', u'versetext')
@ -416,6 +367,8 @@ class HTTPBible(BibleDB):
BibleDB.__init__(self, parent, **kwargs) BibleDB.__init__(self, parent, **kwargs)
self.download_source = kwargs[u'download_source'] self.download_source = kwargs[u'download_source']
self.download_name = kwargs[u'download_name'] self.download_name = kwargs[u'download_name']
# TODO: Clean up proxy stuff. We probably want one global proxy per
# connection type (HTTP and HTTPS) at most.
self.proxy_server = None self.proxy_server = None
self.proxy_username = None self.proxy_username = None
self.proxy_password = None self.proxy_password = None
@ -471,7 +424,7 @@ class HTTPBible(BibleDB):
book = reference[0] book = reference[0]
db_book = self.get_book(book) db_book = self.get_book(book)
if not db_book: if not db_book:
book_details = self.lookup_book(book) book_details = HTTPBooks.get_book(book)
if not book_details: if not book_details:
Receiver.send_message(u'openlp_error_message', { Receiver.send_message(u'openlp_error_message', {
u'title': translate('BiblesPlugin', 'No Book Found'), u'title': translate('BiblesPlugin', 'No Book Found'),
@ -511,12 +464,12 @@ class HTTPBible(BibleDB):
log.debug(u'get_chapter %s, %s', book, chapter) log.debug(u'get_chapter %s, %s', book, chapter)
log.debug(u'source = %s', self.download_source) log.debug(u'source = %s', self.download_source)
if self.download_source.lower() == u'crosswalk': if self.download_source.lower() == u'crosswalk':
ev = CWExtract(self.proxy_server) handler = CWExtract(self.proxy_server)
elif self.download_source.lower() == u'biblegateway': elif self.download_source.lower() == u'biblegateway':
ev = BGExtract(self.proxy_server) handler = BGExtract(self.proxy_server)
elif self.download_source.lower() == u'bibleserver': elif self.download_source.lower() == u'bibleserver':
ev = BSExtract(self.proxy_server) handler = BSExtract(self.proxy_server)
return ev.get_bible_chapter(self.download_name, book, chapter) return handler.get_bible_chapter(self.download_name, book, chapter)
def get_books(self): def get_books(self):
""" """
@ -525,12 +478,6 @@ class HTTPBible(BibleDB):
return [Book.populate(name=book['name']) return [Book.populate(name=book['name'])
for book in HTTPBooks.get_books()] for book in HTTPBooks.get_books()]
def lookup_book(self, book):
"""
Look up the name of a book.
"""
return HTTPBooks.get_book(book)
def get_chapter_count(self, book): def get_chapter_count(self, book):
""" """
Return the number of chapters in a particular book. Return the number of chapters in a particular book.
@ -549,8 +496,38 @@ class HTTPBible(BibleDB):
""" """
return HTTPBooks.get_verse_count(book, chapter) return HTTPBooks.get_verse_count(book, chapter)
def get_soup_for_bible_ref(reference_url, cleaner=None):
"""
Gets a webpage and returns a parsed and optionally cleaned soup or None.
``reference_url``
The URL to obtain the soup from.
``cleaner``
An optional regex to use during webpage parsing.
"""
if not reference_url:
return None
page = get_web_page(reference_url, True)
if not page:
send_error_message(u'download')
return None
soup = None
try:
if cleaner:
soup = BeautifulSoup(page, markupMassage=cleaner)
else:
soup = BeautifulSoup(page)
except HTMLParseError:
log.exception(u'BeautifulSoup could not parse the bible page.')
if not soup:
send_error_message(u'parse')
return None
Receiver.send_message(u'openlp_process_events')
return soup
def send_error_message(reason): def send_error_message(reason):
if reason == u'downoad': if reason == u'download':
Receiver.send_message(u'openlp_error_message', { Receiver.send_message(u'openlp_error_message', {
u'title': translate('BiblePlugin.HTTPBible', 'Download Error'), u'title': translate('BiblePlugin.HTTPBible', 'Download Error'),
u'message': translate('BiblePlugin.HTTPBible', 'There was a ' u'message': translate('BiblePlugin.HTTPBible', 'There was a '
@ -563,5 +540,5 @@ def send_error_message(reason):
u'title': translate('BiblePlugin.HTTPBible', 'Parse Error'), u'title': translate('BiblePlugin.HTTPBible', 'Parse Error'),
u'message': translate('BiblePlugin.HTTPBible', 'There was a ' u'message': translate('BiblePlugin.HTTPBible', 'There was a '
'problem extracting your verse selection. If this error continues ' 'problem extracting your verse selection. If this error continues '
'continues to occur consider reporting a bug.') 'to occur consider reporting a bug.')
}) })