Refactor web bibles

This commit is contained in:
Jon Tibble 2011-01-10 01:46:47 +00:00
parent 94b033d33b
commit 5edde179a4
3 changed files with 88 additions and 81 deletions

View File

@ -230,7 +230,7 @@ def open(filename):
return _open(filename)
def _fix_addersses(**kwargs):
def _fix_addresses(**kwargs):
for headername in (u'address', u'to', u'cc', u'bcc'):
try:
headervalue = kwargs[headername]
@ -260,7 +260,7 @@ def mailto_format(**kwargs):
"""
# @TODO: implement utf8 option
kwargs = _fix_addersses(**kwargs)
kwargs = _fix_addresses(**kwargs)
parts = []
for headername in (u'to', u'cc', u'bcc', u'subject', u'body', u'attach'):
if kwargs.has_key(headername):

View File

@ -282,8 +282,38 @@ def split_filename(path):
else:
return os.path.split(path)
def get_web_page(url, update_openlp=False):
    """
    Attempts to download the webpage at url and returns that page or None.

    ``url``
        The URL to be downloaded. An empty or missing URL returns None
        without any download attempt.

    ``update_openlp``
        Tells OpenLP to update itself (by sending ``openlp_process_events``)
        if the page is successfully downloaded. Defaults to False.

    Returns the object produced by ``urllib2.urlopen`` on success, or None
    when the URL is empty or the download fails.
    """
    # TODO: Add proxy usage.  Get proxy info from OpenLP settings, add to a
    # proxy_handler, build into an opener and install the opener into urllib2.
    # http://docs.python.org/library/urllib2.html
    if not url:
        return None
    log.debug(u'Downloading URL = %s', url)
    try:
        page = urllib2.urlopen(url)
    except (urllib2.URLError, ValueError):
        # URLError covers network and HTTP failures; ValueError is what
        # urllib2 raises for a URL it cannot interpret (e.g. no scheme).
        # Either way the documented contract is to return None.
        log.exception(u'The web page could not be downloaded')
        return None
    if not page:
        return None
    log.debug(u'Downloaded URL = %s', page.geturl())
    if update_openlp:
        Receiver.send_message(u'openlp_process_events')
    return page
from languagemanager import LanguageManager
from actions import ActionList
__all__ = [u'AppLocation', u'check_latest_version', u'add_actions',
u'get_filesystem_encoding', u'LanguageManager', u'ActionList']
u'get_filesystem_encoding', u'LanguageManager', u'ActionList',
u'get_web_page']

View File

@ -23,20 +23,22 @@
# with this program; if not, write to the Free Software Foundation, Inc., 59 #
# Temple Place, Suite 330, Boston, MA 02111-1307 USA #
###############################################################################
"""
The :mod:`http` module enables OpenLP to retrieve scripture from bible
websites.
"""
import logging
import os
import re
import sqlite3
import socket
import urllib
import urllib2
from HTMLParser import HTMLParseError
from BeautifulSoup import BeautifulSoup, NavigableString
from openlp.core.lib import Receiver, translate
from openlp.core.utils import AppLocation
from openlp.core.utils import AppLocation, get_web_page
from openlp.plugins.bibles.lib import SearchResults
from openlp.plugins.bibles.lib.db import BibleDB, Book
@ -204,29 +206,11 @@ class BGExtract(object):
url_params = urllib.urlencode(
{u'search': u'%s %s' % (bookname, chapter),
u'version': u'%s' % version})
page = None
try:
page = urllib2.urlopen(
u'http://www.biblegateway.com/passage/?%s' % url_params)
log.debug(u'BibleGateway url = %s' % page.geturl())
Receiver.send_message(u'openlp_process_events')
except urllib2.URLError:
log.exception(u'The web bible page could not be downloaded.')
send_error_message(u'download')
finally:
if not page:
return None
cleaner = [(re.compile('&nbsp;|<br />|\'\+\''), lambda match: '')]
soup = None
try:
soup = BeautifulSoup(page, markupMassage=cleaner)
except HTMLParseError:
log.exception(u'BeautifulSoup could not parse the bible page.')
Receiver.send_message(u'bibles_download_error')
send_error_message(u'parse')
finally:
if not soup:
return None
soup = get_soup_for_bible_ref(
u'http://www.biblegateway.com/passage/?%s' % url_params, cleaner)
if not soup:
return None
Receiver.send_message(u'openlp_process_events')
footnotes = soup.findAll(u'sup', u'footnote')
if footnotes:
@ -280,35 +264,15 @@ class BSExtract(object):
log.debug(u'get_bible_chapter %s,%s,%s', version, bookname, chapter)
chapter_url = u'http://m.bibleserver.com/text/%s/%s%s' % \
(version, bookname, chapter)
log.debug(u'URL: %s', chapter_url)
page = None
try:
page = urllib2.urlopen(chapter_url)
Receiver.send_message(u'openlp_process_events')
except urllib2.URLError:
log.exception(u'The web bible page could not be downloaded.')
send_error_message(u'download')
finally:
if not page:
return None
soup = None
try:
soup = BeautifulSoup(page)
except HTMLParseError:
log.exception(u'BeautifulSoup could not parse the bible page.')
send_error_message(u'parse')
soup = get_soup_for_bible_ref(chapter_url)
if not soup:
return None
Receiver.send_message(u'openlp_process_events')
content = None
try:
content = soup.find(u'div', u'content').find(u'div').findAll(u'div')
except:
content = soup.find(u'div', u'content').find(u'div').findAll(u'div')
if not content:
log.exception(u'No verses found in the Bibleserver response.')
send_error_message(u'parse')
finally:
if not content:
return None
return None
verse_number = re.compile(r'v(\d{2})(\d{3})(\d{3}) verse')
verses = {}
for verse in content:
@ -344,21 +308,8 @@ class CWExtract(object):
urlbookname = bookname.replace(u' ', u'-')
chapter_url = u'http://www.biblestudytools.com/%s/%s/%s.html' % \
(version, urlbookname.lower(), chapter)
log.debug(u'URL: %s', chapter_url)
page = None
try:
page = urllib2.urlopen(chapter_url)
Receiver.send_message(u'openlp_process_events')
except urllib2.URLError:
log.exception(u'The web bible page could not be downloaded.')
send_error_message(u'download')
return None
soup = None
try:
soup = BeautifulSoup(page)
except HTMLParseError:
log.exception(u'BeautifulSoup could not parse the bible page.')
send_error_message(u'parse')
soup = get_soup_for_bible_ref(chapter_url)
if not soup:
return None
Receiver.send_message(u'openlp_process_events')
htmlverses = soup.findAll(u'span', u'versetext')
@ -416,6 +367,8 @@ class HTTPBible(BibleDB):
BibleDB.__init__(self, parent, **kwargs)
self.download_source = kwargs[u'download_source']
self.download_name = kwargs[u'download_name']
# TODO: Clean up proxy stuff. We probably want one global proxy per
# connection type (HTTP and HTTPS) at most.
self.proxy_server = None
self.proxy_username = None
self.proxy_password = None
@ -471,7 +424,7 @@ class HTTPBible(BibleDB):
book = reference[0]
db_book = self.get_book(book)
if not db_book:
book_details = self.lookup_book(book)
book_details = HTTPBooks.get_book(book)
if not book_details:
Receiver.send_message(u'openlp_error_message', {
u'title': translate('BiblesPlugin', 'No Book Found'),
@ -511,12 +464,12 @@ class HTTPBible(BibleDB):
log.debug(u'get_chapter %s, %s', book, chapter)
log.debug(u'source = %s', self.download_source)
if self.download_source.lower() == u'crosswalk':
ev = CWExtract(self.proxy_server)
handler = CWExtract(self.proxy_server)
elif self.download_source.lower() == u'biblegateway':
ev = BGExtract(self.proxy_server)
handler = BGExtract(self.proxy_server)
elif self.download_source.lower() == u'bibleserver':
ev = BSExtract(self.proxy_server)
return ev.get_bible_chapter(self.download_name, book, chapter)
handler = BSExtract(self.proxy_server)
return handler.get_bible_chapter(self.download_name, book, chapter)
def get_books(self):
"""
@ -525,12 +478,6 @@ class HTTPBible(BibleDB):
return [Book.populate(name=book['name'])
for book in HTTPBooks.get_books()]
def lookup_book(self, book):
"""
Look up the name of a book.
"""
return HTTPBooks.get_book(book)
def get_chapter_count(self, book):
"""
Return the number of chapters in a particular book.
@ -549,8 +496,38 @@ class HTTPBible(BibleDB):
"""
return HTTPBooks.get_verse_count(book, chapter)
def get_soup_for_bible_ref(reference_url, cleaner=None):
    """
    Gets a webpage and returns a parsed and optionally cleaned soup or None.

    ``reference_url``
        The URL to obtain the soup from. An empty URL returns None without
        any download attempt.

    ``cleaner``
        Optional value handed to BeautifulSoup as ``markupMassage``. When it
        is not supplied the page is parsed with BeautifulSoup's defaults.

    On a failed download or a failed parse the user is notified through
    ``send_error_message`` (``download`` or ``parse`` respectively) and None
    is returned.
    """
    if not reference_url:
        return None
    page = get_web_page(reference_url, True)
    if not page:
        send_error_message(u'download')
        return None
    soup = None
    try:
        if cleaner:
            soup = BeautifulSoup(page, markupMassage=cleaner)
        else:
            soup = BeautifulSoup(page)
    except HTMLParseError:
        log.exception(u'BeautifulSoup could not parse the bible page.')
    finally:
        # The soup has consumed the response by now (or parsing failed), so
        # release the underlying connection instead of leaking it.
        page.close()
    if not soup:
        send_error_message(u'parse')
        return None
    # Let the UI catch up after the potentially slow parse.
    Receiver.send_message(u'openlp_process_events')
    return soup
def send_error_message(reason):
if reason == u'downoad':
if reason == u'download':
Receiver.send_message(u'openlp_error_message', {
u'title': translate('BiblePlugin.HTTPBible', 'Download Error'),
u'message': translate('BiblePlugin.HTTPBible', 'There was a '
@ -563,5 +540,5 @@ def send_error_message(reason):
u'title': translate('BiblePlugin.HTTPBible', 'Parse Error'),
u'message': translate('BiblePlugin.HTTPBible', 'There was a '
'problem extracting your verse selection. If this error continues '
'continues to occur consider reporting a bug.')
'to occur consider reporting a bug.')
})