forked from openlp/openlp
Refactor web bibles
This commit is contained in:
parent
94b033d33b
commit
5edde179a4
@ -230,7 +230,7 @@ def open(filename):
|
||||
return _open(filename)
|
||||
|
||||
|
||||
def _fix_addersses(**kwargs):
|
||||
def _fix_addresses(**kwargs):
|
||||
for headername in (u'address', u'to', u'cc', u'bcc'):
|
||||
try:
|
||||
headervalue = kwargs[headername]
|
||||
@ -260,7 +260,7 @@ def mailto_format(**kwargs):
|
||||
"""
|
||||
# @TODO: implement utf8 option
|
||||
|
||||
kwargs = _fix_addersses(**kwargs)
|
||||
kwargs = _fix_addresses(**kwargs)
|
||||
parts = []
|
||||
for headername in (u'to', u'cc', u'bcc', u'subject', u'body', u'attach'):
|
||||
if kwargs.has_key(headername):
|
||||
|
@ -282,8 +282,38 @@ def split_filename(path):
|
||||
else:
|
||||
return os.path.split(path)
|
||||
|
||||
def get_web_page(url, update_openlp=False):
|
||||
"""
|
||||
Attempts to download the webpage at url and returns that page or None.
|
||||
|
||||
``url``
|
||||
The URL to be downloaded.
|
||||
|
||||
``update_openlp``
|
||||
Tells OpenLP to update itself if the page is successfully downloaded.
|
||||
Defaults to False.
|
||||
"""
|
||||
# TODO: Add proxy usage. Get proxy info from OpenLP settings, add to a
|
||||
# proxy_handler, build into an opener and install the opener into urllib2.
|
||||
# http://docs.python.org/library/urllib2.html
|
||||
if not url:
|
||||
return None
|
||||
page = None
|
||||
log.debug(u'Downloading URL = %s' % url)
|
||||
try:
|
||||
page = urllib2.urlopen(url)
|
||||
log.debug(u'Downloaded URL = %s' % page.geturl())
|
||||
except urllib2.URLError:
|
||||
log.exception(u'The web page could not be downloaded')
|
||||
if not page:
|
||||
return None
|
||||
if update_openlp:
|
||||
Receiver.send_message(u'openlp_process_events')
|
||||
return page
|
||||
|
||||
from languagemanager import LanguageManager
|
||||
from actions import ActionList
|
||||
|
||||
__all__ = [u'AppLocation', u'check_latest_version', u'add_actions',
|
||||
u'get_filesystem_encoding', u'LanguageManager', u'ActionList']
|
||||
u'get_filesystem_encoding', u'LanguageManager', u'ActionList',
|
||||
u'get_web_page']
|
||||
|
@ -23,20 +23,22 @@
|
||||
# with this program; if not, write to the Free Software Foundation, Inc., 59 #
|
||||
# Temple Place, Suite 330, Boston, MA 02111-1307 USA #
|
||||
###############################################################################
|
||||
|
||||
"""
|
||||
The :mod:`http` module enables OpenLP to retrieve scripture from bible
|
||||
websites.
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sqlite3
|
||||
import socket
|
||||
import urllib
|
||||
import urllib2
|
||||
from HTMLParser import HTMLParseError
|
||||
|
||||
from BeautifulSoup import BeautifulSoup, NavigableString
|
||||
|
||||
from openlp.core.lib import Receiver, translate
|
||||
from openlp.core.utils import AppLocation
|
||||
from openlp.core.utils import AppLocation, get_web_page
|
||||
from openlp.plugins.bibles.lib import SearchResults
|
||||
from openlp.plugins.bibles.lib.db import BibleDB, Book
|
||||
|
||||
@ -204,29 +206,11 @@ class BGExtract(object):
|
||||
url_params = urllib.urlencode(
|
||||
{u'search': u'%s %s' % (bookname, chapter),
|
||||
u'version': u'%s' % version})
|
||||
page = None
|
||||
try:
|
||||
page = urllib2.urlopen(
|
||||
u'http://www.biblegateway.com/passage/?%s' % url_params)
|
||||
log.debug(u'BibleGateway url = %s' % page.geturl())
|
||||
Receiver.send_message(u'openlp_process_events')
|
||||
except urllib2.URLError:
|
||||
log.exception(u'The web bible page could not be downloaded.')
|
||||
send_error_message(u'download')
|
||||
finally:
|
||||
if not page:
|
||||
return None
|
||||
cleaner = [(re.compile(' |<br />|\'\+\''), lambda match: '')]
|
||||
soup = None
|
||||
try:
|
||||
soup = BeautifulSoup(page, markupMassage=cleaner)
|
||||
except HTMLParseError:
|
||||
log.exception(u'BeautifulSoup could not parse the bible page.')
|
||||
Receiver.send_message(u'bibles_download_error')
|
||||
send_error_message(u'parse')
|
||||
finally:
|
||||
if not soup:
|
||||
return None
|
||||
soup = get_soup_for_bible_ref(
|
||||
u'http://www.biblegateway.com/passage/?%s' % url_params, cleaner)
|
||||
if not soup:
|
||||
return None
|
||||
Receiver.send_message(u'openlp_process_events')
|
||||
footnotes = soup.findAll(u'sup', u'footnote')
|
||||
if footnotes:
|
||||
@ -280,35 +264,15 @@ class BSExtract(object):
|
||||
log.debug(u'get_bible_chapter %s,%s,%s', version, bookname, chapter)
|
||||
chapter_url = u'http://m.bibleserver.com/text/%s/%s%s' % \
|
||||
(version, bookname, chapter)
|
||||
|
||||
log.debug(u'URL: %s', chapter_url)
|
||||
page = None
|
||||
try:
|
||||
page = urllib2.urlopen(chapter_url)
|
||||
Receiver.send_message(u'openlp_process_events')
|
||||
except urllib2.URLError:
|
||||
log.exception(u'The web bible page could not be downloaded.')
|
||||
send_error_message(u'download')
|
||||
finally:
|
||||
if not page:
|
||||
return None
|
||||
soup = None
|
||||
try:
|
||||
soup = BeautifulSoup(page)
|
||||
except HTMLParseError:
|
||||
log.exception(u'BeautifulSoup could not parse the bible page.')
|
||||
send_error_message(u'parse')
|
||||
soup = get_soup_for_bible_ref(chapter_url)
|
||||
if not soup:
|
||||
return None
|
||||
Receiver.send_message(u'openlp_process_events')
|
||||
content = None
|
||||
try:
|
||||
content = soup.find(u'div', u'content').find(u'div').findAll(u'div')
|
||||
except:
|
||||
content = soup.find(u'div', u'content').find(u'div').findAll(u'div')
|
||||
if not content:
|
||||
log.exception(u'No verses found in the Bibleserver response.')
|
||||
send_error_message(u'parse')
|
||||
finally:
|
||||
if not content:
|
||||
return None
|
||||
return None
|
||||
verse_number = re.compile(r'v(\d{2})(\d{3})(\d{3}) verse')
|
||||
verses = {}
|
||||
for verse in content:
|
||||
@ -344,21 +308,8 @@ class CWExtract(object):
|
||||
urlbookname = bookname.replace(u' ', u'-')
|
||||
chapter_url = u'http://www.biblestudytools.com/%s/%s/%s.html' % \
|
||||
(version, urlbookname.lower(), chapter)
|
||||
log.debug(u'URL: %s', chapter_url)
|
||||
page = None
|
||||
try:
|
||||
page = urllib2.urlopen(chapter_url)
|
||||
Receiver.send_message(u'openlp_process_events')
|
||||
except urllib2.URLError:
|
||||
log.exception(u'The web bible page could not be downloaded.')
|
||||
send_error_message(u'download')
|
||||
return None
|
||||
soup = None
|
||||
try:
|
||||
soup = BeautifulSoup(page)
|
||||
except HTMLParseError:
|
||||
log.exception(u'BeautifulSoup could not parse the bible page.')
|
||||
send_error_message(u'parse')
|
||||
soup = get_soup_for_bible_ref(chapter_url)
|
||||
if not soup:
|
||||
return None
|
||||
Receiver.send_message(u'openlp_process_events')
|
||||
htmlverses = soup.findAll(u'span', u'versetext')
|
||||
@ -416,6 +367,8 @@ class HTTPBible(BibleDB):
|
||||
BibleDB.__init__(self, parent, **kwargs)
|
||||
self.download_source = kwargs[u'download_source']
|
||||
self.download_name = kwargs[u'download_name']
|
||||
# TODO: Clean up proxy stuff. We probably want one global proxy per
|
||||
# connection type (HTTP and HTTPS) at most.
|
||||
self.proxy_server = None
|
||||
self.proxy_username = None
|
||||
self.proxy_password = None
|
||||
@ -471,7 +424,7 @@ class HTTPBible(BibleDB):
|
||||
book = reference[0]
|
||||
db_book = self.get_book(book)
|
||||
if not db_book:
|
||||
book_details = self.lookup_book(book)
|
||||
book_details = HTTPBooks.get_book(book)
|
||||
if not book_details:
|
||||
Receiver.send_message(u'openlp_error_message', {
|
||||
u'title': translate('BiblesPlugin', 'No Book Found'),
|
||||
@ -511,12 +464,12 @@ class HTTPBible(BibleDB):
|
||||
log.debug(u'get_chapter %s, %s', book, chapter)
|
||||
log.debug(u'source = %s', self.download_source)
|
||||
if self.download_source.lower() == u'crosswalk':
|
||||
ev = CWExtract(self.proxy_server)
|
||||
handler = CWExtract(self.proxy_server)
|
||||
elif self.download_source.lower() == u'biblegateway':
|
||||
ev = BGExtract(self.proxy_server)
|
||||
handler = BGExtract(self.proxy_server)
|
||||
elif self.download_source.lower() == u'bibleserver':
|
||||
ev = BSExtract(self.proxy_server)
|
||||
return ev.get_bible_chapter(self.download_name, book, chapter)
|
||||
handler = BSExtract(self.proxy_server)
|
||||
return handler.get_bible_chapter(self.download_name, book, chapter)
|
||||
|
||||
def get_books(self):
|
||||
"""
|
||||
@ -525,12 +478,6 @@ class HTTPBible(BibleDB):
|
||||
return [Book.populate(name=book['name'])
|
||||
for book in HTTPBooks.get_books()]
|
||||
|
||||
def lookup_book(self, book):
|
||||
"""
|
||||
Look up the name of a book.
|
||||
"""
|
||||
return HTTPBooks.get_book(book)
|
||||
|
||||
def get_chapter_count(self, book):
|
||||
"""
|
||||
Return the number of chapters in a particular book.
|
||||
@ -549,8 +496,38 @@ class HTTPBible(BibleDB):
|
||||
"""
|
||||
return HTTPBooks.get_verse_count(book, chapter)
|
||||
|
||||
def get_soup_for_bible_ref(reference_url, cleaner=None):
|
||||
"""
|
||||
Gets a webpage and returns a parsed and optionally cleaned soup or None.
|
||||
|
||||
``reference_url``
|
||||
The URL to obtain the soup from.
|
||||
|
||||
``cleaner``
|
||||
An optional regex to use during webpage parsing.
|
||||
"""
|
||||
if not reference_url:
|
||||
return None
|
||||
page = get_web_page(reference_url, True)
|
||||
if not page:
|
||||
send_error_message(u'download')
|
||||
return None
|
||||
soup = None
|
||||
try:
|
||||
if cleaner:
|
||||
soup = BeautifulSoup(page, markupMassage=cleaner)
|
||||
else:
|
||||
soup = BeautifulSoup(page)
|
||||
except HTMLParseError:
|
||||
log.exception(u'BeautifulSoup could not parse the bible page.')
|
||||
if not soup:
|
||||
send_error_message(u'parse')
|
||||
return None
|
||||
Receiver.send_message(u'openlp_process_events')
|
||||
return soup
|
||||
|
||||
def send_error_message(reason):
|
||||
if reason == u'downoad':
|
||||
if reason == u'download':
|
||||
Receiver.send_message(u'openlp_error_message', {
|
||||
u'title': translate('BiblePlugin.HTTPBible', 'Download Error'),
|
||||
u'message': translate('BiblePlugin.HTTPBible', 'There was a '
|
||||
@ -563,5 +540,5 @@ def send_error_message(reason):
|
||||
u'title': translate('BiblePlugin.HTTPBible', 'Parse Error'),
|
||||
u'message': translate('BiblePlugin.HTTPBible', 'There was a '
|
||||
'problem extracting your verse selection. If this error continues '
|
||||
'continues to occur consider reporting a bug.')
|
||||
'to occur consider reporting a bug.')
|
||||
})
|
||||
|
Loading…
Reference in New Issue
Block a user