2009-09-08 19:58:05 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
2013-01-01 16:33:41 +00:00
|
|
|
# vim: autoindent shiftwidth=4 expandtab textwidth=120 tabstop=4 softtabstop=4
|
2009-09-08 19:58:05 +00:00
|
|
|
|
|
|
|
###############################################################################
|
|
|
|
# OpenLP - Open Source Lyrics Projection #
|
|
|
|
# --------------------------------------------------------------------------- #
|
2012-12-29 20:56:56 +00:00
|
|
|
# Copyright (c) 2008-2013 Raoul Snyman #
|
|
|
|
# Portions copyright (c) 2008-2013 Tim Bentley, Gerald Britton, Jonathan #
|
2012-06-22 14:14:53 +00:00
|
|
|
# Corwin, Samuel Findlay, Michael Gorven, Scott Guerrieri, Matthias Hub, #
|
2012-11-11 21:16:14 +00:00
|
|
|
# Meinert Jordan, Armin Köhler, Erik Lundin, Edwin Lunando, Brian T. Meyer. #
|
2012-10-21 13:16:22 +00:00
|
|
|
# Joshua Miller, Stevan Pettit, Andreas Preikschat, Mattias Põldaru, #
|
|
|
|
# Christian Richter, Philip Ridout, Simon Scudder, Jeffrey Smith, #
|
|
|
|
# Maikel Stuivenberg, Martin Thompson, Jon Tibble, Dave Warnock, #
|
2012-12-01 07:57:54 +00:00
|
|
|
# Frode Woldsund, Martin Zibricky, Patrick Zimmermann #
|
2009-09-08 19:58:05 +00:00
|
|
|
# --------------------------------------------------------------------------- #
|
|
|
|
# This program is free software; you can redistribute it and/or modify it #
|
|
|
|
# under the terms of the GNU General Public License as published by the Free #
|
|
|
|
# Software Foundation; version 2 of the License. #
|
|
|
|
# #
|
|
|
|
# This program is distributed in the hope that it will be useful, but WITHOUT #
|
|
|
|
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
|
|
|
|
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for #
|
|
|
|
# more details. #
|
|
|
|
# #
|
|
|
|
# You should have received a copy of the GNU General Public License along #
|
|
|
|
# with this program; if not, write to the Free Software Foundation, Inc., 59 #
|
|
|
|
# Temple Place, Suite 330, Boston, MA 02111-1307 USA #
|
|
|
|
###############################################################################
|
2011-01-10 01:46:47 +00:00
|
|
|
"""
|
2013-04-18 17:45:14 +00:00
|
|
|
The :mod:`http` module enables OpenLP to retrieve scripture from bible websites.
|
2011-01-10 01:46:47 +00:00
|
|
|
"""
|
2013-06-23 19:51:17 +00:00
|
|
|
import os
|
2009-09-25 00:43:42 +00:00
|
|
|
import logging
|
2010-03-26 20:50:55 +00:00
|
|
|
import re
|
2010-12-31 23:06:35 +00:00
|
|
|
import socket
|
2013-08-31 18:17:38 +00:00
|
|
|
import urllib.request, urllib.parse, urllib.error
|
|
|
|
from html.parser import HTMLParseError
|
2010-01-24 07:08:14 +00:00
|
|
|
|
2013-04-05 19:58:13 +00:00
|
|
|
from bs4 import BeautifulSoup, NavigableString, Tag
|
2008-10-30 20:44:54 +00:00
|
|
|
|
2013-02-05 06:54:55 +00:00
|
|
|
from openlp.core.lib import Registry, translate
|
2011-02-02 23:12:31 +00:00
|
|
|
from openlp.core.lib.ui import critical_error_message_box
|
2011-07-07 18:03:12 +00:00
|
|
|
from openlp.core.utils import get_web_page
|
2010-10-27 17:42:10 +00:00
|
|
|
from openlp.plugins.bibles.lib import SearchResults
|
2013-01-01 16:33:41 +00:00
|
|
|
from openlp.plugins.bibles.lib.db import BibleDB, BiblesResourcesDB, Book
|
2008-11-09 20:11:31 +00:00
|
|
|
|
2013-04-05 20:20:51 +00:00
|
|
|
CLEANER_REGEX = re.compile(r' |<br />|\'\+\'')
|
2013-04-05 19:58:13 +00:00
|
|
|
# Matches one or more spaces directly before '.', ',' or ';'. CWExtract.get_bible_chapter substitutes the match with
# the captured punctuation mark, which removes the spaces. NOTE: "PUNKCTUATION" is misspelled, but the name is
# referenced elsewhere in this module, so it is left unchanged.
FIX_PUNKCTUATION_REGEX = re.compile(r'[ ]+([.,;])')
|
|
|
|
# Matches a run of two or more spaces; CWExtract.get_bible_chapter replaces each run with a single space.
REDUCE_SPACES_REGEX = re.compile(r'[ ]{2,}')
|
2012-06-12 21:25:31 +00:00
|
|
|
UGLY_CHARS = {
|
2013-08-31 18:17:38 +00:00
|
|
|
'\u2014': ' - ',
|
|
|
|
'\u2018': '\'',
|
|
|
|
'\u2019': '\'',
|
|
|
|
'\u201c': '"',
|
|
|
|
'\u201d': '"',
|
|
|
|
' ': ' '
|
2012-06-12 21:25:31 +00:00
|
|
|
}
|
2013-04-05 19:58:13 +00:00
|
|
|
# Applied to the space-joined CSS classes of a Bibleserver verse <div>; BSExtract.get_bible_chapter keeps only the
# third group and converts it to the verse number. Presumably groups one and two are the book and chapter numbers -
# TODO confirm against the Bibleserver markup.
VERSE_NUMBER_REGEX = re.compile(r'v(\d{1,2})(\d{3})(\d{3}) verse.*')
|
|
|
|
|
2012-06-12 21:25:31 +00:00
|
|
|
|
2010-02-27 09:18:26 +00:00
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
2013-04-05 19:58:13 +00:00
|
|
|
|
2010-07-29 14:36:02 +00:00
|
|
|
class BGExtract(object):
    """
    Extract verses from BibleGateway
    """
    def __init__(self, proxy_url=None):
        """
        Store the proxy URL and set the default socket timeout.

        ``proxy_url``
            The proxy URL, or ``None``. It is only stored on the instance here.
        """
        log.debug('BGExtract.init("%s")', proxy_url)
        self.proxy_url = proxy_url
        # NOTE: this sets the default timeout (in seconds) for every socket created afterwards in this process.
        socket.setdefaulttimeout(30)

    def _remove_elements(self, parent, tag, class_=None):
        """
        Remove a particular element from the BeautifulSoup tree.

        ``parent``
            The element from which items need to be removed.

        ``tag``
            A string of the tag type, e.g. "div"

        ``class_``
            An HTML class attribute for further qualification.
        """
        if class_:
            all_tags = parent.find_all(tag, class_)
        else:
            all_tags = parent.find_all(tag)
        for element in all_tags:
            element.extract()

    @staticmethod
    def _first_class(tag):
        """
        Return the first entry of the ``class`` attribute of ``tag``, or ``None`` when the tag has no class.

        ``tag``
            Any object offering ``get('class')``, e.g. a BeautifulSoup Tag.
        """
        classes = tag.get('class')
        return classes[0] if classes else None

    @staticmethod
    def _parse_verse_number(raw):
        """
        Convert a raw verse label into an integer.

        A plain number is returned as is, for a range such as ``3-4`` the number before the dash is returned. ``None``
        is returned when no number can be read, instead of letting a ``ValueError`` escape.

        ``raw``
            The verse label; it is converted with ``str()`` first.
        """
        label = str(raw).strip()
        try:
            return int(label)
        except ValueError:
            pass
        first, dash, _ = label.partition('-')
        if not dash:
            return None
        try:
            return int(first)
        except ValueError:
            return None

    def _collect_children(self, tag):
        """
        Run ``_extract_verse`` over every child of ``tag`` and combine the results.

        Returns a tuple ``(verse, text)``: the last verse label found among the children (or ``None``) and the
        concatenated text of all children (``''`` when there is none).

        ``tag``
            The BeautifulSoup Tag element whose ``contents`` are examined.
        """
        verse = None
        pieces = []
        for child in tag.contents:
            child_verse, child_text = self._extract_verse(child)
            if child_verse:
                verse = child_verse
            # Only non-empty text is added. Assigning an empty child text (e.g. from an empty nested tag) would
            # throw away everything collected so far.
            if child_text:
                pieces.append(child_text)
        return verse, ''.join(pieces)

    def _extract_verse(self, tag):
        """
        Extract a verse (or part of a verse) from a tag.

        Returns a tuple ``(verse, text)``. For a string node the verse is ``None`` and the text is the string. For a
        ``versenum`` tag the verse is the label without square brackets and the text is ``None``. For a ``chapternum``
        tag the verse is ``'1'`` and the text is ``None``. Any other tag is searched recursively.

        ``tag``
            The BeautifulSoup Tag element with the stuff we want.
        """
        if isinstance(tag, NavigableString):
            return None, str(tag)
        # Tags without a class attribute (e.g. <i> or <br />) must not break the extraction, so the class is looked up
        # defensively. Only the first class is compared, which also covers "versenum mid-line".
        first_class = self._first_class(tag)
        if first_class == 'versenum':
            verse = str(tag.string).replace('[', '').replace(']', '').strip()
            return verse, None
        if first_class == 'chapternum':
            return '1', None
        return self._collect_children(tag)

    def _clean_soup(self, tag):
        """
        Remove all the rubbish from the HTML page.

        ``tag``
            The base tag within which we want to remove stuff.
        """
        self._remove_elements(tag, 'sup', 'crossreference')
        self._remove_elements(tag, 'sup', 'footnote')
        self._remove_elements(tag, 'div', 'footnotes')
        self._remove_elements(tag, 'div', 'crossrefs')
        self._remove_elements(tag, 'h3')
        self._remove_elements(tag, 'h4')
        self._remove_elements(tag, 'h5')

    def _extract_verses(self, tags):
        """
        Extract all the verses from a pre-prepared list of HTML tags.

        Returns a dictionary mapping the verse number to the verse text. A verse label that cannot be converted to a
        number is kept as a string key.

        ``tags``
            A list of BeautifulSoup Tag elements.
        """
        verses = []
        current_text = ''
        # The tags are walked backwards: text of tags without a verse number is collected and then appended to the
        # closest preceding tag which does carry a verse number.
        for tag in reversed(tags):
            verse, text = self._collect_children(tag)
            if not verse:
                current_text = text + ' ' + current_text
            else:
                text += ' ' + current_text
                current_text = ''
            if text:
                for old, new in UGLY_CHARS.items():
                    text = text.replace(old, new)
                # Collapse all whitespace runs into single spaces.
                text = ' '.join(text.split())
            if verse and text:
                verse_number = self._parse_verse_number(verse)
                if verse_number is None:
                    log.warning('Illegal verse number: %s', str(verse))
                    verse = verse.strip()
                else:
                    verse = verse_number
                verses.append((verse, text))
        # Restore the document order, a later entry replaces an earlier one with the same verse number.
        return dict(reversed(verses))

    def _extract_verses_old(self, div):
        """
        Use the old style of parsing for those Bibles on BG who mysteriously have not been migrated to the new (still
        broken) HTML.

        Returns a dictionary mapping the verse number to the verse text.

        ``div``
            The parent div.
        """
        verse_list = {}
        # Cater for inconsistent mark up in the first verse of a chapter.
        first_verse = div.find('versenum')
        if first_verse and first_verse.contents:
            verse_list[1] = str(first_verse.contents[0])
        for verse in div('sup', 'versenum'):
            raw_verse_num = verse.next_element
            # Not all verses exist in all translations and may or may not be represented by a verse number. If they are
            # not fine, if they are it will probably be in a format that breaks int(). We will then have no idea what
            # garbage may be sucked in to the verse text so if we do not get a clean int() then ignore the verse
            # completely.
            clean_verse_num = self._parse_verse_number(raw_verse_num)
            if not clean_verse_num:
                log.warning('Illegal verse number: %s', str(raw_verse_num))
                continue
            first_part = raw_verse_num.next_element
            if first_part is None:
                # The verse number is the very last node of the document, there is no text to collect.
                continue
            # Only string nodes can be joined, a leading tag contributes through its own string children below.
            pieces = [first_part] if isinstance(first_part, NavigableString) else []
            part = first_part.next_element
            # Stop at the end of the document or as soon as the next verse number is reached.
            while part is not None and not (isinstance(part, Tag) and self._first_class(part) == 'versenum'):
                # While we are still in the same verse grab all the text.
                if isinstance(part, NavigableString):
                    pieces.append(part)
                if isinstance(part.next_element, Tag) and part.next_element.name == 'div':
                    # Run out of verses so stop.
                    break
                part = part.next_element
            verse_list[clean_verse_num] = ''.join(pieces)
        return verse_list

    def get_bible_chapter(self, version, book_name, chapter):
        """
        Access and decode Bibles via the BibleGateway website.

        Returns a ``SearchResults`` object, or ``None`` if the page could not be loaded or parsed.

        ``version``
            The version of the Bible like 31 for New International version.

        ``book_name``
            Name of the Book.

        ``chapter``
            Chapter number.
        """
        log.debug('BGExtract.get_bible_chapter("%s", "%s", "%s")', version, book_name, chapter)
        url_book_name = urllib.parse.quote(book_name.encode("utf-8"))
        url_params = 'search=%s+%s&version=%s' % (url_book_name, chapter, version)
        soup = get_soup_for_bible_ref(
            'http://www.biblegateway.com/passage/?%s' % url_params,
            pre_parse_regex=r'<meta name.*?/>', pre_parse_substitute='')
        if not soup:
            return None
        div = soup.find('div', 'result-text-style-normal')
        if div is None:
            # Without the result container there is nothing to clean or to extract.
            log.debug('No content found in the BibleGateway response.')
            send_error_message('parse')
            return None
        self._clean_soup(div)
        span_list = div.find_all('span', 'text')
        log.debug('Span list: %s', span_list)
        if not span_list:
            # If we don't get any spans then we must have the old HTML format
            verse_list = self._extract_verses_old(div)
        else:
            verse_list = self._extract_verses(span_list)
        if not verse_list:
            log.debug('No content found in the BibleGateway response.')
            send_error_message('parse')
            return None
        return SearchResults(book_name, chapter, verse_list)

    def get_books_from_http(self, version):
        """
        Load a list of all books a Bible contains from BibleGateway website.

        Returns the list of book names, or ``None`` if the page could not be downloaded or parsed.

        ``version``
            The version of the Bible like NIV for New International Version
        """
        log.debug('BGExtract.get_books_from_http("%s")', version)
        url_params = urllib.parse.urlencode({'action': 'getVersionInfo', 'vid': '%s' % version})
        reference_url = 'http://www.biblegateway.com/versions/?%s#books' % url_params
        page = get_web_page(reference_url)
        if not page:
            send_error_message('download')
            return None
        page_source = page.read()
        try:
            page_source = str(page_source, 'utf8')
        except UnicodeDecodeError:
            # Fall back to the Cyrillic code page if the page is not valid UTF-8.
            page_source = str(page_source, 'cp1251')
        try:
            soup = BeautifulSoup(page_source)
        except HTMLParseError:
            log.error('BeautifulSoup could not parse the Bible page.')
            send_error_message('parse')
            return None
        if not soup:
            send_error_message('parse')
            return None
        self.application.process_events()
        content = soup.find('table', 'infotable')
        if content:
            content = content.find_all('tr')
        if not content:
            log.error('No books found in the Biblegateway response.')
            send_error_message('parse')
            return None
        books = []
        for row in content:
            cell = row.find('td')
            # Rows without a (filled) cell, e.g. header rows, do not name a book.
            if cell and cell.contents:
                books.append(cell.contents[0])
        return books

    def _get_application(self):
        """
        Adds the openlp to the class dynamically.
        Windows needs to access the application in a dynamic manner.
        """
        if os.name == 'nt':
            return Registry().get('application')
        else:
            # On other platforms the application is looked up once and cached on the instance.
            if not hasattr(self, '_application'):
                self._application = Registry().get('application')
            return self._application

    application = property(_get_application)
|
|
|
|
|
2010-07-29 14:36:02 +00:00
|
|
|
|
2010-12-11 23:54:07 +00:00
|
|
|
class BSExtract(object):
    """
    Extract verses from Bibleserver.com
    """
    def __init__(self, proxy_url=None):
        """
        Store the proxy URL and set the default socket timeout.

        ``proxy_url``
            The proxy URL, or ``None``. It is only stored on the instance here.
        """
        log.debug('BSExtract.init("%s")', proxy_url)
        self.proxy_url = proxy_url
        # NOTE: this sets the default timeout (in seconds) for every socket created afterwards in this process.
        socket.setdefaulttimeout(30)

    @staticmethod
    def _find_verse_divs(soup):
        """
        Locate the verse divs in a chapter page: all ``div`` elements inside the first ``div`` of ``div.content``.

        Returns the (possibly empty) list of divs, or ``None`` if one of the two enclosing divs is missing.

        ``soup``
            The parsed chapter page.
        """
        content = soup.find('div', 'content')
        if content is None:
            return None
        container = content.find('div')
        if container is None:
            return None
        return container.find_all('div')

    def get_bible_chapter(self, version, book_name, chapter):
        """
        Access and decode bibles via Bibleserver mobile website

        Returns a ``SearchResults`` object, or ``None`` if the page could not be loaded or parsed.

        ``version``
            The version of the bible like NIV for New International Version

        ``book_name``
            Text name of bible book e.g. Genesis, 1. John, 1John or Offenbarung

        ``chapter``
            Chapter number
        """
        log.debug('BSExtract.get_bible_chapter("%s", "%s", "%s")', version, book_name, chapter)
        url_version = urllib.parse.quote(version.encode("utf-8"))
        url_book_name = urllib.parse.quote(book_name.encode("utf-8"))
        chapter_url = 'http://m.bibleserver.com/text/%s/%s%d' % (url_version, url_book_name, chapter)
        header = ('Accept-Language', 'en')
        soup = get_soup_for_bible_ref(chapter_url, header)
        if not soup:
            return None
        self.application.process_events()
        verse_divs = self._find_verse_divs(soup)
        if verse_divs is None:
            # Either the content div or its inner div is missing, so the page has an unexpected layout.
            log.error('No verses found in the Bibleserver response.')
            send_error_message('parse')
            return None
        verses = {}
        for verse in verse_divs:
            self.application.process_events()
            # The verse number is the third group of the class string, see VERSE_NUMBER_REGEX.
            versenumber = int(VERSE_NUMBER_REGEX.sub(r'\3', ' '.join(verse['class'])))
            verses[versenumber] = verse.contents[1].rstrip('\n')
        return SearchResults(book_name, chapter, verses)

    def get_books_from_http(self, version):
        """
        Load a list of all books a Bible contains from Bibleserver mobile website.

        Returns the list of book names, or ``None`` if the page could not be loaded or parsed.

        ``version``
            The version of the Bible like NIV for New International Version
        """
        log.debug('BSExtract.get_books_from_http("%s")', version)
        url_version = urllib.parse.quote(version.encode("utf-8"))
        chapter_url = 'http://m.bibleserver.com/overlay/selectBook?translation=%s' % (url_version)
        soup = get_soup_for_bible_ref(chapter_url)
        if not soup:
            return None
        content = soup.find('ul')
        if not content:
            log.error('No books found in the Bibleserver response.')
            send_error_message('parse')
            return None
        content = content.find_all('li')
        # The book name is the first child of the first child of each list item.
        return [book.contents[0].contents[0] for book in content]

    def _get_application(self):
        """
        Adds the openlp to the class dynamically.
        Windows needs to access the application in a dynamic manner.
        """
        if os.name == 'nt':
            return Registry().get('application')
        else:
            # On other platforms the application is looked up once and cached on the instance.
            if not hasattr(self, '_application'):
                self._application = Registry().get('application')
            return self._application

    application = property(_get_application)
|
|
|
|
|
2010-12-11 23:54:07 +00:00
|
|
|
|
2010-07-29 14:36:02 +00:00
|
|
|
class CWExtract(object):
    """
    Extract verses from CrossWalk/BibleStudyTools
    """
    def __init__(self, proxy_url=None):
        """
        Store the proxy URL and set the default socket timeout.

        ``proxy_url``
            The proxy URL, or ``None``. It is only stored on the instance here.
        """
        log.debug('CWExtract.init("%s")', proxy_url)
        self.proxy_url = proxy_url
        # NOTE: this sets the default timeout (in seconds) for every socket created afterwards in this process.
        socket.setdefaulttimeout(30)

    @staticmethod
    def _has_class(tag, *names):
        """
        Return ``True`` if the ``class`` attribute of ``tag`` contains at least one of ``names``.

        BeautifulSoup 4 reports ``class`` as a list of strings; a plain string is split on whitespace. A tag without
        a class never matches.

        ``tag``
            Any object offering ``get('class')``, e.g. a BeautifulSoup Tag.

        ``names``
            The class names to look for.
        """
        classes = tag.get('class') or []
        if isinstance(classes, str):
            classes = classes.split()
        return any(name in classes for name in names)

    def get_bible_chapter(self, version, book_name, chapter):
        """
        Access and decode bibles via the Crosswalk website

        Returns a ``SearchResults`` object, or ``None`` if the page could not be loaded or parsed.

        ``version``
            The version of the Bible like niv for New International Version

        ``book_name``
            Name of the book. Spaces are replaced by dashes and the name is lower-cased to build the URL.

        ``chapter``
            Chapter number
        """
        log.debug('CWExtract.get_bible_chapter("%s", "%s", "%s")', version, book_name, chapter)
        url_book_name = book_name.replace(' ', '-')
        url_book_name = url_book_name.lower()
        url_book_name = urllib.parse.quote(url_book_name.encode("utf-8"))
        chapter_url = 'http://www.biblestudytools.com/%s/%s/%s.html' % (version, url_book_name, chapter)
        soup = get_soup_for_bible_ref(chapter_url)
        if not soup:
            return None
        self.application.process_events()
        html_verses = soup.find_all('span', 'versetext')
        if not html_verses:
            log.error('No verses found in the CrossWalk response.')
            send_error_message('parse')
            return None
        verses = {}
        for verse in html_verses:
            self.application.process_events()
            verse_number = int(verse.contents[0].contents[0])
            verse_text = ''
            for part in verse.contents:
                self.application.process_events()
                if isinstance(part, NavigableString):
                    verse_text += part
                # The class has to be read with get('class'): the BeautifulSoup 3 attribute "attrMap" does not exist
                # in BeautifulSoup 4, where it is treated as a search for an <attrMap> tag and yields None.
                elif self._has_class(part, 'WordsOfChrist', 'strongs'):
                    for subpart in part.contents:
                        self.application.process_events()
                        if isinstance(subpart, NavigableString):
                            verse_text += subpart
                        elif self._has_class(subpart, 'strongs'):
                            for subsub in subpart.contents:
                                self.application.process_events()
                                if isinstance(subsub, NavigableString):
                                    verse_text += subsub
            self.application.process_events()
            # Fix up leading and trailing spaces, multiple spaces, and spaces between text and , and .
            verse_text = verse_text.strip('\n\r\t ')
            verse_text = REDUCE_SPACES_REGEX.sub(' ', verse_text)
            verse_text = FIX_PUNKCTUATION_REGEX.sub(r'\1', verse_text)
            verses[verse_number] = verse_text
        return SearchResults(book_name, chapter, verses)

    def get_books_from_http(self, version):
        """
        Load a list of all books a Bible contains from the Crosswalk website.

        Returns the list of book names, or ``None`` if the page could not be loaded or parsed.

        ``version``
            The version of the bible like NIV for New International Version
        """
        log.debug('CWExtract.get_books_from_http("%s")', version)
        chapter_url = 'http://www.biblestudytools.com/%s/' % (version)
        soup = get_soup_for_bible_ref(chapter_url)
        if not soup:
            return None
        content = soup.find('div', {'class': 'Body'})
        # The body div itself may be missing, in that case there is no list to look for either.
        if content is not None:
            content = content.find('ul', {'class': 'parent'})
        if not content:
            log.error('No books found in the Crosswalk response.')
            send_error_message('parse')
            return None
        content = content.find_all('li')
        books = []
        for item in content:
            link = item.find('a')
            # List items without a (filled) link do not name a book.
            if link is not None and link.contents:
                books.append(link.contents[0])
        return books

    def _get_application(self):
        """
        Adds the openlp to the class dynamically.
        Windows needs to access the application in a dynamic manner.
        """
        if os.name == 'nt':
            return Registry().get('application')
        else:
            # On other platforms the application is looked up once and cached on the instance.
            if not hasattr(self, '_application'):
                self._application = Registry().get('application')
            return self._application

    application = property(_get_application)
|
|
|
|
|
2009-06-16 18:21:24 +00:00
|
|
|
|
2009-12-30 17:29:08 +00:00
|
|
|
class HTTPBible(BibleDB):
|
2013-08-31 18:17:38 +00:00
|
|
|
log.info('%s HTTPBible loaded', __name__)
|
2009-12-30 17:29:08 +00:00
|
|
|
|
2010-01-31 19:49:01 +00:00
|
|
|
def __init__(self, parent, **kwargs):
    """
    Set up a web Bible: initialise the ``BibleDB`` base class and record where the Bible is downloaded from.

    ``parent``
        Passed on unchanged to ``BibleDB.__init__``.

    ``kwargs``
        ``download_source`` and ``download_name`` are required, a missing one raises ``KeyError``. ``path``,
        ``proxy_server``, ``proxy_username`` and ``proxy_password`` are optional; the proxy attributes are ``None``
        when not given.
    """
    BibleDB.__init__(self, parent, **kwargs)
    self.download_source = kwargs['download_source']
    self.download_name = kwargs['download_name']
    # The path attribute is only touched when a path was handed in.
    if 'path' in kwargs:
        self.path = kwargs['path']
    # TODO: Clean up proxy stuff. We probably want one global proxy per connection type (HTTP and HTTPS) at most.
    for option in ('proxy_server', 'proxy_username', 'proxy_password'):
        setattr(self, option, kwargs.get(option))
|
2009-12-30 17:29:08 +00:00
|
|
|
|
2011-05-26 19:13:11 +00:00
|
|
|
def do_import(self, bible_name=None):
    """
    Run the import. This method overrides the parent class method. Returns ``True`` on success, ``False`` on
    failure.

    The download settings are stored as meta data, the list of books is fetched from the download source and a book
    entry is created for each of them. The import also fails when the download source is unknown, when no books or
    no language can be determined, when a book cannot be identified or when the import was stopped.

    ``bible_name``
        Passed to ``get_language`` when the web Bible resource does not provide a language.
    """
    self.wizard.progress_bar.setMaximum(68)
    self.wizard.increment_progress_bar(translate('BiblesPlugin.HTTPBible', 'Registering Bible and loading books...'))
    self.save_meta('download_source', self.download_source)
    self.save_meta('download_name', self.download_name)
    if self.proxy_server:
        self.save_meta('proxy_server', self.proxy_server)
    if self.proxy_username:
        # Store the proxy userid.
        self.save_meta('proxy_username', self.proxy_username)
    if self.proxy_password:
        # Store the proxy password.
        self.save_meta('proxy_password', self.proxy_password)
    source = self.download_source.lower()
    if source == 'crosswalk':
        handler = CWExtract(self.proxy_server)
    elif source == 'biblegateway':
        handler = BGExtract(self.proxy_server)
    elif source == 'bibleserver':
        handler = BSExtract(self.proxy_server)
    else:
        # Without this branch an unknown source would fail further down with an unbound "handler".
        log.error('Unknown download source "%s" - download name: "%s"', self.download_source, self.download_name)
        return False
    books = handler.get_books_from_http(self.download_name)
    if not books:
        # log.error is used as there is no active exception whose traceback could be logged.
        log.error('Importing books from %s - download name: "%s" failed', self.download_source, self.download_name)
        return False
    self.wizard.progress_bar.setMaximum(len(books) + 2)
    self.wizard.increment_progress_bar(translate('BiblesPlugin.HTTPBible', 'Registering Language...'))
    bible = BiblesResourcesDB.get_webbible(self.download_name, source)
    # NOTE(review): the guard assumes get_webbible may return None for an unknown Bible - confirm.
    if bible and bible['language_id']:
        language_id = bible['language_id']
        self.save_meta('language_id', language_id)
    else:
        language_id = self.get_language(bible_name)
    if not language_id:
        # NOTE(review): self.filename is not set in the visible part of this class - presumably it comes from
        # BibleDB, verify.
        log.error('Importing books from %s failed', self.filename)
        return False
    for book in books:
        if self.stop_import_flag:
            break
        self.wizard.increment_progress_bar(translate(
            'BiblesPlugin.HTTPBible', 'Importing %s...', 'Importing <book name>...') % book)
        book_ref_id = self.get_book_ref_id_by_name(book, len(books), language_id)
        if not book_ref_id:
            log.error('Importing books from %s - download name: "%s" failed',
                self.download_source, self.download_name)
            return False
        book_details = BiblesResourcesDB.get_book_by_id(book_ref_id)
        log.debug('Book details: Name:%s; id:%s; testament_id:%s',
            book, book_ref_id, book_details['testament_id'])
        self.create_book(book, book_ref_id, book_details['testament_id'])
    # A stopped import counts as a failure.
    return not self.stop_import_flag
|
2009-12-30 17:29:08 +00:00
|
|
|
|
2011-05-14 21:25:22 +00:00
|
|
|
def get_verses(self, reference_list, show_error=True):
    """
    A reimplementation of the ``BibleDB.get_verses`` method, this one is specifically for web Bibles. For every
    reference it first checks whether the chapter already has verses in the DB, and if not it pulls the chapter
    from the web and stores it. The verses are then read from the DB using the ancestor method.

    ``reference_list``
        This is the list of references the media manager item wants. It is a list of tuples, with the following
        format::

            (book_reference_id, chapter, start_verse, end_verse)

        Therefore, when you are looking for multiple items, simply break them up into references like this, bundle
        them into a list. This function then runs through the list, and returns an amalgamated list of ``Verse``
        objects. For example::

            [(u'35', 1, 1, 1), (u'35', 2, 2, 3)]

    ``show_error``
        When true, a message box is shown if a referenced book cannot be found. The flag is also handed on to
        ``BibleDB.get_verses``.

    Returns an empty list as soon as one referenced book cannot be found, otherwise the result of
    ``BibleDB.get_verses`` for the whole ``reference_list``.
    """
    log.debug('HTTPBible.get_verses("%s")', reference_list)
    for reference in reference_list:
        book_id = reference[0]
        db_book = self.get_book_by_book_ref_id(book_id)
        if not db_book:
            if show_error:
                critical_error_message_box(
                    translate('BiblesPlugin', 'No Book Found'),
                    translate('BiblesPlugin', 'No matching book could be found in this Bible. Check that you have '
                        'spelled the name of the book correctly.'))
            return []
        book = db_book.name
        chapter = reference[1]
        if BibleDB.get_verse_count(self, book_id, chapter) == 0:
            # No verses stored for this chapter yet, so download it and save it.
            self.application.set_busy_cursor()
            try:
                search_results = self.get_chapter(book, chapter)
                if search_results and search_results.has_verse_list():
                    # Look the book up again using the name that came back with the results rather than the
                    # name that was requested: it is possible to request "ac" and get "Acts" back.
                    book_name = search_results.book
                    self.application.process_events()
                    found_book = self.get_book(book_name)
                    self.create_chapter(found_book.id, search_results.chapter, search_results.verse_list)
                    self.application.process_events()
            finally:
                # Always restore the cursor, even when the download or the DB write raised, so the application
                # is not left showing a busy cursor.
                self.application.set_normal_cursor()
        self.application.process_events()
    return BibleDB.get_verses(self, reference_list, show_error)
|
2010-01-24 07:08:14 +00:00
|
|
|
|
2010-05-25 23:47:26 +00:00
|
|
|
def get_chapter(self, book, chapter):
    """
    Receive the request and call the relevant handler methods.

    The handler is chosen from ``self.download_source`` (compared case-insensitively) and is constructed with
    ``self.proxy_server``.

    ``book``
        The book to fetch, handed on to the handler's ``get_bible_chapter``.

    ``chapter``
        The chapter to fetch, handed on to the handler's ``get_bible_chapter``.

    Returns whatever the handler's ``get_bible_chapter`` returns, or ``None`` when ``self.download_source`` is
    not one of the known sources.
    """
    log.debug('HTTPBible.get_chapter("%s", "%s")', book, chapter)
    log.debug('source = %s', self.download_source)
    source = self.download_source.lower()
    if source == 'crosswalk':
        handler = CWExtract(self.proxy_server)
    elif source == 'biblegateway':
        handler = BGExtract(self.proxy_server)
    elif source == 'bibleserver':
        handler = BSExtract(self.proxy_server)
    else:
        # Without this branch ``handler`` would be unbound and the call below would fail with an
        # UnboundLocalError instead of reporting the real problem.
        log.error('Unknown download source "%s", unable to fetch the chapter', self.download_source)
        return None
    return handler.get_bible_chapter(self.download_name, book, chapter)
|
2010-01-24 07:08:14 +00:00
|
|
|
|
|
|
|
def get_books(self):
    """
    Fetch all ``Book`` objects through ``get_all_objects``, ordered by ``Book.id``.
    """
    log.debug('HTTPBible.get_books("%s")', Book.name)
    all_books = self.get_all_objects(Book, order_by_ref=Book.id)
    return all_books
|
2010-02-06 10:22:20 +00:00
|
|
|
|
2011-04-26 18:18:13 +00:00
|
|
|
def get_chapter_count(self, book):
    """
    Look up how many chapters a book has.

    ``book``
        The book object to count chapters for. Its ``name`` is logged and its ``book_reference_id`` is used for
        the lookup in ``BiblesResourcesDB``.
    """
    log.debug('HTTPBible.get_chapter_count("%s")', book.name)
    chapter_total = BiblesResourcesDB.get_chapter_count(book.book_reference_id)
    return chapter_total
|
2010-02-06 10:22:20 +00:00
|
|
|
|
2011-03-23 19:18:51 +00:00
|
|
|
def get_verse_count(self, book_id, chapter):
    """
    Look up how many verses the given chapter of the given book has.

    ``book_id``
        The id of the book, handed on unchanged to ``BiblesResourcesDB.get_verse_count``.

    ``chapter``
        The chapter whose verses are being counted.
    """
    log.debug('HTTPBible.get_verse_count("%s", %s)', book_id, chapter)
    verse_total = BiblesResourcesDB.get_verse_count(book_id, chapter)
    return verse_total
|
2011-01-05 19:48:01 +00:00
|
|
|
|
2013-02-03 19:23:12 +00:00
|
|
|
def _get_application(self):
    """
    Return the ``application`` object held in the ``Registry``.

    On Windows (``os.name == 'nt'``) the registry is queried on every access. On every other platform the first
    lookup is stored in ``self._application`` and that stored object is returned from then on.
    """
    on_windows = os.name == 'nt'
    if on_windows:
        return Registry().get('application')
    if not hasattr(self, '_application'):
        self._application = Registry().get('application')
    return self._application

application = property(_get_application)
|
2013-02-03 09:07:31 +00:00
|
|
|
|
2013-06-23 19:51:17 +00:00
|
|
|
|
2013-04-06 17:59:07 +00:00
|
|
|
def get_soup_for_bible_ref(reference_url, header=None, pre_parse_regex=None, pre_parse_substitute=None):
    """
    Download a webpage and hand back its parsed soup, or ``None`` when there is no URL, the download fails or the
    page cannot be parsed. The user is told about download and parse failures through ``send_error_message``.

    ``reference_url``
        The URL to obtain the soup from.

    ``header``
        An optional HTTP header to pass to the bible web server.

    ``pre_parse_regex``
        A regular expression to run on the webpage. Allows manipulation of the webpage before passing to
        BeautifulSoup for parsing. It is only applied when ``pre_parse_substitute`` is not ``None``.

    ``pre_parse_substitute``
        The text to replace any matches to the regular expression with.
    """
    if not reference_url:
        return None
    page = get_web_page(reference_url, header, True)
    if not page:
        send_error_message('download')
        return None
    page_source = page.read()
    needs_pre_parse = pre_parse_regex and pre_parse_substitute is not None
    if needs_pre_parse:
        page_source = re.sub(pre_parse_regex, pre_parse_substitute, page_source)
    soup = None
    try:
        soup = BeautifulSoup(page_source)
        # NOTE(review): the string returned by ``sub`` is thrown away, so this call does not change the soup
        # that is returned. Confirm whether the cleaned text was meant to be parsed instead.
        CLEANER_REGEX.sub('', str(soup))
    except HTMLParseError:
        log.exception('BeautifulSoup could not parse the bible page.')
    if soup:
        # Let the application handle pending events before handing the soup back.
        Registry().get('application').process_events()
        return soup
    send_error_message('parse')
    return None
|
|
|
|
|
2013-03-30 06:56:28 +00:00
|
|
|
|
2011-01-13 17:55:29 +00:00
|
|
|
def send_error_message(error_type):
    """
    Show the user a standard error message box for a known kind of failure.

    ``error_type``
        The type of error that occurred. ``'download'`` and ``'parse'`` each show their own message; any other
        value shows nothing.
    """
    if error_type == 'download':
        title = translate('BiblesPlugin.HTTPBible', 'Download Error')
        message = translate('BiblesPlugin.HTTPBible', 'There was a problem downloading your verse selection. Please check '
            'your Internet connection, and if this error continues to occur please consider reporting a bug.')
    elif error_type == 'parse':
        title = translate('BiblesPlugin.HTTPBible', 'Parse Error')
        message = translate('BiblesPlugin.HTTPBible', 'There was a problem extracting your verse selection. If this error '
            'continues to occur please consider reporting a bug.')
    else:
        # Unknown error types are ignored.
        return
    critical_error_message_box(title, message)
|