From 5ac5c6cd68f17cca78b3358c8f18b7a67b391659 Mon Sep 17 00:00:00 2001 From: Philip Ridout Date: Thu, 11 Aug 2016 20:02:29 +0100 Subject: [PATCH 01/11] split the web bible importers out into their own files --- .../plugins/bibles/forms/bibleimportform.py | 4 +- openlp/plugins/bibles/lib/importers/http.py | 535 ------------------ openlp/plugins/bibles/lib/importers/osis.py | 2 +- .../openlp_plugins/bibles/test_lib_http.py | 4 +- 4 files changed, 7 insertions(+), 538 deletions(-) diff --git a/openlp/plugins/bibles/forms/bibleimportform.py b/openlp/plugins/bibles/forms/bibleimportform.py index 3d02228ca..e9eee88d5 100644 --- a/openlp/plugins/bibles/forms/bibleimportform.py +++ b/openlp/plugins/bibles/forms/bibleimportform.py @@ -40,7 +40,9 @@ from openlp.core.ui.lib.wizard import OpenLPWizard, WizardStrings from openlp.core.common.languagemanager import get_locale_key from openlp.plugins.bibles.lib.manager import BibleFormat from openlp.plugins.bibles.lib.db import clean_filename -from openlp.plugins.bibles.lib.importers.http import CWExtract, BGExtract, BSExtract +from openlp.plugins.bibles.lib.importers.biblegateway import BGExtract +from openlp.plugins.bibles.lib.importers.bibleserver import BSExtract +from openlp.plugins.bibles.lib.importers.crosswalk import CWExtract log = logging.getLogger(__name__) diff --git a/openlp/plugins/bibles/lib/importers/http.py b/openlp/plugins/bibles/lib/importers/http.py index 6921c9005..5afd107f6 100644 --- a/openlp/plugins/bibles/lib/importers/http.py +++ b/openlp/plugins/bibles/lib/importers/http.py @@ -38,545 +38,10 @@ from openlp.plugins.bibles.lib.bibleimport import BibleImport from openlp.plugins.bibles.lib.db import BibleDB, BiblesResourcesDB, Book CLEANER_REGEX = re.compile(r' |
|\'\+\'') -FIX_PUNKCTUATION_REGEX = re.compile(r'[ ]+([.,;])') -REDUCE_SPACES_REGEX = re.compile(r'[ ]{2,}') -UGLY_CHARS = { - '\u2014': ' - ', - '\u2018': '\'', - '\u2019': '\'', - '\u201c': '"', - '\u201d': '"', - ' ': ' ' -} -VERSE_NUMBER_REGEX = re.compile(r'v(\d{1,2})(\d{3})(\d{3}) verse.*') - -BIBLESERVER_LANGUAGE_CODE = { - 'fl_1': 'de', - 'fl_2': 'en', - 'fl_3': 'fr', - 'fl_4': 'it', - 'fl_5': 'es', - 'fl_6': 'pt', - 'fl_7': 'ru', - 'fl_8': 'sv', - 'fl_9': 'no', - 'fl_10': 'nl', - 'fl_11': 'cs', - 'fl_12': 'sk', - 'fl_13': 'ro', - 'fl_14': 'hr', - 'fl_15': 'hu', - 'fl_16': 'bg', - 'fl_17': 'ar', - 'fl_18': 'tr', - 'fl_19': 'pl', - 'fl_20': 'da', - 'fl_21': 'zh' -} - -CROSSWALK_LANGUAGES = { - 'Portuguese': 'pt', - 'German': 'de', - 'Italian': 'it', - 'Español': 'es', - 'French': 'fr', - 'Dutch': 'nl' -} log = logging.getLogger(__name__) -class BGExtract(RegistryProperties): - """ - Extract verses from BibleGateway - """ - def __init__(self, proxy_url=None): - log.debug('BGExtract.init("{url}")'.format(url=proxy_url)) - self.proxy_url = proxy_url - socket.setdefaulttimeout(30) - - def _remove_elements(self, parent, tag, class_=None): - """ - Remove a particular element from the BeautifulSoup tree. - - :param parent: The element from which items need to be removed. - :param tag: A string of the tab type, e.g. "div" - :param class_: An HTML class attribute for further qualification. - """ - if class_: - all_tags = parent.find_all(tag, class_) - else: - all_tags = parent.find_all(tag) - for element in all_tags: - element.extract() - - def _extract_verse(self, tag): - """ - Extract a verse (or part of a verse) from a tag. - - :param tag: The BeautifulSoup Tag element with the stuff we want. 
- """ - if isinstance(tag, NavigableString): - return None, str(tag) - elif tag.get('class') and (tag.get('class')[0] == 'versenum' or tag.get('class')[0] == 'versenum mid-line'): - verse = str(tag.string).replace('[', '').replace(']', '').strip() - return verse, None - elif tag.get('class') and tag.get('class')[0] == 'chapternum': - verse = '1' - return verse, None - else: - verse = None - text = '' - for child in tag.contents: - c_verse, c_text = self._extract_verse(child) - if c_verse: - verse = c_verse - if text and c_text: - text += c_text - elif c_text is not None: - text = c_text - return verse, text - - def _clean_soup(self, tag): - """ - Remove all the rubbish from the HTML page. - - :param tag: The base tag within which we want to remove stuff. - """ - self._remove_elements(tag, 'sup', 'crossreference') - self._remove_elements(tag, 'sup', 'footnote') - self._remove_elements(tag, 'div', 'footnotes') - self._remove_elements(tag, 'div', 'crossrefs') - self._remove_elements(tag, 'h3') - self._remove_elements(tag, 'h4') - self._remove_elements(tag, 'h5') - - def _extract_verses(self, tags): - """ - Extract all the verses from a pre-prepared list of HTML tags. - - :param tags: A list of BeautifulSoup Tag elements. 
- """ - verses = [] - tags = tags[::-1] - current_text = '' - for tag in tags: - verse = None - text = '' - for child in tag.contents: - c_verse, c_text = self._extract_verse(child) - if c_verse: - verse = c_verse - if text and c_text: - text += c_text - elif c_text is not None: - text = c_text - if not verse: - current_text = text + ' ' + current_text - else: - text += ' ' + current_text - current_text = '' - if text: - for old, new in UGLY_CHARS.items(): - text = text.replace(old, new) - text = ' '.join(text.split()) - if verse and text: - verse = verse.strip() - try: - verse = int(verse) - except ValueError: - verse_parts = verse.split('-') - if len(verse_parts) > 1: - verse = int(verse_parts[0]) - except TypeError: - log.warning('Illegal verse number: {verse:d}'.format(verse=verse)) - verses.append((verse, text)) - verse_list = {} - for verse, text in verses[::-1]: - verse_list[verse] = text - return verse_list - - def _extract_verses_old(self, div): - """ - Use the old style of parsing for those Bibles on BG who mysteriously have not been migrated to the new (still - broken) HTML. - - :param div: The parent div. - """ - verse_list = {} - # Cater for inconsistent mark up in the first verse of a chapter. - first_verse = div.find('versenum') - if first_verse and first_verse.contents: - verse_list[1] = str(first_verse.contents[0]) - for verse in div('sup', 'versenum'): - raw_verse_num = verse.next_element - clean_verse_num = 0 - # Not all verses exist in all translations and may or may not be represented by a verse number. If they are - # not fine, if they are it will probably be in a format that breaks int(). We will then have no idea what - # garbage may be sucked in to the verse text so if we do not get a clean int() then ignore the verse - # completely. 
- try: - clean_verse_num = int(str(raw_verse_num)) - except ValueError: - verse_parts = str(raw_verse_num).split('-') - if len(verse_parts) > 1: - clean_verse_num = int(verse_parts[0]) - except TypeError: - log.warning('Illegal verse number: {verse:d}'.format(verse=raw_verse_num)) - if clean_verse_num: - verse_text = raw_verse_num.next_element - part = raw_verse_num.next_element.next_element - while not (isinstance(part, Tag) and part.get('class')[0] == 'versenum'): - # While we are still in the same verse grab all the text. - if isinstance(part, NavigableString): - verse_text += part - if isinstance(part.next_element, Tag) and part.next_element.name == 'div': - # Run out of verses so stop. - break - part = part.next_element - verse_list[clean_verse_num] = str(verse_text) - return verse_list - - def get_bible_chapter(self, version, book_name, chapter): - """ - Access and decode Bibles via the BibleGateway website. - - :param version: The version of the Bible like 31 for New International version. - :param book_name: Name of the Book. - :param chapter: Chapter number. 
- """ - log.debug('BGExtract.get_bible_chapter("{version}", "{name}", "{chapter}")'.format(version=version, - name=book_name, - chapter=chapter)) - url_book_name = urllib.parse.quote(book_name.encode("utf-8")) - url_params = 'search={name}+{chapter}&version={version}'.format(name=url_book_name, - chapter=chapter, - version=version) - soup = get_soup_for_bible_ref( - 'http://biblegateway.com/passage/?{url}'.format(url=url_params), - pre_parse_regex=r'', pre_parse_substitute='') - if not soup: - return None - div = soup.find('div', 'result-text-style-normal') - if not div: - return None - self._clean_soup(div) - span_list = div.find_all('span', 'text') - log.debug('Span list: {span}'.format(span=span_list)) - if not span_list: - # If we don't get any spans then we must have the old HTML format - verse_list = self._extract_verses_old(div) - else: - verse_list = self._extract_verses(span_list) - if not verse_list: - log.debug('No content found in the BibleGateway response.') - send_error_message('parse') - return None - return SearchResults(book_name, chapter, verse_list) - - def get_books_from_http(self, version): - """ - Load a list of all books a Bible contains from BibleGateway website. 
- - :param version: The version of the Bible like NIV for New International Version - """ - log.debug('BGExtract.get_books_from_http("{version}")'.format(version=version)) - url_params = urllib.parse.urlencode({'action': 'getVersionInfo', 'vid': '{version}'.format(version=version)}) - reference_url = 'http://biblegateway.com/versions/?{url}#books'.format(url=url_params) - page = get_web_page(reference_url) - if not page: - send_error_message('download') - return None - page_source = page.read() - try: - page_source = str(page_source, 'utf8') - except UnicodeDecodeError: - page_source = str(page_source, 'cp1251') - try: - soup = BeautifulSoup(page_source, 'lxml') - except Exception: - log.error('BeautifulSoup could not parse the Bible page.') - send_error_message('parse') - return None - if not soup: - send_error_message('parse') - return None - self.application.process_events() - content = soup.find('table', 'infotable') - if content: - content = content.find_all('tr') - if not content: - log.error('No books found in the Biblegateway response.') - send_error_message('parse') - return None - books = [] - for book in content: - book = book.find('td') - if book: - books.append(book.contents[1]) - return books - - def get_bibles_from_http(self): - """ - Load a list of bibles from BibleGateway website. 
- - returns a list in the form [(biblename, biblekey, language_code)] - """ - log.debug('BGExtract.get_bibles_from_http') - bible_url = 'https://biblegateway.com/versions/' - soup = get_soup_for_bible_ref(bible_url) - if not soup: - return None - bible_select = soup.find('select', {'class': 'search-translation-select'}) - if not bible_select: - log.debug('No select tags found - did site change?') - return None - option_tags = bible_select.find_all('option') - if not option_tags: - log.debug('No option tags found - did site change?') - return None - current_lang = '' - bibles = [] - for ot in option_tags: - tag_class = '' - try: - tag_class = ot['class'][0] - except KeyError: - tag_class = '' - tag_text = ot.get_text() - if tag_class == 'lang': - current_lang = tag_text[tag_text.find('(') + 1:tag_text.find(')')].lower() - elif tag_class == 'spacer': - continue - else: - bibles.append((tag_text, ot['value'], current_lang)) - return bibles - - -class BSExtract(RegistryProperties): - """ - Extract verses from Bibleserver.com - """ - def __init__(self, proxy_url=None): - log.debug('BSExtract.init("{url}")'.format(url=proxy_url)) - self.proxy_url = proxy_url - socket.setdefaulttimeout(30) - - def get_bible_chapter(self, version, book_name, chapter): - """ - Access and decode bibles via Bibleserver mobile website - - :param version: The version of the bible like NIV for New International Version - :param book_name: Text name of bible book e.g. Genesis, 1. 
John, 1John or Offenbarung - :param chapter: Chapter number - """ - log.debug('BSExtract.get_bible_chapter("{version}", "{book}", "{chapter}")'.format(version=version, - book=book_name, - chapter=chapter)) - url_version = urllib.parse.quote(version.encode("utf-8")) - url_book_name = urllib.parse.quote(book_name.encode("utf-8")) - chapter_url = 'http://m.bibleserver.com/text/{version}/{name}{chapter:d}'.format(version=url_version, - name=url_book_name, - chapter=chapter) - header = ('Accept-Language', 'en') - soup = get_soup_for_bible_ref(chapter_url, header) - if not soup: - return None - self.application.process_events() - content = soup.find('div', 'content') - if not content: - log.error('No verses found in the Bibleserver response.') - send_error_message('parse') - return None - content = content.find('div').find_all('div') - verses = {} - for verse in content: - self.application.process_events() - versenumber = int(VERSE_NUMBER_REGEX.sub(r'\3', ' '.join(verse['class']))) - verses[versenumber] = verse.contents[1].rstrip('\n') - return SearchResults(book_name, chapter, verses) - - def get_books_from_http(self, version): - """ - Load a list of all books a Bible contains from Bibleserver mobile website. 
- - :param version: The version of the Bible like NIV for New International Version - """ - log.debug('BSExtract.get_books_from_http("{version}")'.format(version=version)) - url_version = urllib.parse.quote(version.encode("utf-8")) - chapter_url = 'http://m.bibleserver.com/overlay/selectBook?translation={version}'.format(version=url_version) - soup = get_soup_for_bible_ref(chapter_url) - if not soup: - return None - content = soup.find('ul') - if not content: - log.error('No books found in the Bibleserver response.') - send_error_message('parse') - return None - content = content.find_all('li') - return [book.contents[0].contents[0] for book in content if len(book.contents[0].contents)] - - def get_bibles_from_http(self): - """ - Load a list of bibles from Bibleserver website. - - returns a list in the form [(biblename, biblekey, language_code)] - """ - log.debug('BSExtract.get_bibles_from_http') - bible_url = 'http://www.bibleserver.com/index.php?language=2' - soup = get_soup_for_bible_ref(bible_url) - if not soup: - return None - bible_links = soup.find_all('a', {'class': 'trlCell'}) - if not bible_links: - log.debug('No a tags found - did site change?') - return None - bibles = [] - for link in bible_links: - bible_name = link.get_text() - # Skip any audio - if 'audio' in bible_name.lower(): - continue - try: - bible_link = link['href'] - bible_key = bible_link[bible_link.rfind('/') + 1:] - css_classes = link['class'] - except KeyError: - log.debug('No href/class attribute found - did site change?') - language_code = '' - for css_class in css_classes: - if css_class.startswith('fl_'): - try: - language_code = BIBLESERVER_LANGUAGE_CODE[css_class] - except KeyError: - language_code = '' - bibles.append((bible_name, bible_key, language_code)) - return bibles - - -class CWExtract(RegistryProperties): - """ - Extract verses from CrossWalk/BibleStudyTools - """ - def __init__(self, proxy_url=None): - log.debug('CWExtract.init("{url}")'.format(url=proxy_url)) - 
self.proxy_url = proxy_url - socket.setdefaulttimeout(30) - - def get_bible_chapter(self, version, book_name, chapter): - """ - Access and decode bibles via the Crosswalk website - - :param version: The version of the Bible like niv for New International Version - :param book_name: Text name of in english e.g. 'gen' for Genesis - :param chapter: Chapter number - """ - log.debug('CWExtract.get_bible_chapter("{version}", "{book}", "{chapter}")'.format(version=version, - book=book_name, - chapter=chapter)) - url_book_name = book_name.replace(' ', '-') - url_book_name = url_book_name.lower() - url_book_name = urllib.parse.quote(url_book_name.encode("utf-8")) - chapter_url = 'http://www.biblestudytools.com/{version}/{book}/{chapter}.html'.format(version=version, - book=url_book_name, - chapter=chapter) - soup = get_soup_for_bible_ref(chapter_url) - if not soup: - return None - self.application.process_events() - verses_div = soup.find_all('div', 'verse') - if not verses_div: - log.error('No verses found in the CrossWalk response.') - send_error_message('parse') - return None - verses = {} - for verse in verses_div: - self.application.process_events() - verse_number = int(verse.find('strong').contents[0]) - verse_span = verse.find('span') - tags_to_remove = verse_span.find_all(['a', 'sup']) - for tag in tags_to_remove: - tag.decompose() - verse_text = verse_span.get_text() - self.application.process_events() - # Fix up leading and trailing spaces, multiple spaces, and spaces between text and , and . - verse_text = verse_text.strip('\n\r\t ') - verse_text = REDUCE_SPACES_REGEX.sub(' ', verse_text) - verse_text = FIX_PUNKCTUATION_REGEX.sub(r'\1', verse_text) - verses[verse_number] = verse_text - return SearchResults(book_name, chapter, verses) - - def get_books_from_http(self, version): - """ - Load a list of all books a Bible contain from the Crosswalk website. 
- - :param version: The version of the bible like NIV for New International Version - """ - log.debug('CWExtract.get_books_from_http("{version}")'.format(version=version)) - chapter_url = 'http://www.biblestudytools.com/{version}/'.format(version=version) - soup = get_soup_for_bible_ref(chapter_url) - if not soup: - return None - content = soup.find_all('h4', {'class': 'small-header'}) - if not content: - log.error('No books found in the Crosswalk response.') - send_error_message('parse') - return None - books = [] - for book in content: - books.append(book.contents[0]) - return books - - def get_bibles_from_http(self): - """ - Load a list of bibles from Crosswalk website. - returns a list in the form [(biblename, biblekey, language_code)] - """ - log.debug('CWExtract.get_bibles_from_http') - bible_url = 'http://www.biblestudytools.com/bible-versions/' - soup = get_soup_for_bible_ref(bible_url) - if not soup: - return None - h4_tags = soup.find_all('h4', {'class': 'small-header'}) - if not h4_tags: - log.debug('No h4 tags found - did site change?') - return None - bibles = [] - for h4t in h4_tags: - short_name = None - if h4t.span: - short_name = h4t.span.get_text().strip().lower() - else: - log.error('No span tag found - did site change?') - return None - if not short_name: - continue - h4t.span.extract() - tag_text = h4t.get_text().strip() - # The names of non-english bibles has their language in parentheses at the end - if tag_text.endswith(')'): - language = tag_text[tag_text.rfind('(') + 1:-1] - if language in CROSSWALK_LANGUAGES: - language_code = CROSSWALK_LANGUAGES[language] - else: - language_code = '' - # ... except for those that don't... 
- elif 'latin' in tag_text.lower(): - language_code = 'la' - elif 'la biblia' in tag_text.lower() or 'nueva' in tag_text.lower(): - language_code = 'es' - elif 'chinese' in tag_text.lower(): - language_code = 'zh' - elif 'greek' in tag_text.lower(): - language_code = 'el' - elif 'nova' in tag_text.lower(): - language_code = 'pt' - else: - language_code = 'en' - bibles.append((tag_text, short_name, language_code)) - return bibles - - class HTTPBible(BibleImport, RegistryProperties): log.info('{name} HTTPBible loaded'.format(name=__name__)) diff --git a/openlp/plugins/bibles/lib/importers/osis.py b/openlp/plugins/bibles/lib/importers/osis.py index 99a138acd..c833277fe 100644 --- a/openlp/plugins/bibles/lib/importers/osis.py +++ b/openlp/plugins/bibles/lib/importers/osis.py @@ -108,7 +108,7 @@ class OSISBible(BibleImport): if self.stop_import_flag: break # Remove div-tags in the book - etree.strip_tags(book, ('{http://www.bibletechnologies.net/2003/OSIS/namespace}div')) + etree.strip_tags(book, '{http://www.bibletechnologies.net/2003/OSIS/namespace}div') book_ref_id = self.get_book_ref_id_by_name(book.get('osisID'), num_books, language_id) if not book_ref_id: log.error('Importing books from "{name}" failed'.format(name=self.filename)) diff --git a/tests/interfaces/openlp_plugins/bibles/test_lib_http.py b/tests/interfaces/openlp_plugins/bibles/test_lib_http.py index 084bfa476..fd557eece 100644 --- a/tests/interfaces/openlp_plugins/bibles/test_lib_http.py +++ b/tests/interfaces/openlp_plugins/bibles/test_lib_http.py @@ -25,7 +25,9 @@ from unittest import TestCase, skip from openlp.core.common import Registry -from openlp.plugins.bibles.lib.importers.http import BGExtract, CWExtract, BSExtract +from openlp.plugins.bibles.lib.importers.biblegateway import BGExtract +from openlp.plugins.bibles.lib.importers.bibleserver import BSExtract +from openlp.plugins.bibles.lib.importers.crosswalk import CWExtract from tests.interfaces import MagicMock From 
f5480640f687d8ed44700243eab02135b42df4fe Mon Sep 17 00:00:00 2001 From: Philip Ridout Date: Thu, 11 Aug 2016 20:07:21 +0100 Subject: [PATCH 02/11] more files --- .../bibles/lib/importers/biblegateway.py | 313 ++++++++++++++++++ .../bibles/lib/importers/bibleserver.py | 162 +++++++++ .../plugins/bibles/lib/importers/crosswalk.py | 171 ++++++++++ 3 files changed, 646 insertions(+) create mode 100644 openlp/plugins/bibles/lib/importers/biblegateway.py create mode 100644 openlp/plugins/bibles/lib/importers/bibleserver.py create mode 100644 openlp/plugins/bibles/lib/importers/crosswalk.py diff --git a/openlp/plugins/bibles/lib/importers/biblegateway.py b/openlp/plugins/bibles/lib/importers/biblegateway.py new file mode 100644 index 000000000..c6a8074bf --- /dev/null +++ b/openlp/plugins/bibles/lib/importers/biblegateway.py @@ -0,0 +1,313 @@ +# -*- coding: utf-8 -*- +# vim: autoindent shiftwidth=4 expandtab textwidth=120 tabstop=4 softtabstop=4 + +############################################################################### +# OpenLP - Open Source Lyrics Projection # +# --------------------------------------------------------------------------- # +# Copyright (c) 2008-2016 OpenLP Developers # +# --------------------------------------------------------------------------- # +# This program is free software; you can redistribute it and/or modify it # +# under the terms of the GNU General Public License as published by the Free # +# Software Foundation; version 2 of the License. # +# # +# This program is distributed in the hope that it will be useful, but WITHOUT # +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # +# more details. 
# +# # +# You should have received a copy of the GNU General Public License along # +# with this program; if not, write to the Free Software Foundation, Inc., 59 # +# Temple Place, Suite 330, Boston, MA 02111-1307 USA # +############################################################################### +""" +The :mod:`biblegateway` module enables OpenLP to retrieve scripture from the BibleGateway website. +""" +import logging +import socket +import urllib.parse +import urllib.error + +from bs4 import BeautifulSoup, NavigableString, Tag + +from openlp.core.common import RegistryProperties +from openlp.core.lib.webpagereader import get_web_page +from openlp.plugins.bibles.lib import SearchResults +from openlp.plugins.bibles.lib.importers.http import get_soup_for_bible_ref, send_error_message + +UGLY_CHARS = { + '\u2014': ' - ', + '\u2018': '\'', + '\u2019': '\'', + '\u201c': '"', + '\u201d': '"', + ' ': ' ' +} + +log = logging.getLogger(__name__) + + +class BGExtract(RegistryProperties): + """ + Extract verses from BibleGateway + """ + def __init__(self, proxy_url=None): + log.debug('BGExtract.init("{url}")'.format(url=proxy_url)) + self.proxy_url = proxy_url + socket.setdefaulttimeout(30) + + def _remove_elements(self, parent, tag, class_=None): + """ + Remove a particular element from the BeautifulSoup tree. + + :param parent: The element from which items need to be removed. + :param tag: A string of the tag type, e.g. "div" + :param class_: An HTML class attribute for further qualification. + """ + if class_: + all_tags = parent.find_all(tag, class_) + else: + all_tags = parent.find_all(tag) + for element in all_tags: + element.extract() + + def _extract_verse(self, tag): + """ + Extract a verse (or part of a verse) from a tag. + + :param tag: The BeautifulSoup Tag element with the stuff we want.
+ """ + if isinstance(tag, NavigableString): + return None, str(tag) + elif tag.get('class') and (tag.get('class')[0] == 'versenum' or tag.get('class')[0] == 'versenum mid-line'): + verse = str(tag.string).replace('[', '').replace(']', '').strip() + return verse, None + elif tag.get('class') and tag.get('class')[0] == 'chapternum': + verse = '1' + return verse, None + else: + verse = None + text = '' + for child in tag.contents: + c_verse, c_text = self._extract_verse(child) + if c_verse: + verse = c_verse + if text and c_text: + text += c_text + elif c_text is not None: + text = c_text + return verse, text + + def _clean_soup(self, tag): + """ + Remove all the rubbish from the HTML page. + + :param tag: The base tag within which we want to remove stuff. + """ + self._remove_elements(tag, 'sup', 'crossreference') + self._remove_elements(tag, 'sup', 'footnote') + self._remove_elements(tag, 'div', 'footnotes') + self._remove_elements(tag, 'div', 'crossrefs') + self._remove_elements(tag, 'h3') + self._remove_elements(tag, 'h4') + self._remove_elements(tag, 'h5') + + def _extract_verses(self, tags): + """ + Extract all the verses from a pre-prepared list of HTML tags. + + :param tags: A list of BeautifulSoup Tag elements. 
+ """ + verses = [] + tags = tags[::-1] + current_text = '' + for tag in tags: + verse = None + text = '' + for child in tag.contents: + c_verse, c_text = self._extract_verse(child) + if c_verse: + verse = c_verse + if text and c_text: + text += c_text + elif c_text is not None: + text = c_text + if not verse: + current_text = text + ' ' + current_text + else: + text += ' ' + current_text + current_text = '' + if text: + for old, new in UGLY_CHARS.items(): + text = text.replace(old, new) + text = ' '.join(text.split()) + if verse and text: + verse = verse.strip() + try: + verse = int(verse) + except ValueError: + verse_parts = verse.split('-') + if len(verse_parts) > 1: + verse = int(verse_parts[0]) + except TypeError: + log.warning('Illegal verse number: {verse:d}'.format(verse=verse)) + verses.append((verse, text)) + verse_list = {} + for verse, text in verses[::-1]: + verse_list[verse] = text + return verse_list + + def _extract_verses_old(self, div): + """ + Use the old style of parsing for those Bibles on BG who mysteriously have not been migrated to the new (still + broken) HTML. + + :param div: The parent div. + """ + verse_list = {} + # Cater for inconsistent mark up in the first verse of a chapter. + first_verse = div.find('versenum') + if first_verse and first_verse.contents: + verse_list[1] = str(first_verse.contents[0]) + for verse in div('sup', 'versenum'): + raw_verse_num = verse.next_element + clean_verse_num = 0 + # Not all verses exist in all translations and may or may not be represented by a verse number. If they are + # not fine, if they are it will probably be in a format that breaks int(). We will then have no idea what + # garbage may be sucked in to the verse text so if we do not get a clean int() then ignore the verse + # completely. 
+ try: + clean_verse_num = int(str(raw_verse_num)) + except ValueError: + verse_parts = str(raw_verse_num).split('-') + if len(verse_parts) > 1: + clean_verse_num = int(verse_parts[0]) + except TypeError: + log.warning('Illegal verse number: {verse:d}'.format(verse=raw_verse_num)) + if clean_verse_num: + verse_text = raw_verse_num.next_element + part = raw_verse_num.next_element.next_element + while not (isinstance(part, Tag) and part.get('class')[0] == 'versenum'): + # While we are still in the same verse grab all the text. + if isinstance(part, NavigableString): + verse_text += part + if isinstance(part.next_element, Tag) and part.next_element.name == 'div': + # Run out of verses so stop. + break + part = part.next_element + verse_list[clean_verse_num] = str(verse_text) + return verse_list + + def get_bible_chapter(self, version, book_name, chapter): + """ + Access and decode Bibles via the BibleGateway website. + + :param version: The version of the Bible like 31 for New International version. + :param book_name: Name of the Book. + :param chapter: Chapter number. 
+ """ + log.debug('BGExtract.get_bible_chapter("{version}", "{name}", "{chapter}")'.format(version=version, + name=book_name, + chapter=chapter)) + url_book_name = urllib.parse.quote(book_name.encode("utf-8")) + url_params = 'search={name}+{chapter}&version={version}'.format(name=url_book_name, + chapter=chapter, + version=version) + soup = get_soup_for_bible_ref( + 'http://biblegateway.com/passage/?{url}'.format(url=url_params), + pre_parse_regex=r'', pre_parse_substitute='') + if not soup: + return None + div = soup.find('div', 'result-text-style-normal') + if not div: + return None + self._clean_soup(div) + span_list = div.find_all('span', 'text') + log.debug('Span list: {span}'.format(span=span_list)) + if not span_list: + # If we don't get any spans then we must have the old HTML format + verse_list = self._extract_verses_old(div) + else: + verse_list = self._extract_verses(span_list) + if not verse_list: + log.debug('No content found in the BibleGateway response.') + send_error_message('parse') + return None + return SearchResults(book_name, chapter, verse_list) + + def get_books_from_http(self, version): + """ + Load a list of all books a Bible contains from BibleGateway website. 
+ + :param version: The version of the Bible like NIV for New International Version + """ + log.debug('BGExtract.get_books_from_http("{version}")'.format(version=version)) + url_params = urllib.parse.urlencode({'action': 'getVersionInfo', 'vid': '{version}'.format(version=version)}) + reference_url = 'http://biblegateway.com/versions/?{url}#books'.format(url=url_params) + page = get_web_page(reference_url) + if not page: + send_error_message('download') + return None + page_source = page.read() + try: + page_source = str(page_source, 'utf8') + except UnicodeDecodeError: + page_source = str(page_source, 'cp1251') + try: + soup = BeautifulSoup(page_source, 'lxml') + except Exception: + log.error('BeautifulSoup could not parse the Bible page.') + send_error_message('parse') + return None + if not soup: + send_error_message('parse') + return None + self.application.process_events() + content = soup.find('table', 'infotable') + if content: + content = content.find_all('tr') + if not content: + log.error('No books found in the Biblegateway response.') + send_error_message('parse') + return None + books = [] + for book in content: + book = book.find('td') + if book: + books.append(book.contents[1]) + return books + + def get_bibles_from_http(self): + """ + Load a list of bibles from BibleGateway website. 
+ + returns a list in the form [(biblename, biblekey, language_code)] + """ + log.debug('BGExtract.get_bibles_from_http') + bible_url = 'https://biblegateway.com/versions/' + soup = get_soup_for_bible_ref(bible_url) + if not soup: + return None + bible_select = soup.find('select', {'class': 'search-translation-select'}) + if not bible_select: + log.debug('No select tags found - did site change?') + return None + option_tags = bible_select.find_all('option') + if not option_tags: + log.debug('No option tags found - did site change?') + return None + current_lang = '' + bibles = [] + for ot in option_tags: + tag_class = '' + try: + tag_class = ot['class'][0] + except KeyError: + tag_class = '' + tag_text = ot.get_text() + if tag_class == 'lang': + current_lang = tag_text[tag_text.find('(') + 1:tag_text.find(')')].lower() + elif tag_class == 'spacer': + continue + else: + bibles.append((tag_text, ot['value'], current_lang)) + return bibles diff --git a/openlp/plugins/bibles/lib/importers/bibleserver.py b/openlp/plugins/bibles/lib/importers/bibleserver.py new file mode 100644 index 000000000..e651b84ab --- /dev/null +++ b/openlp/plugins/bibles/lib/importers/bibleserver.py @@ -0,0 +1,162 @@ +# -*- coding: utf-8 -*- +# vim: autoindent shiftwidth=4 expandtab textwidth=120 tabstop=4 softtabstop=4 + +############################################################################### +# OpenLP - Open Source Lyrics Projection # +# --------------------------------------------------------------------------- # +# Copyright (c) 2008-2016 OpenLP Developers # +# --------------------------------------------------------------------------- # +# This program is free software; you can redistribute it and/or modify it # +# under the terms of the GNU General Public License as published by the Free # +# Software Foundation; version 2 of the License. 
# +# # +# This program is distributed in the hope that it will be useful, but WITHOUT # +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # +# more details. # +# # +# You should have received a copy of the GNU General Public License along # +# with this program; if not, write to the Free Software Foundation, Inc., 59 # +# Temple Place, Suite 330, Boston, MA 02111-1307 USA # +############################################################################### +""" +The :mod:`http` module enables OpenLP to retrieve scripture from bible websites. +""" +import logging +import re +import socket +import urllib.parse +import urllib.error + +from openlp.core.common import RegistryProperties +from openlp.plugins.bibles.lib import SearchResults +from openlp.plugins.bibles.lib.http import get_soup_for_bible_ref, send_error_message + +VERSE_NUMBER_REGEX = re.compile(r'v(\d{1,2})(\d{3})(\d{3}) verse.*') + +BIBLESERVER_LANGUAGE_CODE = { + 'fl_1': 'de', + 'fl_2': 'en', + 'fl_3': 'fr', + 'fl_4': 'it', + 'fl_5': 'es', + 'fl_6': 'pt', + 'fl_7': 'ru', + 'fl_8': 'sv', + 'fl_9': 'no', + 'fl_10': 'nl', + 'fl_11': 'cs', + 'fl_12': 'sk', + 'fl_13': 'ro', + 'fl_14': 'hr', + 'fl_15': 'hu', + 'fl_16': 'bg', + 'fl_17': 'ar', + 'fl_18': 'tr', + 'fl_19': 'pl', + 'fl_20': 'da', + 'fl_21': 'zh' +} + +log = logging.getLogger(__name__) + + +class BSExtract(RegistryProperties): + """ + Extract verses from Bibleserver.com + """ + def __init__(self, proxy_url=None): + log.debug('BSExtract.init("{url}")'.format(url=proxy_url)) + self.proxy_url = proxy_url + socket.setdefaulttimeout(30) + + def get_bible_chapter(self, version, book_name, chapter): + """ + Access and decode bibles via Bibleserver mobile website + + :param version: The version of the bible like NIV for New International Version + :param book_name: Text name of bible book e.g. Genesis, 1. 
John, 1John or Offenbarung + :param chapter: Chapter number + """ + log.debug('BSExtract.get_bible_chapter("{version}", "{book}", "{chapter}")'.format(version=version, + book=book_name, + chapter=chapter)) + url_version = urllib.parse.quote(version.encode("utf-8")) + url_book_name = urllib.parse.quote(book_name.encode("utf-8")) + chapter_url = 'http://m.bibleserver.com/text/{version}/{name}{chapter:d}'.format(version=url_version, + name=url_book_name, + chapter=chapter) + header = ('Accept-Language', 'en') + soup = get_soup_for_bible_ref(chapter_url, header) + if not soup: + return None + self.application.process_events() + content = soup.find('div', 'content') + if not content: + log.error('No verses found in the Bibleserver response.') + send_error_message('parse') + return None + content = content.find('div').find_all('div') + verses = {} + for verse in content: + self.application.process_events() + versenumber = int(VERSE_NUMBER_REGEX.sub(r'\3', ' '.join(verse['class']))) + verses[versenumber] = verse.contents[1].rstrip('\n') + return SearchResults(book_name, chapter, verses) + + def get_books_from_http(self, version): + """ + Load a list of all books a Bible contains from Bibleserver mobile website. 
+ + :param version: The version of the Bible like NIV for New International Version + """ + log.debug('BSExtract.get_books_from_http("{version}")'.format(version=version)) + url_version = urllib.parse.quote(version.encode("utf-8")) + chapter_url = 'http://m.bibleserver.com/overlay/selectBook?translation={version}'.format(version=url_version) + soup = get_soup_for_bible_ref(chapter_url) + if not soup: + return None + content = soup.find('ul') + if not content: + log.error('No books found in the Bibleserver response.') + send_error_message('parse') + return None + content = content.find_all('li') + return [book.contents[0].contents[0] for book in content if len(book.contents[0].contents)] + + def get_bibles_from_http(self): + """ + Load a list of bibles from Bibleserver website. + + returns a list in the form [(biblename, biblekey, language_code)] + """ + log.debug('BSExtract.get_bibles_from_http') + bible_url = 'http://www.bibleserver.com/index.php?language=2' + soup = get_soup_for_bible_ref(bible_url) + if not soup: + return None + bible_links = soup.find_all('a', {'class': 'trlCell'}) + if not bible_links: + log.debug('No a tags found - did site change?') + return None + bibles = [] + for link in bible_links: + bible_name = link.get_text() + # Skip any audio + if 'audio' in bible_name.lower(): + continue + try: + bible_link = link['href'] + bible_key = bible_link[bible_link.rfind('/') + 1:] + css_classes = link['class'] + except KeyError: + log.debug('No href/class attribute found - did site change?') + language_code = '' + for css_class in css_classes: + if css_class.startswith('fl_'): + try: + language_code = BIBLESERVER_LANGUAGE_CODE[css_class] + except KeyError: + language_code = '' + bibles.append((bible_name, bible_key, language_code)) + return bibles diff --git a/openlp/plugins/bibles/lib/importers/crosswalk.py b/openlp/plugins/bibles/lib/importers/crosswalk.py new file mode 100644 index 000000000..6c75209d1 --- /dev/null +++ 
b/openlp/plugins/bibles/lib/importers/crosswalk.py @@ -0,0 +1,171 @@ +# -*- coding: utf-8 -*- +# vim: autoindent shiftwidth=4 expandtab textwidth=120 tabstop=4 softtabstop=4 + +############################################################################### +# OpenLP - Open Source Lyrics Projection # +# --------------------------------------------------------------------------- # +# Copyright (c) 2008-2016 OpenLP Developers # +# --------------------------------------------------------------------------- # +# This program is free software; you can redistribute it and/or modify it # +# under the terms of the GNU General Public License as published by the Free # +# Software Foundation; version 2 of the License. # +# # +# This program is distributed in the hope that it will be useful, but WITHOUT # +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # +# more details. # +# # +# You should have received a copy of the GNU General Public License along # +# with this program; if not, write to the Free Software Foundation, Inc., 59 # +# Temple Place, Suite 330, Boston, MA 02111-1307 USA # +############################################################################### +""" +The :mod:`http` module enables OpenLP to retrieve scripture from bible websites. 
+""" +import logging +import re +import socket +import urllib.parse +import urllib.error + +from openlp.core.common import RegistryProperties +from openlp.plugins.bibles.lib import SearchResults +from openlp.plugins.bibles.lib.importers.http import get_soup_for_bible_ref, send_error_message + +FIX_PUNKCTUATION_REGEX = re.compile(r'[ ]+([.,;])') +REDUCE_SPACES_REGEX = re.compile(r'[ ]{2,}') + + +CROSSWALK_LANGUAGES = { + 'Portuguese': 'pt', + 'German': 'de', + 'Italian': 'it', + 'Español': 'es', + 'French': 'fr', + 'Dutch': 'nl' +} + +log = logging.getLogger(__name__) + + +class CWExtract(RegistryProperties): + """ + Extract verses from CrossWalk/BibleStudyTools + """ + def __init__(self, proxy_url=None): + log.debug('CWExtract.init("{url}")'.format(url=proxy_url)) + self.proxy_url = proxy_url + socket.setdefaulttimeout(30) + + def get_bible_chapter(self, version, book_name, chapter): + """ + Access and decode bibles via the Crosswalk website + + :param version: The version of the Bible like niv for New International Version + :param book_name: Text name of in english e.g. 
'gen' for Genesis + :param chapter: Chapter number + """ + log.debug('CWExtract.get_bible_chapter("{version}", "{book}", "{chapter}")'.format(version=version, + book=book_name, + chapter=chapter)) + url_book_name = book_name.replace(' ', '-') + url_book_name = url_book_name.lower() + url_book_name = urllib.parse.quote(url_book_name.encode("utf-8")) + chapter_url = 'http://www.biblestudytools.com/{version}/{book}/{chapter}.html'.format(version=version, + book=url_book_name, + chapter=chapter) + soup = get_soup_for_bible_ref(chapter_url) + if not soup: + return None + self.application.process_events() + verses_div = soup.find_all('div', 'verse') + if not verses_div: + log.error('No verses found in the CrossWalk response.') + send_error_message('parse') + return None + verses = {} + for verse in verses_div: + self.application.process_events() + verse_number = int(verse.find('strong').contents[0]) + verse_span = verse.find('span') + tags_to_remove = verse_span.find_all(['a', 'sup']) + for tag in tags_to_remove: + tag.decompose() + verse_text = verse_span.get_text() + self.application.process_events() + # Fix up leading and trailing spaces, multiple spaces, and spaces between text and , and . + verse_text = verse_text.strip('\n\r\t ') + verse_text = REDUCE_SPACES_REGEX.sub(' ', verse_text) + verse_text = FIX_PUNKCTUATION_REGEX.sub(r'\1', verse_text) + verses[verse_number] = verse_text + return SearchResults(book_name, chapter, verses) + + def get_books_from_http(self, version): + """ + Load a list of all books a Bible contain from the Crosswalk website. 
+ + :param version: The version of the bible like NIV for New International Version + """ + log.debug('CWExtract.get_books_from_http("{version}")'.format(version=version)) + chapter_url = 'http://www.biblestudytools.com/{version}/'.format(version=version) + soup = get_soup_for_bible_ref(chapter_url) + if not soup: + return None + content = soup.find_all('h4', {'class': 'small-header'}) + if not content: + log.error('No books found in the Crosswalk response.') + send_error_message('parse') + return None + books = [] + for book in content: + books.append(book.contents[0]) + return books + + def get_bibles_from_http(self): + """ + Load a list of bibles from Crosswalk website. + returns a list in the form [(biblename, biblekey, language_code)] + """ + log.debug('CWExtract.get_bibles_from_http') + bible_url = 'http://www.biblestudytools.com/bible-versions/' + soup = get_soup_for_bible_ref(bible_url) + if not soup: + return None + h4_tags = soup.find_all('h4', {'class': 'small-header'}) + if not h4_tags: + log.debug('No h4 tags found - did site change?') + return None + bibles = [] + for h4t in h4_tags: + short_name = None + if h4t.span: + short_name = h4t.span.get_text().strip().lower() + else: + log.error('No span tag found - did site change?') + return None + if not short_name: + continue + h4t.span.extract() + tag_text = h4t.get_text().strip() + # The names of non-english bibles has their language in parentheses at the end + if tag_text.endswith(')'): + language = tag_text[tag_text.rfind('(') + 1:-1] + if language in CROSSWALK_LANGUAGES: + language_code = CROSSWALK_LANGUAGES[language] + else: + language_code = '' + # ... except for those that don't... 
+ elif 'latin' in tag_text.lower(): + language_code = 'la' + elif 'la biblia' in tag_text.lower() or 'nueva' in tag_text.lower(): + language_code = 'es' + elif 'chinese' in tag_text.lower(): + language_code = 'zh' + elif 'greek' in tag_text.lower(): + language_code = 'el' + elif 'nova' in tag_text.lower(): + language_code = 'pt' + else: + language_code = 'en' + bibles.append((tag_text, short_name, language_code)) + return bibles From 83b30799ca41ca3e444f4896ea96b67102f1bc4f Mon Sep 17 00:00:00 2001 From: Philip Ridout Date: Thu, 11 Aug 2016 20:34:55 +0100 Subject: [PATCH 03/11] finished up with tests --- .../bibles/lib/{importers => }/http.py | 0 .../bibles/lib/importers/biblegateway.py | 4 +- .../bibles/lib/importers/bibleserver.py | 2 +- .../plugins/bibles/lib/importers/crosswalk.py | 4 +- openlp/plugins/bibles/lib/manager.py | 2 +- .../{test_http.py => test_bibleserver.py} | 43 ++++--------------- 6 files changed, 14 insertions(+), 41 deletions(-) rename openlp/plugins/bibles/lib/{importers => }/http.py (100%) rename tests/functional/openlp_plugins/bibles/{test_http.py => test_bibleserver.py} (88%) diff --git a/openlp/plugins/bibles/lib/importers/http.py b/openlp/plugins/bibles/lib/http.py similarity index 100% rename from openlp/plugins/bibles/lib/importers/http.py rename to openlp/plugins/bibles/lib/http.py diff --git a/openlp/plugins/bibles/lib/importers/biblegateway.py b/openlp/plugins/bibles/lib/importers/biblegateway.py index c6a8074bf..f3caa2204 100644 --- a/openlp/plugins/bibles/lib/importers/biblegateway.py +++ b/openlp/plugins/bibles/lib/importers/biblegateway.py @@ -20,7 +20,7 @@ # Temple Place, Suite 330, Boston, MA 02111-1307 USA # ############################################################################### """ -The :mod:`http` module enables OpenLP to retrieve scripture from bible websites. +The :mod:`biblegateway` module enables OpenLP to retrieve scripture from http://biblegateway.com. 
""" import logging import socket @@ -32,7 +32,7 @@ from bs4 import BeautifulSoup, NavigableString, Tag from openlp.core.common import RegistryProperties from openlp.core.lib.webpagereader import get_web_page from openlp.plugins.bibles.lib import SearchResults -from openlp.plugins.bibles.lib.importers.http import get_soup_for_bible_ref, send_error_message +from openlp.plugins.bibles.lib.http import get_soup_for_bible_ref, send_error_message UGLY_CHARS = { '\u2014': ' - ', diff --git a/openlp/plugins/bibles/lib/importers/bibleserver.py b/openlp/plugins/bibles/lib/importers/bibleserver.py index e651b84ab..16924d84a 100644 --- a/openlp/plugins/bibles/lib/importers/bibleserver.py +++ b/openlp/plugins/bibles/lib/importers/bibleserver.py @@ -20,7 +20,7 @@ # Temple Place, Suite 330, Boston, MA 02111-1307 USA # ############################################################################### """ -The :mod:`http` module enables OpenLP to retrieve scripture from bible websites. +The :mod:`bibleserver` module enables OpenLP to retrieve scripture from http://bibleserver.com. """ import logging import re diff --git a/openlp/plugins/bibles/lib/importers/crosswalk.py b/openlp/plugins/bibles/lib/importers/crosswalk.py index 6c75209d1..fb354dd29 100644 --- a/openlp/plugins/bibles/lib/importers/crosswalk.py +++ b/openlp/plugins/bibles/lib/importers/crosswalk.py @@ -20,7 +20,7 @@ # Temple Place, Suite 330, Boston, MA 02111-1307 USA # ############################################################################### """ -The :mod:`http` module enables OpenLP to retrieve scripture from bible websites. +The :mod:`crosswalk` module enables OpenLP to retrieve scripture from www.biblestudytools.com. 
""" import logging import re @@ -30,7 +30,7 @@ import urllib.error from openlp.core.common import RegistryProperties from openlp.plugins.bibles.lib import SearchResults -from openlp.plugins.bibles.lib.importers.http import get_soup_for_bible_ref, send_error_message +from openlp.plugins.bibles.lib.http import get_soup_for_bible_ref, send_error_message FIX_PUNKCTUATION_REGEX = re.compile(r'[ ]+([.,;])') REDUCE_SPACES_REGEX = re.compile(r'[ ]{2,}') diff --git a/openlp/plugins/bibles/lib/manager.py b/openlp/plugins/bibles/lib/manager.py index d2286bed2..2734411f5 100644 --- a/openlp/plugins/bibles/lib/manager.py +++ b/openlp/plugins/bibles/lib/manager.py @@ -27,7 +27,7 @@ from openlp.core.common import RegistryProperties, AppLocation, Settings, transl from openlp.plugins.bibles.lib import parse_reference, LanguageSelection from openlp.plugins.bibles.lib.db import BibleDB, BibleMeta from .importers.csvbible import CSVBible -from .importers.http import HTTPBible +from .http import HTTPBible from .importers.opensong import OpenSongBible from .importers.osis import OSISBible from .importers.zefania import ZefaniaBible diff --git a/tests/functional/openlp_plugins/bibles/test_http.py b/tests/functional/openlp_plugins/bibles/test_bibleserver.py similarity index 88% rename from tests/functional/openlp_plugins/bibles/test_http.py rename to tests/functional/openlp_plugins/bibles/test_bibleserver.py index 839c81008..0849a63e3 100644 --- a/tests/functional/openlp_plugins/bibles/test_http.py +++ b/tests/functional/openlp_plugins/bibles/test_bibleserver.py @@ -20,41 +20,13 @@ # Temple Place, Suite 330, Boston, MA 02111-1307 USA # ############################################################################### """ -This module contains tests for the http module of the Bibles plugin. +This module contains tests for the bibleserver module of the Bibles plugin. 
""" from unittest import TestCase from bs4 import BeautifulSoup from tests.functional import patch, MagicMock -from openlp.plugins.bibles.lib.importers.http import BSExtract - -# TODO: Items left to test -# BGExtract -# __init__ -# _remove_elements -# _extract_verse -# _clean_soup -# _extract_verses -# _extract_verses_old -# get_bible_chapter -# get_books_from_http -# _get_application -# CWExtract -# __init__ -# get_bible_chapter -# get_books_from_http -# _get_application -# HTTPBible -# __init__ -# do_import -# get_verses -# get_chapter -# get_books -# get_chapter_count -# get_verse_count -# _get_application -# get_soup_for_bible_ref -# send_error_message +from openlp.plugins.bibles.lib.importers.bibleserver import BSExtract class TestBSExtract(TestCase): @@ -68,11 +40,12 @@ class TestBSExtract(TestCase): # get_books_from_http # _get_application def setUp(self): - self.get_soup_for_bible_ref_patcher = patch('openlp.plugins.bibles.lib.importers.http.get_soup_for_bible_ref') - self.log_patcher = patch('openlp.plugins.bibles.lib.importers.http.log') - self.send_error_message_patcher = patch('openlp.plugins.bibles.lib.importers.http.send_error_message') - self.socket_patcher = patch('openlp.plugins.bibles.lib.importers.http.socket') - self.urllib_patcher = patch('openlp.plugins.bibles.lib.importers.http.urllib') + self.get_soup_for_bible_ref_patcher = patch( + 'openlp.plugins.bibles.lib.importers.bibleserver.get_soup_for_bible_ref') + self.log_patcher = patch('openlp.plugins.bibles.lib.importers.bibleserver.log') + self.send_error_message_patcher = patch('openlp.plugins.bibles.lib.importers.bibleserver.send_error_message') + self.socket_patcher = patch('openlp.plugins.bibles.lib.http.socket') + self.urllib_patcher = patch('openlp.plugins.bibles.lib.importers.bibleserver.urllib') self.mock_get_soup_for_bible_ref = self.get_soup_for_bible_ref_patcher.start() self.mock_log = self.log_patcher.start() From f08d0c28a58f4885a56c8dc52bbff376082a04b3 Mon Sep 17 00:00:00 2001 
From: Philip Ridout Date: Sun, 14 Aug 2016 11:00:27 +0100 Subject: [PATCH 04/11] further bible refactors --- openlp/plugins/bibles/bibleplugin.py | 8 +- openlp/plugins/bibles/lib/__init__.py | 4 +- openlp/plugins/bibles/lib/bibleimport.py | 1 - .../plugins/bibles/lib/importers/opensong.py | 7 +- openlp/plugins/bibles/lib/importers/osis.py | 9 +- .../plugins/bibles/lib/importers/zefania.py | 4 +- .../openlp_plugins/bibles/test_bibleimport.py | 85 ++++++++++++++++--- .../openlp_plugins/bibles/test_csvimport.py | 4 +- .../bibles/test_zefaniaimport.py | 6 +- 9 files changed, 86 insertions(+), 42 deletions(-) diff --git a/openlp/plugins/bibles/bibleplugin.py b/openlp/plugins/bibles/bibleplugin.py index f63b85a92..e9168d695 100644 --- a/openlp/plugins/bibles/bibleplugin.py +++ b/openlp/plugins/bibles/bibleplugin.py @@ -140,10 +140,10 @@ class BiblePlugin(Plugin): def uses_theme(self, theme): """ - Called to find out if the bible plugin is currently using a theme. Returns ``1`` if the theme is being used, - otherwise returns ``0``. + Called to find out if the bible plugin is currently using a theme. :param theme: The theme + :return: 1 if the theme is being used, otherwise returns 0 """ if str(self.settings_tab.bible_theme) == theme: return 1 @@ -151,11 +151,11 @@ class BiblePlugin(Plugin): def rename_theme(self, old_theme, new_theme): """ - Rename the theme the bible plugin is using making the plugin use the - new name. + Rename the theme the bible plugin is using, making the plugin use the new name. :param old_theme: The name of the theme the plugin should stop using. Unused for this particular plugin. :param new_theme: The new name the plugin should now use. 
+ :return: None """ self.settings_tab.bible_theme = new_theme self.settings_tab.save() diff --git a/openlp/plugins/bibles/lib/__init__.py b/openlp/plugins/bibles/lib/__init__.py index 804755d18..e730009e7 100644 --- a/openlp/plugins/bibles/lib/__init__.py +++ b/openlp/plugins/bibles/lib/__init__.py @@ -173,7 +173,7 @@ class BibleStrings(object): def update_reference_separators(): """ - Updates separators and matches for parsing and formating scripture references. + Updates separators and matches for parsing and formatting scripture references. """ default_separators = [ '|'.join([ @@ -215,7 +215,7 @@ def update_reference_separators(): # escape reserved characters for character in '\\.^$*+?{}[]()': source_string = source_string.replace(character, '\\' + character) - # add various unicode alternatives + # add various Unicode alternatives source_string = source_string.replace('-', '(?:[-\u00AD\u2010\u2011\u2012\u2014\u2014\u2212\uFE63\uFF0D])') source_string = source_string.replace(',', '(?:[,\u201A])') REFERENCE_SEPARATORS['sep_{role}'.format(role=role)] = '\s*(?:{source})\s*'.format(source=source_string) diff --git a/openlp/plugins/bibles/lib/bibleimport.py b/openlp/plugins/bibles/lib/bibleimport.py index 4d015223b..7ebdcb170 100644 --- a/openlp/plugins/bibles/lib/bibleimport.py +++ b/openlp/plugins/bibles/lib/bibleimport.py @@ -35,7 +35,6 @@ class BibleImport(OpenLPMixin, BibleDB): """ Helper class to import bibles from a third party source into OpenLP """ - # TODO: Test def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.filename = kwargs['filename'] if 'filename' in kwargs else None diff --git a/openlp/plugins/bibles/lib/importers/opensong.py b/openlp/plugins/bibles/lib/importers/opensong.py index 43c1cf8ca..10c0ed87e 100644 --- a/openlp/plugins/bibles/lib/importers/opensong.py +++ b/openlp/plugins/bibles/lib/importers/opensong.py @@ -73,12 +73,7 @@ class OpenSongBible(BibleImport): for book in bible.b: if self.stop_import_flag: break - 
book_ref_id = self.get_book_ref_id_by_name(str(book.attrib['n']), len(bible.b), language_id) - if not book_ref_id: - log.error('Importing books from "{name}" failed'.format(name=self.filename)) - return False - book_details = BiblesResourcesDB.get_book_by_id(book_ref_id) - db_book = self.create_book(book.attrib['n'], book_ref_id, book_details['testament_id']) + db_book = self.find_and_create_book(str(book.attrib['n']), len(bible.b), language_id) chapter_number = 0 for chapter in book.c: if self.stop_import_flag: diff --git a/openlp/plugins/bibles/lib/importers/osis.py b/openlp/plugins/bibles/lib/importers/osis.py index c833277fe..db12bb7e9 100644 --- a/openlp/plugins/bibles/lib/importers/osis.py +++ b/openlp/plugins/bibles/lib/importers/osis.py @@ -98,7 +98,7 @@ class OSISBible(BibleImport): language_id = self.get_language_id(language[0] if language else None, bible_name=self.filename) if not language_id: return False - num_books = int(osis_bible_tree.xpath("count(//ns:div[@type='book'])", namespaces=NS)) + no_of_books = int(osis_bible_tree.xpath("count(//ns:div[@type='book'])", namespaces=NS)) # Precompile a few xpath-querys verse_in_chapter = etree.XPath('count(//ns:chapter[1]/ns:verse)', namespaces=NS) text_in_verse = etree.XPath('count(//ns:verse[1]/text())', namespaces=NS) @@ -109,12 +109,7 @@ class OSISBible(BibleImport): break # Remove div-tags in the book etree.strip_tags(book, '{http://www.bibletechnologies.net/2003/OSIS/namespace}div') - book_ref_id = self.get_book_ref_id_by_name(book.get('osisID'), num_books, language_id) - if not book_ref_id: - log.error('Importing books from "{name}" failed'.format(name=self.filename)) - return False - book_details = BiblesResourcesDB.get_book_by_id(book_ref_id) - db_book = self.create_book(book_details['name'], book_ref_id, book_details['testament_id']) + db_book = self.find_and_create_book(book.get('osisID'), no_of_books, language_id) # Find out if chapter-tags contains the verses, or if it is used as 
milestone/anchor if int(verse_in_chapter(book)) > 0: # The chapter tags contains the verses diff --git a/openlp/plugins/bibles/lib/importers/zefania.py b/openlp/plugins/bibles/lib/importers/zefania.py index 61ee41166..bc31a1664 100644 --- a/openlp/plugins/bibles/lib/importers/zefania.py +++ b/openlp/plugins/bibles/lib/importers/zefania.py @@ -54,7 +54,7 @@ class ZefaniaBible(BibleImport): language_id = self.get_language_id(language[0] if language else None, bible_name=self.filename) if not language_id: return False - num_books = int(xmlbible.xpath('count(//BIBLEBOOK)')) + no_of_books = int(xmlbible.xpath('count(//BIBLEBOOK)')) self.wizard.progress_bar.setMaximum(int(xmlbible.xpath('count(//CHAPTER)'))) for BIBLEBOOK in xmlbible: if self.stop_import_flag: @@ -64,7 +64,7 @@ class ZefaniaBible(BibleImport): if not bname and not bnumber: continue if bname: - book_ref_id = self.get_book_ref_id_by_name(bname, num_books, language_id) + book_ref_id = self.get_book_ref_id_by_name(bname, no_of_books, language_id) else: log.debug('Could not find a name, will use number, basically a guess.') book_ref_id = int(bnumber) diff --git a/tests/functional/openlp_plugins/bibles/test_bibleimport.py b/tests/functional/openlp_plugins/bibles/test_bibleimport.py index e2076df55..127c6fd16 100644 --- a/tests/functional/openlp_plugins/bibles/test_bibleimport.py +++ b/tests/functional/openlp_plugins/bibles/test_bibleimport.py @@ -29,8 +29,10 @@ from lxml import etree, objectify from unittest import TestCase from openlp.core.common.languages import Language +from openlp.core.lib.exceptions import ValidationError from openlp.plugins.bibles.lib.bibleimport import BibleImport -from tests.functional import MagicMock, patch +from openlp.plugins.bibles.lib.db import BibleDB +from tests.functional import ANY, MagicMock, patch class TestBibleImport(TestCase): @@ -39,23 +41,79 @@ class TestBibleImport(TestCase): """ def setUp(self): - test_file = BytesIO(b'\n' - b'\n' - b'
Test

data

tokeep
\n' - b' Testdatatodiscard\n' - b'
') + test_file = BytesIO( + b'\n' + b'\n' + b'
Test

data

tokeep
\n' + b' Testdatatodiscard\n' + b'
' + ) self.file_patcher = patch('builtins.open', return_value=test_file) - self.log_patcher = patch('openlp.plugins.bibles.lib.bibleimport.log') - self.setup_patcher = patch('openlp.plugins.bibles.lib.db.BibleDB._setup') - self.addCleanup(self.file_patcher.stop) - self.addCleanup(self.log_patcher.stop) - self.addCleanup(self.setup_patcher.stop) - self.file_patcher.start() + self.log_patcher = patch('openlp.plugins.bibles.lib.bibleimport.log') + self.addCleanup(self.log_patcher.stop) self.mock_log = self.log_patcher.start() + self.setup_patcher = patch('openlp.plugins.bibles.lib.db.BibleDB._setup') + self.addCleanup(self.setup_patcher.stop) self.setup_patcher.start() + def init_kwargs_none_test(self): + """ + Test the initialisation of the BibleImport Class when no key word arguments are supplied + """ + # GIVEN: A patched BibleDB._setup, BibleImport class and mocked parent + # WHEN: Creating an instance of BibleImport with no key word arguments + instance = BibleImport(MagicMock()) + + # THEN: The filename attribute should be None + self.assertIsNone(instance.filename) + self.assertIsInstance(instance, BibleDB) + + def init_kwargs_set_test(self): + """ + Test the initialisation of the BibleImport Class when supplied with select keyword arguments + """ + # GIVEN: A patched BibleDB._setup, BibleImport class and mocked parent + # WHEN: Creating an instance of BibleImport with selected key word arguments + kwargs = {'filename': 'bible.xml'} + instance = BibleImport(MagicMock(), **kwargs) + + # THEN: The filename keyword should be set to bible.xml + self.assertEqual(instance.filename, 'bible.xml') + self.assertIsInstance(instance, BibleDB) + + def check_for_compression_test(self): + """ + Test the check_for_compression method when called with a path to an uncompressed file + """ + # GIVEN: A mocked is_zipfile which returns False and an instance of BibleImport + with patch('openlp.plugins.bibles.lib.bibleimport.is_zipfile', return_value=False) as mocked_is_zip: + 
instance = BibleImport(MagicMock()) + + # WHEN: Calling check_for_compression + result = instance.check_for_compression('filename.tst') + + # THEN: None should be returned + self.assertIsNone(result) + mocked_is_zip.assert_called_once_with('filename.tst') + + def check_for_compression_zip_file_test(self): + """ + Test the check_for_compression method when called with a path to a compressed file + """ + # GIVEN: A patched is_zipfile which returns True and an instance of BibleImport + with patch('openlp.plugins.bibles.lib.bibleimport.is_zipfile', return_value=True),\ + patch('openlp.plugins.bibles.lib.bibleimport.critical_error_message_box') as mocked_message_box: + instance = BibleImport(MagicMock()) + + # WHEN: Calling check_for_compression + # THEN: A Validation error should be raised and the user should be notified. + with self.assertRaises(ValidationError) as context: + instance.check_for_compression('filename.tst') + self.assertTrue(mocked_message_box.called) + self.assertEqual(context.exception.msg, '"filename.tst" is compressed') + def get_language_id_language_found_test(self): """ Test get_language_id() when called with a name found in the languages list @@ -81,8 +139,7 @@ class TestBibleImport(TestCase): Test get_language_id() when called with a name not found in the languages list """ # GIVEN: A mocked languages.get_language which returns language and an instance of BibleImport - with patch('openlp.core.common.languages.get_language', return_value=None) \ - as mocked_languages_get_language, \ + with patch('openlp.core.common.languages.get_language', return_value=None) as mocked_languages_get_language, \ patch('openlp.plugins.bibles.lib.db.BibleDB.get_language', return_value=20) as mocked_db_get_language: instance = BibleImport(MagicMock()) instance.save_meta = MagicMock() diff --git a/tests/functional/openlp_plugins/bibles/test_csvimport.py b/tests/functional/openlp_plugins/bibles/test_csvimport.py index ada03a07d..7a56c1b85 100644 --- 
a/tests/functional/openlp_plugins/bibles/test_csvimport.py +++ b/tests/functional/openlp_plugins/bibles/test_csvimport.py @@ -46,10 +46,10 @@ class TestCSVImport(TestCase): def setUp(self): self.manager_patcher = patch('openlp.plugins.bibles.lib.db.Manager') - self.registry_patcher = patch('openlp.plugins.bibles.lib.db.Registry') self.addCleanup(self.manager_patcher.stop) - self.addCleanup(self.registry_patcher.stop) self.manager_patcher.start() + self.registry_patcher = patch('openlp.plugins.bibles.lib.db.Registry') + self.addCleanup(self.registry_patcher.stop) self.registry_patcher.start() def test_create_importer(self): diff --git a/tests/functional/openlp_plugins/bibles/test_zefaniaimport.py b/tests/functional/openlp_plugins/bibles/test_zefaniaimport.py index 200a36f45..5294b7f5c 100644 --- a/tests/functional/openlp_plugins/bibles/test_zefaniaimport.py +++ b/tests/functional/openlp_plugins/bibles/test_zefaniaimport.py @@ -42,14 +42,12 @@ class TestZefaniaImport(TestCase): def setUp(self): self.registry_patcher = patch('openlp.plugins.bibles.lib.db.Registry') + self.addCleanup(self.registry_patcher.stop) self.registry_patcher.start() self.manager_patcher = patch('openlp.plugins.bibles.lib.db.Manager') + self.addCleanup(self.manager_patcher.stop) self.manager_patcher.start() - def tearDown(self): - self.registry_patcher.stop() - self.manager_patcher.stop() - def test_create_importer(self): """ Test creating an instance of the Zefania file importer From 6ab2686b0971989e3d79e356f19c18065b5c2c79 Mon Sep 17 00:00:00 2001 From: Philip Ridout Date: Tue, 16 Aug 2016 21:36:21 +0100 Subject: [PATCH 05/11] Modify CSV Importer and test to give 100% coverage! 
--- .../plugins/bibles/lib/importers/csvbible.py | 2 +- .../openlp_plugins/bibles/test_bibleimport.py | 31 ------------------- .../openlp_plugins/bibles/test_csvimport.py | 2 +- 3 files changed, 2 insertions(+), 33 deletions(-) diff --git a/openlp/plugins/bibles/lib/importers/csvbible.py b/openlp/plugins/bibles/lib/importers/csvbible.py index 549cec581..3733145b6 100644 --- a/openlp/plugins/bibles/lib/importers/csvbible.py +++ b/openlp/plugins/bibles/lib/importers/csvbible.py @@ -142,7 +142,7 @@ class CSVBible(BibleImport): book_ptr = None for verse in verses: if self.stop_import_flag: - return None + break verse_book = self.get_book_name(verse.book_id_name, books) if book_ptr != verse_book: book = self.get_book(verse_book) diff --git a/tests/functional/openlp_plugins/bibles/test_bibleimport.py b/tests/functional/openlp_plugins/bibles/test_bibleimport.py index 127c6fd16..37c6c3fda 100644 --- a/tests/functional/openlp_plugins/bibles/test_bibleimport.py +++ b/tests/functional/openlp_plugins/bibles/test_bibleimport.py @@ -83,37 +83,6 @@ class TestBibleImport(TestCase): self.assertEqual(instance.filename, 'bible.xml') self.assertIsInstance(instance, BibleDB) - def check_for_compression_test(self): - """ - Test the check_for_compression method when called with a path to an uncompressed file - """ - # GIVEN: A mocked is_zipfile which returns False and an instance of BibleImport - with patch('openlp.plugins.bibles.lib.bibleimport.is_zipfile', return_value=False) as mocked_is_zip: - instance = BibleImport(MagicMock()) - - # WHEN: Calling check_for_compression - result = instance.check_for_compression('filename.tst') - - # THEN: None should be returned - self.assertIsNone(result) - mocked_is_zip.assert_called_once_with('filename.tst') - - def check_for_compression_zip_file_test(self): - """ - Test the check_for_compression method when called with a path to a compressed file - """ - # GIVEN: A patched is_zipfile which returns True and an instance of BibleImport - with 
patch('openlp.plugins.bibles.lib.bibleimport.is_zipfile', return_value=True),\ - patch('openlp.plugins.bibles.lib.bibleimport.critical_error_message_box') as mocked_message_box: - instance = BibleImport(MagicMock()) - - # WHEN: Calling check_for_compression - # THEN: A Validation error should be raised and the user should be notified. - with self.assertRaises(ValidationError) as context: - instance.check_for_compression('filename.tst') - self.assertTrue(mocked_message_box.called) - self.assertEqual(context.exception.msg, '"filename.tst" is compressed') - def get_language_id_language_found_test(self): """ Test get_language_id() when called with a name found in the languages list diff --git a/tests/functional/openlp_plugins/bibles/test_csvimport.py b/tests/functional/openlp_plugins/bibles/test_csvimport.py index 7a56c1b85..f6d3697af 100644 --- a/tests/functional/openlp_plugins/bibles/test_csvimport.py +++ b/tests/functional/openlp_plugins/bibles/test_csvimport.py @@ -240,7 +240,7 @@ class TestCSVImport(TestCase): importer.wizard = MagicMock() # WHEN: Calling process_verses - result = importer.process_verses([], []) + result = importer.process_verses(['Dummy Verse'], []) # THEN: get_book_name should not be called and the return value should be None self.assertFalse(importer.get_book_name.called) From 46b6d041cd50bea9f5dfea711be4f2168d7655fb Mon Sep 17 00:00:00 2001 From: Philip Ridout Date: Thu, 18 Aug 2016 07:31:36 +0100 Subject: [PATCH 06/11] Opensong refactors and tests --- openlp/plugins/bibles/lib/bibleimport.py | 19 +- .../plugins/bibles/lib/importers/opensong.py | 98 +++++++---- .../bibles/test_opensongimport.py | 164 +++++++++++++++--- 3 files changed, 220 insertions(+), 61 deletions(-) diff --git a/openlp/plugins/bibles/lib/bibleimport.py b/openlp/plugins/bibles/lib/bibleimport.py index 7ebdcb170..d6cfb83fa 100644 --- a/openlp/plugins/bibles/lib/bibleimport.py +++ b/openlp/plugins/bibles/lib/bibleimport.py @@ -23,9 +23,11 @@ import logging from lxml import 
etree, objectify +from zipfile import is_zipfile from openlp.core.common import OpenLPMixin, languages -from openlp.core.lib import ValidationError +from openlp.core.lib import ValidationError, translate +from openlp.core.lib.ui import critical_error_message_box from openlp.plugins.bibles.lib.db import BibleDB, BiblesResourcesDB log = logging.getLogger(__name__) @@ -39,6 +41,21 @@ class BibleImport(OpenLPMixin, BibleDB): super().__init__(*args, **kwargs) self.filename = kwargs['filename'] if 'filename' in kwargs else None + @staticmethod + def is_compressed(file): + """ + Check if the supplied file is compressed + + :param file: A path to the file to check + """ + if is_zipfile(file): + critical_error_message_box( + message=translate('BiblesPlugin.BibleImport', + 'The file "{file}" you supplied is compressed. You must decompress it before import.' + ).format(file=file)) + return True + return False + def get_language_id(self, file_language=None, bible_name=None): """ Get the language_id for the language of the bible. Fallback to user input if we cannot do this pragmatically. 
diff --git a/openlp/plugins/bibles/lib/importers/opensong.py b/openlp/plugins/bibles/lib/importers/opensong.py index 10c0ed87e..66c127408 100644 --- a/openlp/plugins/bibles/lib/importers/opensong.py +++ b/openlp/plugins/bibles/lib/importers/opensong.py @@ -21,12 +21,12 @@ ############################################################################### import logging -from lxml import etree, objectify +from lxml import etree from openlp.core.common import translate, trace_error_handler +from openlp.core.lib.exceptions import ValidationError from openlp.core.lib.ui import critical_error_message_box from openlp.plugins.bibles.lib.bibleimport import BibleImport -from openlp.plugins.bibles.lib.db import BibleDB, BiblesResourcesDB log = logging.getLogger(__name__) @@ -51,12 +51,69 @@ class OpenSongBible(BibleImport): verse_text += element.tail return verse_text + @staticmethod + def process_chapter_no(number, previous_number): + """ + Process the chapter number + + :param number: The raw data from the xml + :param previous_number: The previous chapter number + :return: Number of current chapter. (Int) + """ + if number: + return int(number.split()[-1]) + return previous_number + 1 + + @staticmethod + def process_verse_no(number, previous_number): + """ + Process the verse number retrieved from the xml + + :param number: The raw data from the xml + :param previous_number: The previous verse number + :return: Number of current verse. (Int) + """ + if not number: + return previous_number + 1 + try: + return int(number) + except ValueError: + verse_parts = number.split('-') + if len(verse_parts) > 1: + number = int(verse_parts[0]) + return number + except TypeError: + log.warning('Illegal verse number: {verse_no}'.format(verse_no=str(number))) + return previous_number + 1 + + @staticmethod + def validate_file(filename): + """ + Validate the supplied file + + :param filename: The supplied file + :return: True if valid. ValidationError is raised otherwise. 
+ """ + if BibleImport.is_compressed(): + raise ValidationError(msg='Compressed file') + bible = BibleImport.parse_xml(filename, use_objectify=True) + root_tag = bible.tag.lower() + if root_tag != 'bible': + if root_tag == 'xmlbible': + # Zefania bibles have a root tag of XMLBIBLE". Sometimes these bibles are referred to as 'OpenSong' + critical_error_message_box( + message=translate('BiblesPlugin.OpenSongImport', + 'Incorrect Bible file type supplied. This looks like a Zefania XML bible, ' + 'please use the Zefania import option.')) + raise ValidationError(msg='Invalid xml.') + return True + def do_import(self, bible_name=None): """ Loads a Bible from file. """ + self.validate_file(self.filename) log.debug('Starting OpenSong import from "{name}"'.format(name=self.filename)) - success = True try: bible = self.parse_xml(self.filename, use_objectify=True) # Check that we're not trying to import a Zefania XML bible, it is sometimes refered to as 'OpenSong' @@ -78,46 +135,21 @@ class OpenSongBible(BibleImport): for chapter in book.c: if self.stop_import_flag: break - number = chapter.attrib['n'] - if number: - chapter_number = int(number.split()[-1]) - else: - chapter_number += 1 + chapter_number = self.process_chapter_no(chapter.attrib['n'], chapter_number) verse_number = 0 for verse in chapter.v: if self.stop_import_flag: break - number = verse.attrib['n'] - if number: - try: - number = int(number) - except ValueError: - verse_parts = number.split('-') - if len(verse_parts) > 1: - number = int(verse_parts[0]) - except TypeError: - log.warning('Illegal verse number: {verse:d}'.format(verse=verse.attrib['n'])) - verse_number = number - else: - verse_number += 1 + verse_number = self.process_verse_no(verse.attrib['n'], verse_number) self.create_verse(db_book.id, chapter_number, verse_number, self.get_text(verse)) self.wizard.increment_progress_bar(translate('BiblesPlugin.Opensong', 'Importing {name} {chapter}...' 
).format(name=db_book.name, chapter=chapter_number)) self.session.commit() self.application.process_events() - except etree.XMLSyntaxError as inst: - trace_error_handler(log) - critical_error_message_box( - message=translate('BiblesPlugin.OpenSongImport', - 'Incorrect Bible file type supplied. OpenSong Bibles may be ' - 'compressed. You must decompress them before import.')) - log.exception(inst) - success = False - except (IOError, AttributeError): + except (AttributeError, ValidationError, etree.XMLSyntaxError): log.exception('Loading Bible from OpenSong file failed') - success = False + trace_error_handler(log) + return False if self.stop_import_flag: return False - else: - return success diff --git a/tests/functional/openlp_plugins/bibles/test_opensongimport.py b/tests/functional/openlp_plugins/bibles/test_opensongimport.py index d6997135b..af6215c45 100644 --- a/tests/functional/openlp_plugins/bibles/test_opensongimport.py +++ b/tests/functional/openlp_plugins/bibles/test_opensongimport.py @@ -27,9 +27,13 @@ import os import json from unittest import TestCase + +from lxml import objectify + from tests.functional import MagicMock, patch +from openlp.core.lib.exceptions import ValidationError from openlp.plugins.bibles.lib.importers.opensong import OpenSongBible -from openlp.plugins.bibles.lib.db import BibleDB +from openlp.plugins.bibles.lib.bibleimport import BibleImport TEST_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'resources', 'bibles')) @@ -41,14 +45,12 @@ class TestOpenSongImport(TestCase): """ def setUp(self): - self.registry_patcher = patch('openlp.plugins.bibles.lib.db.Registry') - self.registry_patcher.start() self.manager_patcher = patch('openlp.plugins.bibles.lib.db.Manager') + self.addCleanup(self.manager_patcher.stop) self.manager_patcher.start() - - def tearDown(self): - self.registry_patcher.stop() - self.manager_patcher.stop() + self.registry_patcher = patch('openlp.plugins.bibles.lib.db.Registry') + 
self.addCleanup(self.registry_patcher.stop) + self.registry_patcher.start() def test_create_importer(self): """ @@ -61,7 +63,134 @@ class TestOpenSongImport(TestCase): importer = OpenSongBible(mocked_manager, path='.', name='.', filename='') # THEN: The importer should be an instance of BibleDB - self.assertIsInstance(importer, BibleDB) + self.assertIsInstance(importer, BibleImport) + + def process_chapter_no_test(self): + """ + Test process_chapter_no when supplied with chapter number and an instance of OpenSongBible + """ + # GIVEN: The number 10 represented as a string + # WHEN: Calling process_chapter_no + result = OpenSongBible.process_chapter_no('10', 0) + + # THEN: The 10 should be returned as an Int + self.assertEqual(result, 10) + + def process_chapter_no_empty_attribute_test(self): + """ + Test process_chapter_no when the chapter number is an empty string. (Bug #1074727) + """ + # GIVEN: An empty string, and the previous chapter number set as 12 and an instance of OpenSongBible + # WHEN: Calling process_chapter_no + result = OpenSongBible.process_chapter_no('', 12) + + # THEN: process_chapter_no should increment the previous verse number + self.assertEqual(result, 13) + + def process_verse_no_valid_verse_no_test(self): + """ + Test process_verse_no when supplied with a valid verse number + """ + # GIVEN: The number 15 represented as a string and an instance of OpenSongBible + # WHEN: Calling process_verse_no + result = OpenSongBible.process_verse_no('15', 0) + + # THEN: process_verse_no should return the verse number + self.assertEqual(result, 15) + + def process_verse_no_verse_range_test(self): + """ + Test process_verse_no when supplied with a verse range + """ + # GIVEN: The range 24-26 represented as a string + # WHEN: Calling process_verse_no + result = OpenSongBible.process_verse_no('24-26', 0) + + # THEN: process_verse_no should return the first verse number in the range + self.assertEqual(result, 24) + + def 
process_verse_no_invalid_verse_no_test(self): + """ + Test process_verse_no when supplied with a invalid verse number + """ + # GIVEN: An non numeric string represented as a string + # WHEN: Calling process_verse_no + result = OpenSongBible.process_verse_no('invalid', 41) + + # THEN: process_verse_no should increment the previous verse number + self.assertEqual(result, 42) + + def process_verse_no_empty_attribute_test(self): + """ + Test process_verse_no when the verse number is an empty string. (Bug #1074727) + """ + # GIVEN: An empty string, and the previous verse number set as 14 + # WHEN: Calling process_verse_no + result = OpenSongBible.process_verse_no('', 14) + + # THEN: process_verse_no should increment the previous verse number + self.assertEqual(result, 15) + + @patch('openlp.plugins.bibles.lib.importers.opensong.log') + def process_verse_no_invalid_type_test(self, mocked_log): + """ + Test process_verse_no when the verse number is an invalid type) + """ + # GIVEN: A mocked out log, a Tuple, and the previous verse number set as 12 + # WHEN: Calling process_verse_no + result = OpenSongBible.process_verse_no((1,2,3), 12) + + # THEN: process_verse_no should log the verse number it was called with increment the previous verse number + mocked_log.warning.assert_called_once_with('Illegal verse number: (1, 2, 3)') + self.assertEqual(result, 13) + + @patch('openlp.plugins.bibles.lib.importers.opensong.BibleImport') + def validate_xml_bible_test(self, mocked_bible_import): + """ + Test that validate_xml returns True with valid XML + """ + # GIVEN: Some test data with an OpenSong Bible "bible" root tag + mocked_bible_import.parse_xml.return_value = objectify.fromstring('') + + # WHEN: Calling validate_xml + result = OpenSongBible.validate_file('file.name') + + # THEN: A True should be returned + self.assertTrue(result) + + @patch('openlp.plugins.bibles.lib.importers.opensong.critical_error_message_box') + 
@patch('openlp.plugins.bibles.lib.importers.opensong.BibleImport') + def validate_xml_zefania_root_test(self, mocked_bible_import, mocked_message_box): + """ + Test that validate_xml raises a ValidationError with a Zefinia root tag + """ + # GIVEN: Some test data with a Zefinia "XMLBIBLE" root tag + mocked_bible_import.parse_xml.return_value = objectify.fromstring('') + + # WHEN: Calling validate_xml + # THEN: critical_error_message_box should be called and an ValidationError should be raised + with self.assertRaises(ValidationError) as context: + OpenSongBible.validate_file('file.name') + self.assertEqual(context.exception.msg, 'Invalid xml.') + mocked_message_box.assert_called_once_with( + message='Incorrect Bible file type supplied. This looks like a Zefania XML bible, please use the ' + 'Zefania import option.') + + @patch('openlp.plugins.bibles.lib.importers.opensong.critical_error_message_box') + @patch('openlp.plugins.bibles.lib.importers.opensong.BibleImport') + def validate_xml_invalid_root_test(self, mocked_bible_import, mocked_message_box): + """ + Test that validate_xml raises a ValidationError with an invalid root tag + """ + # GIVEN: Some test data with an invalid root tag and an instance of OpenSongBible + mocked_bible_import.parse_xml.return_value = objectify.fromstring('') + + # WHEN: Calling validate_xml + # THEN: ValidationError should be raised, and the critical error message box should not have been called + with self.assertRaises(ValidationError) as context: + OpenSongBible.validate_file('file.name') + self.assertEqual(context.exception.msg, 'Invalid xml.') + self.assertFalse(mocked_message_box.called) def test_file_import(self): """ @@ -92,22 +221,3 @@ class TestOpenSongImport(TestCase): self.assertTrue(importer.create_verse.called) for verse_tag, verse_text in test_data['verses']: importer.create_verse.assert_any_call(importer.create_book().id, 1, int(verse_tag), verse_text) - - def test_zefania_import_error(self): - """ - Test that we give 
an error message if trying to import a zefania bible - """ - # GIVEN: A mocked out "manager" and mocked out critical_error_message_box and an import - with patch('openlp.plugins.bibles.lib.importers.opensong.critical_error_message_box') as \ - mocked_critical_error_message_box: - mocked_manager = MagicMock() - importer = OpenSongBible(mocked_manager, path='.', name='.', filename='') - - # WHEN: An trying to import a zefania bible - importer.filename = os.path.join(TEST_PATH, 'zefania-dk1933.xml') - importer.do_import() - - # THEN: The importer should have "shown" an error message - mocked_critical_error_message_box.assert_called_with(message='Incorrect Bible file type supplied. ' - 'This looks like a Zefania XML bible, ' - 'please use the Zefania import option.') From 7c77d7e8bd0c6b73df164a3d772d7faf4ef1dbe9 Mon Sep 17 00:00:00 2001 From: Philip Ridout Date: Sat, 20 Aug 2016 19:12:42 +0100 Subject: [PATCH 07/11] Refactor of OpenSong Bible importer + 100% test coverage --- .../plugins/bibles/lib/importers/opensong.py | 77 ++-- .../openlp_core_ui/test_exceptionform.py | 7 +- .../bibles/test_opensongimport.py | 431 +++++++++++++++--- 3 files changed, 414 insertions(+), 101 deletions(-) diff --git a/openlp/plugins/bibles/lib/importers/opensong.py b/openlp/plugins/bibles/lib/importers/opensong.py index 66c127408..c0a82a4ff 100644 --- a/openlp/plugins/bibles/lib/importers/opensong.py +++ b/openlp/plugins/bibles/lib/importers/opensong.py @@ -36,7 +36,8 @@ class OpenSongBible(BibleImport): """ OpenSong Bible format importer class. This class is used to import Bibles from OpenSong's XML format. """ - def get_text(self, element): + @staticmethod + def get_text(element): """ Recursively get all text in an objectify element and its child elements. 
@@ -46,15 +47,15 @@ class OpenSongBible(BibleImport): if element.text: verse_text = element.text for sub_element in element.iterchildren(): - verse_text += self.get_text(sub_element) + verse_text += OpenSongBible.get_text(sub_element) if element.tail: verse_text += element.tail return verse_text @staticmethod - def process_chapter_no(number, previous_number): + def parse_chapter_number(number, previous_number): """ - Process the chapter number + Parse the chapter number :param number: The raw data from the xml :param previous_number: The previous chapter number @@ -65,9 +66,9 @@ class OpenSongBible(BibleImport): return previous_number + 1 @staticmethod - def process_verse_no(number, previous_number): + def parse_verse_number(number, previous_number): """ - Process the verse number retrieved from the xml + Parse the verse number retrieved from the xml :param number: The raw data from the xml :param previous_number: The previous verse number @@ -94,7 +95,7 @@ class OpenSongBible(BibleImport): :param filename: The supplied file :return: True if valid. ValidationError is raised otherwise. 
""" - if BibleImport.is_compressed(): + if BibleImport.is_compressed(filename): raise ValidationError(msg='Compressed file') bible = BibleImport.parse_xml(filename, use_objectify=True) root_tag = bible.tag.lower() @@ -108,44 +109,47 @@ class OpenSongBible(BibleImport): raise ValidationError(msg='Invalid xml.') return True + def process_books(self, books): + for book in books: + if self.stop_import_flag: + break + db_book = self.find_and_create_book(str(book.attrib['n']), len(books), self.language_id) + self.process_chapters(db_book, book.c) + self.session.commit() + + def process_chapters(self, book, chapters): + chapter_number = 0 + for chapter in chapters: + if self.stop_import_flag: + break + chapter_number = self.parse_chapter_number(chapter.attrib['n'], chapter_number) + self.process_verses(book, chapter_number, chapter.v) + self.wizard.increment_progress_bar(translate('BiblesPlugin.Opensong', + 'Importing {name} {chapter}...' + ).format(name=book.name, chapter=chapter_number)) + + def process_verses(self, book, chapter_number, verses): + verse_number = 0 + for verse in verses: + if self.stop_import_flag: + break + verse_number = self.parse_verse_number(verse.attrib['n'], verse_number) + self.create_verse(book.id, chapter_number, verse_number, self.get_text(verse)) + def do_import(self, bible_name=None): """ - Loads a Bible from file. + Loads an Open Song Bible from a file. """ - self.validate_file(self.filename) log.debug('Starting OpenSong import from "{name}"'.format(name=self.filename)) try: + self.validate_file(self.filename) bible = self.parse_xml(self.filename, use_objectify=True) # Check that we're not trying to import a Zefania XML bible, it is sometimes refered to as 'OpenSong' - if bible.tag.upper() == 'XMLBIBLE': - critical_error_message_box( - message=translate('BiblesPlugin.OpenSongImport', - 'Incorrect Bible file type supplied. 
This looks like a Zefania XML bible, ' - 'please use the Zefania import option.')) - return False # No language info in the opensong format, so ask the user - language_id = self.get_language_id(bible_name=self.filename) - if not language_id: + self.language_id = self.get_language_id(bible_name=self.filename) + if not self.language_id: return False - for book in bible.b: - if self.stop_import_flag: - break - db_book = self.find_and_create_book(str(book.attrib['n']), len(bible.b), language_id) - chapter_number = 0 - for chapter in book.c: - if self.stop_import_flag: - break - chapter_number = self.process_chapter_no(chapter.attrib['n'], chapter_number) - verse_number = 0 - for verse in chapter.v: - if self.stop_import_flag: - break - verse_number = self.process_verse_no(verse.attrib['n'], verse_number) - self.create_verse(db_book.id, chapter_number, verse_number, self.get_text(verse)) - self.wizard.increment_progress_bar(translate('BiblesPlugin.Opensong', - 'Importing {name} {chapter}...' - ).format(name=db_book.name, chapter=chapter_number)) - self.session.commit() + self.process_books(bible.b) self.application.process_events() except (AttributeError, ValidationError, etree.XMLSyntaxError): log.exception('Loading Bible from OpenSong file failed') @@ -153,3 +157,4 @@ class OpenSongBible(BibleImport): return False if self.stop_import_flag: return False + return True diff --git a/tests/functional/openlp_core_ui/test_exceptionform.py b/tests/functional/openlp_core_ui/test_exceptionform.py index 452a8dee9..493b2baeb 100644 --- a/tests/functional/openlp_core_ui/test_exceptionform.py +++ b/tests/functional/openlp_core_ui/test_exceptionform.py @@ -24,18 +24,13 @@ Package to test the openlp.core.ui.exeptionform package. 
""" import os -import socket import tempfile -import urllib from unittest import TestCase from unittest.mock import mock_open -from PyQt5.QtCore import QUrlQuery - from openlp.core.common import Registry -from openlp.core.ui.firsttimeform import FirstTimeForm -from tests.functional import MagicMock, patch +from tests.functional import patch from tests.helpers.testmixin import TestMixin from openlp.core.ui import exceptionform diff --git a/tests/functional/openlp_plugins/bibles/test_opensongimport.py b/tests/functional/openlp_plugins/bibles/test_opensongimport.py index af6215c45..ee4e794c0 100644 --- a/tests/functional/openlp_plugins/bibles/test_opensongimport.py +++ b/tests/functional/openlp_plugins/bibles/test_opensongimport.py @@ -23,14 +23,15 @@ This module contains tests for the OpenSong Bible importer. """ -import os import json +import os from unittest import TestCase +from lxml import etree, objectify -from lxml import objectify - -from tests.functional import MagicMock, patch +from tests.functional import MagicMock, patch, call +from tests.helpers.testmixin import TestMixin +from openlp.core.common import Registry from openlp.core.lib.exceptions import ValidationError from openlp.plugins.bibles.lib.importers.opensong import OpenSongBible from openlp.plugins.bibles.lib.bibleimport import BibleImport @@ -39,7 +40,7 @@ TEST_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'resources', 'bibles')) -class TestOpenSongImport(TestCase): +class TestOpenSongImport(TestCase, TestMixin): """ Test the functions in the :mod:`opensongimport` module. 
""" @@ -48,9 +49,10 @@ class TestOpenSongImport(TestCase): self.manager_patcher = patch('openlp.plugins.bibles.lib.db.Manager') self.addCleanup(self.manager_patcher.stop) self.manager_patcher.start() - self.registry_patcher = patch('openlp.plugins.bibles.lib.db.Registry') - self.addCleanup(self.registry_patcher.stop) - self.registry_patcher.start() + self.setup_application() + self.app.process_events = MagicMock() + Registry.create() + Registry().register('application', self.app) def test_create_importer(self): """ @@ -65,133 +67,444 @@ class TestOpenSongImport(TestCase): # THEN: The importer should be an instance of BibleDB self.assertIsInstance(importer, BibleImport) - def process_chapter_no_test(self): + def get_text_no_text_test(self): """ - Test process_chapter_no when supplied with chapter number and an instance of OpenSongBible + Test that get_text handles elements containing text in a combination of text and tail attributes + """ + # GIVEN: Some test data which contains an empty element and an instance of OpenSongBible + test_data = objectify.fromstring('') + + # WHEN: Calling get_text + result = OpenSongBible.get_text(test_data) + + # THEN: A blank string should be returned + self.assertEqual(result, '') + + def get_text_text_test(self): + """ + Test that get_text handles elements containing text in a combination of text and tail attributes + """ + # GIVEN: Some test data which contains all possible permutation of text and tail text possible and an instance + # of OpenSongBible + test_data = objectify.fromstring('Element text ' + 'sub_text_tail text sub_text_tail tail ' + 'sub_text text ' + 'sub_tail tail') + + # WHEN: Calling get_text + result = OpenSongBible.get_text(test_data) + + # THEN: The text returned should be as expected + self.assertEqual(result, 'Element text sub_text_tail text sub_text_tail tail sub_text text sub_tail tail') + + def parse_chapter_number_test(self): + """ + Test parse_chapter_number when supplied with chapter number and an 
instance of OpenSongBible """ # GIVEN: The number 10 represented as a string - # WHEN: Calling process_chapter_no - result = OpenSongBible.process_chapter_no('10', 0) + # WHEN: Calling parse_chapter_nnumber + result = OpenSongBible.parse_chapter_number('10', 0) # THEN: The 10 should be returned as an Int self.assertEqual(result, 10) - def process_chapter_no_empty_attribute_test(self): + def parse_chapter_number_empty_attribute_test(self): """ - Test process_chapter_no when the chapter number is an empty string. (Bug #1074727) + Testparse_chapter_number when the chapter number is an empty string. (Bug #1074727) """ # GIVEN: An empty string, and the previous chapter number set as 12 and an instance of OpenSongBible - # WHEN: Calling process_chapter_no - result = OpenSongBible.process_chapter_no('', 12) + # WHEN: Calling parse_chapter_number + result = OpenSongBible.parse_chapter_number('', 12) - # THEN: process_chapter_no should increment the previous verse number + # THEN: parse_chapter_number should increment the previous verse number self.assertEqual(result, 13) - def process_verse_no_valid_verse_no_test(self): + def parse_verse_number_valid_verse_no_test(self): """ - Test process_verse_no when supplied with a valid verse number + Test parse_verse_number when supplied with a valid verse number """ # GIVEN: The number 15 represented as a string and an instance of OpenSongBible - # WHEN: Calling process_verse_no - result = OpenSongBible.process_verse_no('15', 0) + # WHEN: Calling parse_verse_number + result = OpenSongBible.parse_verse_number('15', 0) - # THEN: process_verse_no should return the verse number + # THEN: parse_verse_number should return the verse number self.assertEqual(result, 15) - def process_verse_no_verse_range_test(self): + def parse_verse_number_verse_range_test(self): """ - Test process_verse_no when supplied with a verse range + Test parse_verse_number when supplied with a verse range """ # GIVEN: The range 24-26 represented as a string - # 
WHEN: Calling process_verse_no - result = OpenSongBible.process_verse_no('24-26', 0) + # WHEN: Calling parse_verse_number + result = OpenSongBible.parse_verse_number('24-26', 0) - # THEN: process_verse_no should return the first verse number in the range + # THEN: parse_verse_number should return the first verse number in the range self.assertEqual(result, 24) - def process_verse_no_invalid_verse_no_test(self): + def parse_verse_number_invalid_verse_no_test(self): """ - Test process_verse_no when supplied with a invalid verse number + Test parse_verse_number when supplied with a invalid verse number """ # GIVEN: An non numeric string represented as a string - # WHEN: Calling process_verse_no - result = OpenSongBible.process_verse_no('invalid', 41) + # WHEN: Calling parse_verse_number + result = OpenSongBible.parse_verse_number('invalid', 41) - # THEN: process_verse_no should increment the previous verse number + # THEN: parse_verse_number should increment the previous verse number self.assertEqual(result, 42) - def process_verse_no_empty_attribute_test(self): + def parse_verse_number_empty_attribute_test(self): """ - Test process_verse_no when the verse number is an empty string. (Bug #1074727) + Test parse_verse_number when the verse number is an empty string. 
(Bug #1074727) """ # GIVEN: An empty string, and the previous verse number set as 14 - # WHEN: Calling process_verse_no - result = OpenSongBible.process_verse_no('', 14) + # WHEN: Calling parse_verse_number + result = OpenSongBible.parse_verse_number('', 14) - # THEN: process_verse_no should increment the previous verse number + # THEN: parse_verse_number should increment the previous verse number self.assertEqual(result, 15) @patch('openlp.plugins.bibles.lib.importers.opensong.log') - def process_verse_no_invalid_type_test(self, mocked_log): + def parse_verse_number_invalid_type_test(self, mocked_log): """ - Test process_verse_no when the verse number is an invalid type) + Test parse_verse_number when the verse number is an invalid type) """ # GIVEN: A mocked out log, a Tuple, and the previous verse number set as 12 - # WHEN: Calling process_verse_no - result = OpenSongBible.process_verse_no((1,2,3), 12) + # WHEN: Calling parse_verse_number + result = OpenSongBible.parse_verse_number((1, 2, 3), 12) - # THEN: process_verse_no should log the verse number it was called with increment the previous verse number + # THEN: parse_verse_number should log the verse number it was called with increment the previous verse number mocked_log.warning.assert_called_once_with('Illegal verse number: (1, 2, 3)') self.assertEqual(result, 13) - @patch('openlp.plugins.bibles.lib.importers.opensong.BibleImport') - def validate_xml_bible_test(self, mocked_bible_import): + @patch('openlp.plugins.bibles.lib.bibleimport.BibleImport.find_and_create_book') + def process_books_stop_import_test(self, mocked_find_and_create_book): """ - Test that validate_xml returns True with valid XML + Test process_books when stop_import is set to True + """ + # GIVEN: An isntance of OpenSongBible + importer = OpenSongBible(MagicMock(), path='.', name='.', filename='') + + # WHEN: stop_import_flag is set to True + importer.stop_import_flag = True + importer.process_books(['Book']) + + # THEN: 
find_and_create_book should not have been called + self.assertFalse(mocked_find_and_create_book.called) + + @patch('openlp.plugins.bibles.lib.bibleimport.BibleImport.find_and_create_book', + **{'side_effect': ['db_book1', 'db_book2']}) + def process_books_completes_test(self, mocked_find_and_create_book): + """ + Test process_books when it processes all books + """ + # GIVEN: An instance of OpenSongBible Importer and two mocked books + importer = OpenSongBible(MagicMock(), path='.', name='.', filename='') + + book1 = MagicMock() + book1.attrib = {'n': 'Name1'} + book1.c = 'Chapter1' + book2 = MagicMock() + book2.attrib = {'n': 'Name2'} + book2.c = 'Chapter2' + importer.language_id = 10 + importer.process_chapters = MagicMock() + importer.session = MagicMock() + importer.stop_import_flag = False + + # WHEN: Calling process_books with the two books + importer.process_books([book1, book2]) + + # THEN: find_and_create_book and process_books should be called with the details from the mocked books + self.assertEqual(mocked_find_and_create_book.call_args_list, [call('Name1', 2, 10), call('Name2', 2, 10)]) + self.assertEqual(importer.process_chapters.call_args_list, + [call('db_book1', 'Chapter1'), call('db_book2', 'Chapter2')]) + self.assertEqual(importer.session.commit.call_count, 2) + + def process_chapters_stop_import_test(self): + """ + Test process_chapters when stop_import is set to True + """ + # GIVEN: An isntance of OpenSongBible + importer = OpenSongBible(MagicMock(), path='.', name='.', filename='') + importer.parse_chapter_number = MagicMock() + + # WHEN: stop_import_flag is set to True + importer.stop_import_flag = True + importer.process_chapters('Book', ['Chapter1']) + + # THEN: importer.parse_chapter_number not have been called + self.assertFalse(importer.parse_chapter_number.called) + + @patch('openlp.plugins.bibles.lib.importers.opensong.translate', **{'side_effect': lambda x, y: y}) + def process_chapters_completes_test(self, mocked_translate): + """ + 
Test process_chapters when it completes + """ + # GIVEN: An instance of OpenSongBible + importer = OpenSongBible(MagicMock(), path='.', name='.', filename='') + importer.parse_chapter_number = MagicMock() + importer.parse_chapter_number.side_effect = [1, 2] + importer.wizard = MagicMock() + + # WHEN: called with some valid data + book = MagicMock() + book.name = "Book" + chapter1 = MagicMock() + chapter1.attrib = {'n': '1'} + chapter1.c = 'Chapter1' + chapter1.v = ['Chapter1 Verses'] + chapter2 = MagicMock() + chapter2.attrib = {'n': '2'} + chapter2.c = 'Chapter2' + chapter2.v = ['Chapter2 Verses'] + + importer.process_verses = MagicMock() + importer.stop_import_flag = False + importer.process_chapters(book, [chapter1, chapter2]) + + # THEN: parse_chapter_number, process_verses and increment_process_bar should have been called + self.assertEqual(importer.parse_chapter_number.call_args_list, [call('1', 0), call('2', 1)]) + self.assertEqual( + importer.process_verses.call_args_list, + [call(book, 1, ['Chapter1 Verses']), call(book, 2, ['Chapter2 Verses'])]) + self.assertEqual(importer.wizard.increment_progress_bar.call_args_list, + [call('Importing Book 1...'), call('Importing Book 2...')]) + + def process_verses_stop_import_test(self): + """ + Test process_verses when stop_import is set to True + """ + # GIVEN: An isntance of OpenSongBible + importer = OpenSongBible(MagicMock(), path='.', name='.', filename='') + importer.parse_verse_number = MagicMock() + + # WHEN: stop_import_flag is set to True + importer.stop_import_flag = True + importer.process_verses('Book', 1, 'Verses') + + # THEN: importer.parse_verse_number not have been called + self.assertFalse(importer.parse_verse_number.called) + + def process_verses_completes_test(self): + """ + Test process_verses when it completes + """ + # GIVEN: An instance of OpenSongBible + importer = OpenSongBible(MagicMock(), path='.', name='.', filename='') + importer.get_text = MagicMock() + importer.get_text.side_effect = 
['Verse1 Text', 'Verse2 Text'] + importer.parse_verse_number = MagicMock() + importer.parse_verse_number.side_effect = [1, 2] + importer.wizard = MagicMock() + + # WHEN: called with some valid data + book = MagicMock() + book.id = 1 + verse1 = MagicMock() + verse1.attrib = {'n': '1'} + verse1.c = 'Chapter1' + verse1.v = ['Chapter1 Verses'] + verse2 = MagicMock() + verse2.attrib = {'n': '2'} + verse2.c = 'Chapter2' + verse2.v = ['Chapter2 Verses'] + + importer.create_verse = MagicMock() + importer.stop_import_flag = False + importer.process_verses(book, 1, [verse1, verse2]) + + # THEN: parse_chapter_number, process_verses and increment_process_bar should have been called + self.assertEqual(importer.parse_verse_number.call_args_list, [call('1', 0), call('2', 1)]) + self.assertEqual(importer.get_text.call_args_list, [call(verse1), call(verse2)]) + self.assertEqual( + importer.create_verse.call_args_list, + [call(1, 1, 1, 'Verse1 Text'), call(1, 1, 2, 'Verse2 Text')]) + + @patch('openlp.plugins.bibles.lib.importers.opensong.BibleImport.is_compressed') + def validate_file_compressed_test(self, mocked_is_compressed): + """ + Test that validate_file raises a ValidationError when supplied with a compressed file + """ + # GIVEN: A mocked is_compressed method which returns True + mocked_is_compressed.return_value = True + + # WHEN: Calling validate_file + # THEN: ValidationError should be raised + with self.assertRaises(ValidationError) as context: + OpenSongBible.validate_file('file.name') + self.assertEqual(context.exception.msg, 'Compressed file') + + @patch('openlp.plugins.bibles.lib.importers.opensong.BibleImport.parse_xml') + @patch('openlp.plugins.bibles.lib.importers.opensong.BibleImport.is_compressed', **{'return_value': False}) + def validate_file_bible_test(self, mocked_is_compressed, mocked_parse_xml): + """ + Test that validate_file returns True with valid XML """ # GIVEN: Some test data with an OpenSong Bible "bible" root tag - 
mocked_bible_import.parse_xml.return_value = objectify.fromstring('') + mocked_parse_xml.return_value = objectify.fromstring('') - # WHEN: Calling validate_xml + # WHEN: Calling validate_file result = OpenSongBible.validate_file('file.name') # THEN: A True should be returned self.assertTrue(result) @patch('openlp.plugins.bibles.lib.importers.opensong.critical_error_message_box') - @patch('openlp.plugins.bibles.lib.importers.opensong.BibleImport') - def validate_xml_zefania_root_test(self, mocked_bible_import, mocked_message_box): + @patch('openlp.plugins.bibles.lib.importers.opensong.BibleImport.parse_xml') + @patch('openlp.plugins.bibles.lib.importers.opensong.BibleImport.is_compressed', **{'return_value': False}) + def validate_file_zefania_root_test(self, mocked_is_compressed, mocked_parse_xml, mocked_message_box): """ - Test that validate_xml raises a ValidationError with a Zefinia root tag + Test that validate_file raises a ValidationError with a Zefinia root tag """ # GIVEN: Some test data with a Zefinia "XMLBIBLE" root tag - mocked_bible_import.parse_xml.return_value = objectify.fromstring('') + mocked_parse_xml.return_value = objectify.fromstring('') - # WHEN: Calling validate_xml + # WHEN: Calling validate_file # THEN: critical_error_message_box should be called and an ValidationError should be raised with self.assertRaises(ValidationError) as context: OpenSongBible.validate_file('file.name') - self.assertEqual(context.exception.msg, 'Invalid xml.') + self.assertEqual(context.exception.msg, 'Invalid xml.') mocked_message_box.assert_called_once_with( message='Incorrect Bible file type supplied. 
This looks like a Zefania XML bible, please use the ' 'Zefania import option.') @patch('openlp.plugins.bibles.lib.importers.opensong.critical_error_message_box') - @patch('openlp.plugins.bibles.lib.importers.opensong.BibleImport') - def validate_xml_invalid_root_test(self, mocked_bible_import, mocked_message_box): + @patch('openlp.plugins.bibles.lib.importers.opensong.BibleImport.parse_xml') + @patch('openlp.plugins.bibles.lib.importers.opensong.BibleImport.is_compressed', **{'return_value': False}) + def validate_file_invalid_root_test(self, mocked_is_compressed, mocked_parse_xml, mocked_message_box): """ - Test that validate_xml raises a ValidationError with an invalid root tag + Test that validate_file raises a ValidationError with an invalid root tag """ # GIVEN: Some test data with an invalid root tag and an instance of OpenSongBible - mocked_bible_import.parse_xml.return_value = objectify.fromstring('') + mocked_parse_xml.return_value = objectify.fromstring('') - # WHEN: Calling validate_xml + # WHEN: Calling validate_file # THEN: ValidationError should be raised, and the critical error message box should not have been called with self.assertRaises(ValidationError) as context: OpenSongBible.validate_file('file.name') - self.assertEqual(context.exception.msg, 'Invalid xml.') + self.assertEqual(context.exception.msg, 'Invalid xml.') self.assertFalse(mocked_message_box.called) + @patch('openlp.plugins.bibles.lib.importers.opensong.log') + @patch('openlp.plugins.bibles.lib.importers.opensong.trace_error_handler') + def do_import_attribute_error_test(self, mocked_trace_error_handler, mocked_log): + """ + Test do_import when an AttributeError exception is raised + """ + # GIVEN: An instance of OpenSongBible and a mocked validate_file which raises an AttributeError + importer = OpenSongBible(MagicMock(), path='.', name='.', filename='') + importer.validate_file = MagicMock(**{'side_effect': AttributeError()}) + importer.parse_xml = MagicMock() + + # WHEN: Calling 
do_import + result = importer.do_import() + + # THEN: do_import should return False after logging the exception + mocked_log.exception.assert_called_once_with('Loading Bible from OpenSong file failed') + mocked_trace_error_handler.assert_called_once_with(mocked_log) + self.assertFalse(result) + self.assertFalse(importer.parse_xml.called) + + @patch('openlp.plugins.bibles.lib.importers.opensong.log') + @patch('openlp.plugins.bibles.lib.importers.opensong.trace_error_handler') + def do_import_validation_error_test(self, mocked_trace_error_handler, mocked_log): + """ + Test do_import when an ValidationError exception is raised + """ + # GIVEN: An instance of OpenSongBible and a mocked validate_file which raises an ValidationError + importer = OpenSongBible(MagicMock(), path='.', name='.', filename='') + importer.validate_file = MagicMock(**{'side_effect': ValidationError()}) + importer.parse_xml = MagicMock() + + # WHEN: Calling do_import + result = importer.do_import() + + # THEN: do_import should return False after logging the exception. parse_xml should not be called. 
+ mocked_log.exception.assert_called_once_with('Loading Bible from OpenSong file failed') + mocked_trace_error_handler.assert_called_once_with(mocked_log) + self.assertFalse(result) + self.assertFalse(importer.parse_xml.called) + + @patch('openlp.plugins.bibles.lib.importers.opensong.log') + @patch('openlp.plugins.bibles.lib.importers.opensong.trace_error_handler') + def do_import_xml_syntax_error_test(self, mocked_trace_error_handler, mocked_log): + """ + Test do_import when an etree.XMLSyntaxError exception is raised + """ + # GIVEN: An instance of OpenSongBible and a mocked validate_file which raises an etree.XMLSyntaxError + importer = OpenSongBible(MagicMock(), path='.', name='.', filename='') + importer.validate_file = MagicMock(**{'side_effect': etree.XMLSyntaxError(None, None, None, None)}) + importer.parse_xml = MagicMock() + + # WHEN: Calling do_import + result = importer.do_import() + + # THEN: do_import should return False after logging the exception. parse_xml should not be called. 
+ mocked_log.exception.assert_called_once_with('Loading Bible from OpenSong file failed') + mocked_trace_error_handler.assert_called_once_with(mocked_log) + self.assertFalse(result) + self.assertFalse(importer.parse_xml.called) + + @patch('openlp.plugins.bibles.lib.importers.opensong.log') + def do_import_no_language_test(self, mocked_log): + """ + Test do_import when the user cancels the language selection dialog + """ + # GIVEN: An instance of OpenSongBible and a mocked get_language which returns False + importer = OpenSongBible(MagicMock(), path='.', name='.', filename='') + importer.validate_file = MagicMock() + importer.parse_xml = MagicMock() + importer.get_language_id = MagicMock(**{'return_value': False}) + importer.process_books = MagicMock() + + # WHEN: Calling do_import + result = importer.do_import() + + # THEN: do_import should return False and process_books should have not been called + self.assertFalse(result) + self.assertFalse(importer.process_books.called) + + @patch('openlp.plugins.bibles.lib.importers.opensong.log') + def do_import_stop_import_test(self, mocked_log): + """ + Test do_import when the stop_import_flag is set to True + """ + # GIVEN: An instance of OpenSongBible and stop_import_flag set to True + importer = OpenSongBible(MagicMock(), path='.', name='.', filename='') + importer.validate_file = MagicMock() + importer.parse_xml = MagicMock() + importer.get_language_id = MagicMock(**{'return_value': 10}) + importer.process_books = MagicMock() + importer.stop_import_flag = True + + # WHEN: Calling do_import + result = importer.do_import() + + # THEN: do_import should return False and process_books should have not been called + self.assertFalse(result) + self.assertTrue(importer.application.process_events.called) + + self.assertTrue(importer.application.process_events.called) + + @patch('openlp.plugins.bibles.lib.importers.opensong.log') + def do_import_completes_test(self, mocked_log): + """ + Test do_import when it completes 
successfully + """ + # GIVEN: An instance of OpenSongBible and stop_import_flag set to True + importer = OpenSongBible(MagicMock(), path='.', name='.', filename='') + importer.validate_file = MagicMock() + importer.parse_xml = MagicMock() + importer.get_language_id = MagicMock(**{'return_value': 10}) + importer.process_books = MagicMock() + importer.stop_import_flag = False + + # WHEN: Calling do_import + result = importer.do_import() + + # THEN: do_import should return True + self.assertTrue(result) + def test_file_import(self): """ Test the actual import of OpenSong Bible file From 894b4fbf10c08f9f0489bab6086199c8728a3809 Mon Sep 17 00:00:00 2001 From: Philip Ridout Date: Sat, 20 Aug 2016 21:00:50 +0100 Subject: [PATCH 08/11] revert changes to http.py due to circular references --- .../plugins/bibles/forms/bibleimportform.py | 4 +- openlp/plugins/bibles/lib/http.py | 535 ++++++++++++++++++ .../bibles/lib/importers/biblegateway.py | 313 ---------- .../bibles/lib/importers/bibleserver.py | 162 ------ .../plugins/bibles/lib/importers/crosswalk.py | 171 ------ .../openlp_plugins/bibles/test_bibleserver.py | 43 +- .../openlp_plugins/bibles/test_lib_http.py | 4 +- 7 files changed, 572 insertions(+), 660 deletions(-) delete mode 100644 openlp/plugins/bibles/lib/importers/biblegateway.py delete mode 100644 openlp/plugins/bibles/lib/importers/bibleserver.py delete mode 100644 openlp/plugins/bibles/lib/importers/crosswalk.py diff --git a/openlp/plugins/bibles/forms/bibleimportform.py b/openlp/plugins/bibles/forms/bibleimportform.py index e9eee88d5..3d02228ca 100644 --- a/openlp/plugins/bibles/forms/bibleimportform.py +++ b/openlp/plugins/bibles/forms/bibleimportform.py @@ -40,9 +40,7 @@ from openlp.core.ui.lib.wizard import OpenLPWizard, WizardStrings from openlp.core.common.languagemanager import get_locale_key from openlp.plugins.bibles.lib.manager import BibleFormat from openlp.plugins.bibles.lib.db import clean_filename -from 
openlp.plugins.bibles.lib.importers.biblegateway import BGExtract -from openlp.plugins.bibles.lib.importers.bibleserver import BSExtract -from openlp.plugins.bibles.lib.importers.crosswalk import CWExtract +from openlp.plugins.bibles.lib.importers.http import CWExtract, BGExtract, BSExtract log = logging.getLogger(__name__) diff --git a/openlp/plugins/bibles/lib/http.py b/openlp/plugins/bibles/lib/http.py index 5afd107f6..6921c9005 100644 --- a/openlp/plugins/bibles/lib/http.py +++ b/openlp/plugins/bibles/lib/http.py @@ -38,10 +38,545 @@ from openlp.plugins.bibles.lib.bibleimport import BibleImport from openlp.plugins.bibles.lib.db import BibleDB, BiblesResourcesDB, Book CLEANER_REGEX = re.compile(r' |
|\'\+\'') +FIX_PUNKCTUATION_REGEX = re.compile(r'[ ]+([.,;])') +REDUCE_SPACES_REGEX = re.compile(r'[ ]{2,}') +UGLY_CHARS = { + '\u2014': ' - ', + '\u2018': '\'', + '\u2019': '\'', + '\u201c': '"', + '\u201d': '"', + ' ': ' ' +} +VERSE_NUMBER_REGEX = re.compile(r'v(\d{1,2})(\d{3})(\d{3}) verse.*') + +BIBLESERVER_LANGUAGE_CODE = { + 'fl_1': 'de', + 'fl_2': 'en', + 'fl_3': 'fr', + 'fl_4': 'it', + 'fl_5': 'es', + 'fl_6': 'pt', + 'fl_7': 'ru', + 'fl_8': 'sv', + 'fl_9': 'no', + 'fl_10': 'nl', + 'fl_11': 'cs', + 'fl_12': 'sk', + 'fl_13': 'ro', + 'fl_14': 'hr', + 'fl_15': 'hu', + 'fl_16': 'bg', + 'fl_17': 'ar', + 'fl_18': 'tr', + 'fl_19': 'pl', + 'fl_20': 'da', + 'fl_21': 'zh' +} + +CROSSWALK_LANGUAGES = { + 'Portuguese': 'pt', + 'German': 'de', + 'Italian': 'it', + 'Español': 'es', + 'French': 'fr', + 'Dutch': 'nl' +} log = logging.getLogger(__name__) +class BGExtract(RegistryProperties): + """ + Extract verses from BibleGateway + """ + def __init__(self, proxy_url=None): + log.debug('BGExtract.init("{url}")'.format(url=proxy_url)) + self.proxy_url = proxy_url + socket.setdefaulttimeout(30) + + def _remove_elements(self, parent, tag, class_=None): + """ + Remove a particular element from the BeautifulSoup tree. + + :param parent: The element from which items need to be removed. + :param tag: A string of the tab type, e.g. "div" + :param class_: An HTML class attribute for further qualification. + """ + if class_: + all_tags = parent.find_all(tag, class_) + else: + all_tags = parent.find_all(tag) + for element in all_tags: + element.extract() + + def _extract_verse(self, tag): + """ + Extract a verse (or part of a verse) from a tag. + + :param tag: The BeautifulSoup Tag element with the stuff we want. 
+ """ + if isinstance(tag, NavigableString): + return None, str(tag) + elif tag.get('class') and (tag.get('class')[0] == 'versenum' or tag.get('class')[0] == 'versenum mid-line'): + verse = str(tag.string).replace('[', '').replace(']', '').strip() + return verse, None + elif tag.get('class') and tag.get('class')[0] == 'chapternum': + verse = '1' + return verse, None + else: + verse = None + text = '' + for child in tag.contents: + c_verse, c_text = self._extract_verse(child) + if c_verse: + verse = c_verse + if text and c_text: + text += c_text + elif c_text is not None: + text = c_text + return verse, text + + def _clean_soup(self, tag): + """ + Remove all the rubbish from the HTML page. + + :param tag: The base tag within which we want to remove stuff. + """ + self._remove_elements(tag, 'sup', 'crossreference') + self._remove_elements(tag, 'sup', 'footnote') + self._remove_elements(tag, 'div', 'footnotes') + self._remove_elements(tag, 'div', 'crossrefs') + self._remove_elements(tag, 'h3') + self._remove_elements(tag, 'h4') + self._remove_elements(tag, 'h5') + + def _extract_verses(self, tags): + """ + Extract all the verses from a pre-prepared list of HTML tags. + + :param tags: A list of BeautifulSoup Tag elements. 
+ """ + verses = [] + tags = tags[::-1] + current_text = '' + for tag in tags: + verse = None + text = '' + for child in tag.contents: + c_verse, c_text = self._extract_verse(child) + if c_verse: + verse = c_verse + if text and c_text: + text += c_text + elif c_text is not None: + text = c_text + if not verse: + current_text = text + ' ' + current_text + else: + text += ' ' + current_text + current_text = '' + if text: + for old, new in UGLY_CHARS.items(): + text = text.replace(old, new) + text = ' '.join(text.split()) + if verse and text: + verse = verse.strip() + try: + verse = int(verse) + except ValueError: + verse_parts = verse.split('-') + if len(verse_parts) > 1: + verse = int(verse_parts[0]) + except TypeError: + log.warning('Illegal verse number: {verse:d}'.format(verse=verse)) + verses.append((verse, text)) + verse_list = {} + for verse, text in verses[::-1]: + verse_list[verse] = text + return verse_list + + def _extract_verses_old(self, div): + """ + Use the old style of parsing for those Bibles on BG who mysteriously have not been migrated to the new (still + broken) HTML. + + :param div: The parent div. + """ + verse_list = {} + # Cater for inconsistent mark up in the first verse of a chapter. + first_verse = div.find('versenum') + if first_verse and first_verse.contents: + verse_list[1] = str(first_verse.contents[0]) + for verse in div('sup', 'versenum'): + raw_verse_num = verse.next_element + clean_verse_num = 0 + # Not all verses exist in all translations and may or may not be represented by a verse number. If they are + # not fine, if they are it will probably be in a format that breaks int(). We will then have no idea what + # garbage may be sucked in to the verse text so if we do not get a clean int() then ignore the verse + # completely. 
+ try: + clean_verse_num = int(str(raw_verse_num)) + except ValueError: + verse_parts = str(raw_verse_num).split('-') + if len(verse_parts) > 1: + clean_verse_num = int(verse_parts[0]) + except TypeError: + log.warning('Illegal verse number: {verse:d}'.format(verse=raw_verse_num)) + if clean_verse_num: + verse_text = raw_verse_num.next_element + part = raw_verse_num.next_element.next_element + while not (isinstance(part, Tag) and part.get('class')[0] == 'versenum'): + # While we are still in the same verse grab all the text. + if isinstance(part, NavigableString): + verse_text += part + if isinstance(part.next_element, Tag) and part.next_element.name == 'div': + # Run out of verses so stop. + break + part = part.next_element + verse_list[clean_verse_num] = str(verse_text) + return verse_list + + def get_bible_chapter(self, version, book_name, chapter): + """ + Access and decode Bibles via the BibleGateway website. + + :param version: The version of the Bible like 31 for New International version. + :param book_name: Name of the Book. + :param chapter: Chapter number. 
+ """ + log.debug('BGExtract.get_bible_chapter("{version}", "{name}", "{chapter}")'.format(version=version, + name=book_name, + chapter=chapter)) + url_book_name = urllib.parse.quote(book_name.encode("utf-8")) + url_params = 'search={name}+{chapter}&version={version}'.format(name=url_book_name, + chapter=chapter, + version=version) + soup = get_soup_for_bible_ref( + 'http://biblegateway.com/passage/?{url}'.format(url=url_params), + pre_parse_regex=r'', pre_parse_substitute='') + if not soup: + return None + div = soup.find('div', 'result-text-style-normal') + if not div: + return None + self._clean_soup(div) + span_list = div.find_all('span', 'text') + log.debug('Span list: {span}'.format(span=span_list)) + if not span_list: + # If we don't get any spans then we must have the old HTML format + verse_list = self._extract_verses_old(div) + else: + verse_list = self._extract_verses(span_list) + if not verse_list: + log.debug('No content found in the BibleGateway response.') + send_error_message('parse') + return None + return SearchResults(book_name, chapter, verse_list) + + def get_books_from_http(self, version): + """ + Load a list of all books a Bible contains from BibleGateway website. 
+ + :param version: The version of the Bible like NIV for New International Version + """ + log.debug('BGExtract.get_books_from_http("{version}")'.format(version=version)) + url_params = urllib.parse.urlencode({'action': 'getVersionInfo', 'vid': '{version}'.format(version=version)}) + reference_url = 'http://biblegateway.com/versions/?{url}#books'.format(url=url_params) + page = get_web_page(reference_url) + if not page: + send_error_message('download') + return None + page_source = page.read() + try: + page_source = str(page_source, 'utf8') + except UnicodeDecodeError: + page_source = str(page_source, 'cp1251') + try: + soup = BeautifulSoup(page_source, 'lxml') + except Exception: + log.error('BeautifulSoup could not parse the Bible page.') + send_error_message('parse') + return None + if not soup: + send_error_message('parse') + return None + self.application.process_events() + content = soup.find('table', 'infotable') + if content: + content = content.find_all('tr') + if not content: + log.error('No books found in the Biblegateway response.') + send_error_message('parse') + return None + books = [] + for book in content: + book = book.find('td') + if book: + books.append(book.contents[1]) + return books + + def get_bibles_from_http(self): + """ + Load a list of bibles from BibleGateway website. 
+ + returns a list in the form [(biblename, biblekey, language_code)] + """ + log.debug('BGExtract.get_bibles_from_http') + bible_url = 'https://biblegateway.com/versions/' + soup = get_soup_for_bible_ref(bible_url) + if not soup: + return None + bible_select = soup.find('select', {'class': 'search-translation-select'}) + if not bible_select: + log.debug('No select tags found - did site change?') + return None + option_tags = bible_select.find_all('option') + if not option_tags: + log.debug('No option tags found - did site change?') + return None + current_lang = '' + bibles = [] + for ot in option_tags: + tag_class = '' + try: + tag_class = ot['class'][0] + except KeyError: + tag_class = '' + tag_text = ot.get_text() + if tag_class == 'lang': + current_lang = tag_text[tag_text.find('(') + 1:tag_text.find(')')].lower() + elif tag_class == 'spacer': + continue + else: + bibles.append((tag_text, ot['value'], current_lang)) + return bibles + + +class BSExtract(RegistryProperties): + """ + Extract verses from Bibleserver.com + """ + def __init__(self, proxy_url=None): + log.debug('BSExtract.init("{url}")'.format(url=proxy_url)) + self.proxy_url = proxy_url + socket.setdefaulttimeout(30) + + def get_bible_chapter(self, version, book_name, chapter): + """ + Access and decode bibles via Bibleserver mobile website + + :param version: The version of the bible like NIV for New International Version + :param book_name: Text name of bible book e.g. Genesis, 1. 
John, 1John or Offenbarung + :param chapter: Chapter number + """ + log.debug('BSExtract.get_bible_chapter("{version}", "{book}", "{chapter}")'.format(version=version, + book=book_name, + chapter=chapter)) + url_version = urllib.parse.quote(version.encode("utf-8")) + url_book_name = urllib.parse.quote(book_name.encode("utf-8")) + chapter_url = 'http://m.bibleserver.com/text/{version}/{name}{chapter:d}'.format(version=url_version, + name=url_book_name, + chapter=chapter) + header = ('Accept-Language', 'en') + soup = get_soup_for_bible_ref(chapter_url, header) + if not soup: + return None + self.application.process_events() + content = soup.find('div', 'content') + if not content: + log.error('No verses found in the Bibleserver response.') + send_error_message('parse') + return None + content = content.find('div').find_all('div') + verses = {} + for verse in content: + self.application.process_events() + versenumber = int(VERSE_NUMBER_REGEX.sub(r'\3', ' '.join(verse['class']))) + verses[versenumber] = verse.contents[1].rstrip('\n') + return SearchResults(book_name, chapter, verses) + + def get_books_from_http(self, version): + """ + Load a list of all books a Bible contains from Bibleserver mobile website. 
+ + :param version: The version of the Bible like NIV for New International Version + """ + log.debug('BSExtract.get_books_from_http("{version}")'.format(version=version)) + url_version = urllib.parse.quote(version.encode("utf-8")) + chapter_url = 'http://m.bibleserver.com/overlay/selectBook?translation={version}'.format(version=url_version) + soup = get_soup_for_bible_ref(chapter_url) + if not soup: + return None + content = soup.find('ul') + if not content: + log.error('No books found in the Bibleserver response.') + send_error_message('parse') + return None + content = content.find_all('li') + return [book.contents[0].contents[0] for book in content if len(book.contents[0].contents)] + + def get_bibles_from_http(self): + """ + Load a list of bibles from Bibleserver website. + + returns a list in the form [(biblename, biblekey, language_code)] + """ + log.debug('BSExtract.get_bibles_from_http') + bible_url = 'http://www.bibleserver.com/index.php?language=2' + soup = get_soup_for_bible_ref(bible_url) + if not soup: + return None + bible_links = soup.find_all('a', {'class': 'trlCell'}) + if not bible_links: + log.debug('No a tags found - did site change?') + return None + bibles = [] + for link in bible_links: + bible_name = link.get_text() + # Skip any audio + if 'audio' in bible_name.lower(): + continue + try: + bible_link = link['href'] + bible_key = bible_link[bible_link.rfind('/') + 1:] + css_classes = link['class'] + except KeyError: + log.debug('No href/class attribute found - did site change?') + language_code = '' + for css_class in css_classes: + if css_class.startswith('fl_'): + try: + language_code = BIBLESERVER_LANGUAGE_CODE[css_class] + except KeyError: + language_code = '' + bibles.append((bible_name, bible_key, language_code)) + return bibles + + +class CWExtract(RegistryProperties): + """ + Extract verses from CrossWalk/BibleStudyTools + """ + def __init__(self, proxy_url=None): + log.debug('CWExtract.init("{url}")'.format(url=proxy_url)) + 
self.proxy_url = proxy_url + socket.setdefaulttimeout(30) + + def get_bible_chapter(self, version, book_name, chapter): + """ + Access and decode bibles via the Crosswalk website + + :param version: The version of the Bible like niv for New International Version + :param book_name: Text name of in english e.g. 'gen' for Genesis + :param chapter: Chapter number + """ + log.debug('CWExtract.get_bible_chapter("{version}", "{book}", "{chapter}")'.format(version=version, + book=book_name, + chapter=chapter)) + url_book_name = book_name.replace(' ', '-') + url_book_name = url_book_name.lower() + url_book_name = urllib.parse.quote(url_book_name.encode("utf-8")) + chapter_url = 'http://www.biblestudytools.com/{version}/{book}/{chapter}.html'.format(version=version, + book=url_book_name, + chapter=chapter) + soup = get_soup_for_bible_ref(chapter_url) + if not soup: + return None + self.application.process_events() + verses_div = soup.find_all('div', 'verse') + if not verses_div: + log.error('No verses found in the CrossWalk response.') + send_error_message('parse') + return None + verses = {} + for verse in verses_div: + self.application.process_events() + verse_number = int(verse.find('strong').contents[0]) + verse_span = verse.find('span') + tags_to_remove = verse_span.find_all(['a', 'sup']) + for tag in tags_to_remove: + tag.decompose() + verse_text = verse_span.get_text() + self.application.process_events() + # Fix up leading and trailing spaces, multiple spaces, and spaces between text and , and . + verse_text = verse_text.strip('\n\r\t ') + verse_text = REDUCE_SPACES_REGEX.sub(' ', verse_text) + verse_text = FIX_PUNKCTUATION_REGEX.sub(r'\1', verse_text) + verses[verse_number] = verse_text + return SearchResults(book_name, chapter, verses) + + def get_books_from_http(self, version): + """ + Load a list of all books a Bible contain from the Crosswalk website. 
+ + :param version: The version of the bible like NIV for New International Version + """ + log.debug('CWExtract.get_books_from_http("{version}")'.format(version=version)) + chapter_url = 'http://www.biblestudytools.com/{version}/'.format(version=version) + soup = get_soup_for_bible_ref(chapter_url) + if not soup: + return None + content = soup.find_all('h4', {'class': 'small-header'}) + if not content: + log.error('No books found in the Crosswalk response.') + send_error_message('parse') + return None + books = [] + for book in content: + books.append(book.contents[0]) + return books + + def get_bibles_from_http(self): + """ + Load a list of bibles from Crosswalk website. + returns a list in the form [(biblename, biblekey, language_code)] + """ + log.debug('CWExtract.get_bibles_from_http') + bible_url = 'http://www.biblestudytools.com/bible-versions/' + soup = get_soup_for_bible_ref(bible_url) + if not soup: + return None + h4_tags = soup.find_all('h4', {'class': 'small-header'}) + if not h4_tags: + log.debug('No h4 tags found - did site change?') + return None + bibles = [] + for h4t in h4_tags: + short_name = None + if h4t.span: + short_name = h4t.span.get_text().strip().lower() + else: + log.error('No span tag found - did site change?') + return None + if not short_name: + continue + h4t.span.extract() + tag_text = h4t.get_text().strip() + # The names of non-english bibles has their language in parentheses at the end + if tag_text.endswith(')'): + language = tag_text[tag_text.rfind('(') + 1:-1] + if language in CROSSWALK_LANGUAGES: + language_code = CROSSWALK_LANGUAGES[language] + else: + language_code = '' + # ... except for those that don't... 
+ elif 'latin' in tag_text.lower(): + language_code = 'la' + elif 'la biblia' in tag_text.lower() or 'nueva' in tag_text.lower(): + language_code = 'es' + elif 'chinese' in tag_text.lower(): + language_code = 'zh' + elif 'greek' in tag_text.lower(): + language_code = 'el' + elif 'nova' in tag_text.lower(): + language_code = 'pt' + else: + language_code = 'en' + bibles.append((tag_text, short_name, language_code)) + return bibles + + class HTTPBible(BibleImport, RegistryProperties): log.info('{name} HTTPBible loaded'.format(name=__name__)) diff --git a/openlp/plugins/bibles/lib/importers/biblegateway.py b/openlp/plugins/bibles/lib/importers/biblegateway.py deleted file mode 100644 index f3caa2204..000000000 --- a/openlp/plugins/bibles/lib/importers/biblegateway.py +++ /dev/null @@ -1,313 +0,0 @@ -# -*- coding: utf-8 -*- -# vim: autoindent shiftwidth=4 expandtab textwidth=120 tabstop=4 softtabstop=4 - -############################################################################### -# OpenLP - Open Source Lyrics Projection # -# --------------------------------------------------------------------------- # -# Copyright (c) 2008-2016 OpenLP Developers # -# --------------------------------------------------------------------------- # -# This program is free software; you can redistribute it and/or modify it # -# under the terms of the GNU General Public License as published by the Free # -# Software Foundation; version 2 of the License. # -# # -# This program is distributed in the hope that it will be useful, but WITHOUT # -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # -# more details. 
# -# # -# You should have received a copy of the GNU General Public License along # -# with this program; if not, write to the Free Software Foundation, Inc., 59 # -# Temple Place, Suite 330, Boston, MA 02111-1307 USA # -############################################################################### -""" -The :mod:`biblegateway` module enables OpenLP to retrieve scripture from http://biblegateway.com. -""" -import logging -import socket -import urllib.parse -import urllib.error - -from bs4 import BeautifulSoup, NavigableString, Tag - -from openlp.core.common import RegistryProperties -from openlp.core.lib.webpagereader import get_web_page -from openlp.plugins.bibles.lib import SearchResults -from openlp.plugins.bibles.lib.http import get_soup_for_bible_ref, send_error_message - -UGLY_CHARS = { - '\u2014': ' - ', - '\u2018': '\'', - '\u2019': '\'', - '\u201c': '"', - '\u201d': '"', - ' ': ' ' -} - -log = logging.getLogger(__name__) - - -class BGExtract(RegistryProperties): - """ - Extract verses from BibleGateway - """ - def __init__(self, proxy_url=None): - log.debug('BGExtract.init("{url}")'.format(url=proxy_url)) - self.proxy_url = proxy_url - socket.setdefaulttimeout(30) - - def _remove_elements(self, parent, tag, class_=None): - """ - Remove a particular element from the BeautifulSoup tree. - - :param parent: The element from which items need to be removed. - :param tag: A string of the tab type, e.g. "div" - :param class_: An HTML class attribute for further qualification. - """ - if class_: - all_tags = parent.find_all(tag, class_) - else: - all_tags = parent.find_all(tag) - for element in all_tags: - element.extract() - - def _extract_verse(self, tag): - """ - Extract a verse (or part of a verse) from a tag. - - :param tag: The BeautifulSoup Tag element with the stuff we want. 
- """ - if isinstance(tag, NavigableString): - return None, str(tag) - elif tag.get('class') and (tag.get('class')[0] == 'versenum' or tag.get('class')[0] == 'versenum mid-line'): - verse = str(tag.string).replace('[', '').replace(']', '').strip() - return verse, None - elif tag.get('class') and tag.get('class')[0] == 'chapternum': - verse = '1' - return verse, None - else: - verse = None - text = '' - for child in tag.contents: - c_verse, c_text = self._extract_verse(child) - if c_verse: - verse = c_verse - if text and c_text: - text += c_text - elif c_text is not None: - text = c_text - return verse, text - - def _clean_soup(self, tag): - """ - Remove all the rubbish from the HTML page. - - :param tag: The base tag within which we want to remove stuff. - """ - self._remove_elements(tag, 'sup', 'crossreference') - self._remove_elements(tag, 'sup', 'footnote') - self._remove_elements(tag, 'div', 'footnotes') - self._remove_elements(tag, 'div', 'crossrefs') - self._remove_elements(tag, 'h3') - self._remove_elements(tag, 'h4') - self._remove_elements(tag, 'h5') - - def _extract_verses(self, tags): - """ - Extract all the verses from a pre-prepared list of HTML tags. - - :param tags: A list of BeautifulSoup Tag elements. 
- """ - verses = [] - tags = tags[::-1] - current_text = '' - for tag in tags: - verse = None - text = '' - for child in tag.contents: - c_verse, c_text = self._extract_verse(child) - if c_verse: - verse = c_verse - if text and c_text: - text += c_text - elif c_text is not None: - text = c_text - if not verse: - current_text = text + ' ' + current_text - else: - text += ' ' + current_text - current_text = '' - if text: - for old, new in UGLY_CHARS.items(): - text = text.replace(old, new) - text = ' '.join(text.split()) - if verse and text: - verse = verse.strip() - try: - verse = int(verse) - except ValueError: - verse_parts = verse.split('-') - if len(verse_parts) > 1: - verse = int(verse_parts[0]) - except TypeError: - log.warning('Illegal verse number: {verse:d}'.format(verse=verse)) - verses.append((verse, text)) - verse_list = {} - for verse, text in verses[::-1]: - verse_list[verse] = text - return verse_list - - def _extract_verses_old(self, div): - """ - Use the old style of parsing for those Bibles on BG who mysteriously have not been migrated to the new (still - broken) HTML. - - :param div: The parent div. - """ - verse_list = {} - # Cater for inconsistent mark up in the first verse of a chapter. - first_verse = div.find('versenum') - if first_verse and first_verse.contents: - verse_list[1] = str(first_verse.contents[0]) - for verse in div('sup', 'versenum'): - raw_verse_num = verse.next_element - clean_verse_num = 0 - # Not all verses exist in all translations and may or may not be represented by a verse number. If they are - # not fine, if they are it will probably be in a format that breaks int(). We will then have no idea what - # garbage may be sucked in to the verse text so if we do not get a clean int() then ignore the verse - # completely. 
- try: - clean_verse_num = int(str(raw_verse_num)) - except ValueError: - verse_parts = str(raw_verse_num).split('-') - if len(verse_parts) > 1: - clean_verse_num = int(verse_parts[0]) - except TypeError: - log.warning('Illegal verse number: {verse:d}'.format(verse=raw_verse_num)) - if clean_verse_num: - verse_text = raw_verse_num.next_element - part = raw_verse_num.next_element.next_element - while not (isinstance(part, Tag) and part.get('class')[0] == 'versenum'): - # While we are still in the same verse grab all the text. - if isinstance(part, NavigableString): - verse_text += part - if isinstance(part.next_element, Tag) and part.next_element.name == 'div': - # Run out of verses so stop. - break - part = part.next_element - verse_list[clean_verse_num] = str(verse_text) - return verse_list - - def get_bible_chapter(self, version, book_name, chapter): - """ - Access and decode Bibles via the BibleGateway website. - - :param version: The version of the Bible like 31 for New International version. - :param book_name: Name of the Book. - :param chapter: Chapter number. 
- """ - log.debug('BGExtract.get_bible_chapter("{version}", "{name}", "{chapter}")'.format(version=version, - name=book_name, - chapter=chapter)) - url_book_name = urllib.parse.quote(book_name.encode("utf-8")) - url_params = 'search={name}+{chapter}&version={version}'.format(name=url_book_name, - chapter=chapter, - version=version) - soup = get_soup_for_bible_ref( - 'http://biblegateway.com/passage/?{url}'.format(url=url_params), - pre_parse_regex=r'', pre_parse_substitute='') - if not soup: - return None - div = soup.find('div', 'result-text-style-normal') - if not div: - return None - self._clean_soup(div) - span_list = div.find_all('span', 'text') - log.debug('Span list: {span}'.format(span=span_list)) - if not span_list: - # If we don't get any spans then we must have the old HTML format - verse_list = self._extract_verses_old(div) - else: - verse_list = self._extract_verses(span_list) - if not verse_list: - log.debug('No content found in the BibleGateway response.') - send_error_message('parse') - return None - return SearchResults(book_name, chapter, verse_list) - - def get_books_from_http(self, version): - """ - Load a list of all books a Bible contains from BibleGateway website. 
- - :param version: The version of the Bible like NIV for New International Version - """ - log.debug('BGExtract.get_books_from_http("{version}")'.format(version=version)) - url_params = urllib.parse.urlencode({'action': 'getVersionInfo', 'vid': '{version}'.format(version=version)}) - reference_url = 'http://biblegateway.com/versions/?{url}#books'.format(url=url_params) - page = get_web_page(reference_url) - if not page: - send_error_message('download') - return None - page_source = page.read() - try: - page_source = str(page_source, 'utf8') - except UnicodeDecodeError: - page_source = str(page_source, 'cp1251') - try: - soup = BeautifulSoup(page_source, 'lxml') - except Exception: - log.error('BeautifulSoup could not parse the Bible page.') - send_error_message('parse') - return None - if not soup: - send_error_message('parse') - return None - self.application.process_events() - content = soup.find('table', 'infotable') - if content: - content = content.find_all('tr') - if not content: - log.error('No books found in the Biblegateway response.') - send_error_message('parse') - return None - books = [] - for book in content: - book = book.find('td') - if book: - books.append(book.contents[1]) - return books - - def get_bibles_from_http(self): - """ - Load a list of bibles from BibleGateway website. 
- - returns a list in the form [(biblename, biblekey, language_code)] - """ - log.debug('BGExtract.get_bibles_from_http') - bible_url = 'https://biblegateway.com/versions/' - soup = get_soup_for_bible_ref(bible_url) - if not soup: - return None - bible_select = soup.find('select', {'class': 'search-translation-select'}) - if not bible_select: - log.debug('No select tags found - did site change?') - return None - option_tags = bible_select.find_all('option') - if not option_tags: - log.debug('No option tags found - did site change?') - return None - current_lang = '' - bibles = [] - for ot in option_tags: - tag_class = '' - try: - tag_class = ot['class'][0] - except KeyError: - tag_class = '' - tag_text = ot.get_text() - if tag_class == 'lang': - current_lang = tag_text[tag_text.find('(') + 1:tag_text.find(')')].lower() - elif tag_class == 'spacer': - continue - else: - bibles.append((tag_text, ot['value'], current_lang)) - return bibles diff --git a/openlp/plugins/bibles/lib/importers/bibleserver.py b/openlp/plugins/bibles/lib/importers/bibleserver.py deleted file mode 100644 index 16924d84a..000000000 --- a/openlp/plugins/bibles/lib/importers/bibleserver.py +++ /dev/null @@ -1,162 +0,0 @@ -# -*- coding: utf-8 -*- -# vim: autoindent shiftwidth=4 expandtab textwidth=120 tabstop=4 softtabstop=4 - -############################################################################### -# OpenLP - Open Source Lyrics Projection # -# --------------------------------------------------------------------------- # -# Copyright (c) 2008-2016 OpenLP Developers # -# --------------------------------------------------------------------------- # -# This program is free software; you can redistribute it and/or modify it # -# under the terms of the GNU General Public License as published by the Free # -# Software Foundation; version 2 of the License. 
# -# # -# This program is distributed in the hope that it will be useful, but WITHOUT # -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # -# more details. # -# # -# You should have received a copy of the GNU General Public License along # -# with this program; if not, write to the Free Software Foundation, Inc., 59 # -# Temple Place, Suite 330, Boston, MA 02111-1307 USA # -############################################################################### -""" -The :mod:`bibleserver` module enables OpenLP to retrieve scripture from http://bibleserver.com. -""" -import logging -import re -import socket -import urllib.parse -import urllib.error - -from openlp.core.common import RegistryProperties -from openlp.plugins.bibles.lib import SearchResults -from openlp.plugins.bibles.lib.http import get_soup_for_bible_ref, send_error_message - -VERSE_NUMBER_REGEX = re.compile(r'v(\d{1,2})(\d{3})(\d{3}) verse.*') - -BIBLESERVER_LANGUAGE_CODE = { - 'fl_1': 'de', - 'fl_2': 'en', - 'fl_3': 'fr', - 'fl_4': 'it', - 'fl_5': 'es', - 'fl_6': 'pt', - 'fl_7': 'ru', - 'fl_8': 'sv', - 'fl_9': 'no', - 'fl_10': 'nl', - 'fl_11': 'cs', - 'fl_12': 'sk', - 'fl_13': 'ro', - 'fl_14': 'hr', - 'fl_15': 'hu', - 'fl_16': 'bg', - 'fl_17': 'ar', - 'fl_18': 'tr', - 'fl_19': 'pl', - 'fl_20': 'da', - 'fl_21': 'zh' -} - -log = logging.getLogger(__name__) - - -class BSExtract(RegistryProperties): - """ - Extract verses from Bibleserver.com - """ - def __init__(self, proxy_url=None): - log.debug('BSExtract.init("{url}")'.format(url=proxy_url)) - self.proxy_url = proxy_url - socket.setdefaulttimeout(30) - - def get_bible_chapter(self, version, book_name, chapter): - """ - Access and decode bibles via Bibleserver mobile website - - :param version: The version of the bible like NIV for New International Version - :param book_name: Text name of bible book e.g. Genesis, 1. 
John, 1John or Offenbarung - :param chapter: Chapter number - """ - log.debug('BSExtract.get_bible_chapter("{version}", "{book}", "{chapter}")'.format(version=version, - book=book_name, - chapter=chapter)) - url_version = urllib.parse.quote(version.encode("utf-8")) - url_book_name = urllib.parse.quote(book_name.encode("utf-8")) - chapter_url = 'http://m.bibleserver.com/text/{version}/{name}{chapter:d}'.format(version=url_version, - name=url_book_name, - chapter=chapter) - header = ('Accept-Language', 'en') - soup = get_soup_for_bible_ref(chapter_url, header) - if not soup: - return None - self.application.process_events() - content = soup.find('div', 'content') - if not content: - log.error('No verses found in the Bibleserver response.') - send_error_message('parse') - return None - content = content.find('div').find_all('div') - verses = {} - for verse in content: - self.application.process_events() - versenumber = int(VERSE_NUMBER_REGEX.sub(r'\3', ' '.join(verse['class']))) - verses[versenumber] = verse.contents[1].rstrip('\n') - return SearchResults(book_name, chapter, verses) - - def get_books_from_http(self, version): - """ - Load a list of all books a Bible contains from Bibleserver mobile website. 
- - :param version: The version of the Bible like NIV for New International Version - """ - log.debug('BSExtract.get_books_from_http("{version}")'.format(version=version)) - url_version = urllib.parse.quote(version.encode("utf-8")) - chapter_url = 'http://m.bibleserver.com/overlay/selectBook?translation={version}'.format(version=url_version) - soup = get_soup_for_bible_ref(chapter_url) - if not soup: - return None - content = soup.find('ul') - if not content: - log.error('No books found in the Bibleserver response.') - send_error_message('parse') - return None - content = content.find_all('li') - return [book.contents[0].contents[0] for book in content if len(book.contents[0].contents)] - - def get_bibles_from_http(self): - """ - Load a list of bibles from Bibleserver website. - - returns a list in the form [(biblename, biblekey, language_code)] - """ - log.debug('BSExtract.get_bibles_from_http') - bible_url = 'http://www.bibleserver.com/index.php?language=2' - soup = get_soup_for_bible_ref(bible_url) - if not soup: - return None - bible_links = soup.find_all('a', {'class': 'trlCell'}) - if not bible_links: - log.debug('No a tags found - did site change?') - return None - bibles = [] - for link in bible_links: - bible_name = link.get_text() - # Skip any audio - if 'audio' in bible_name.lower(): - continue - try: - bible_link = link['href'] - bible_key = bible_link[bible_link.rfind('/') + 1:] - css_classes = link['class'] - except KeyError: - log.debug('No href/class attribute found - did site change?') - language_code = '' - for css_class in css_classes: - if css_class.startswith('fl_'): - try: - language_code = BIBLESERVER_LANGUAGE_CODE[css_class] - except KeyError: - language_code = '' - bibles.append((bible_name, bible_key, language_code)) - return bibles diff --git a/openlp/plugins/bibles/lib/importers/crosswalk.py b/openlp/plugins/bibles/lib/importers/crosswalk.py deleted file mode 100644 index fb354dd29..000000000 --- 
a/openlp/plugins/bibles/lib/importers/crosswalk.py +++ /dev/null @@ -1,171 +0,0 @@ -# -*- coding: utf-8 -*- -# vim: autoindent shiftwidth=4 expandtab textwidth=120 tabstop=4 softtabstop=4 - -############################################################################### -# OpenLP - Open Source Lyrics Projection # -# --------------------------------------------------------------------------- # -# Copyright (c) 2008-2016 OpenLP Developers # -# --------------------------------------------------------------------------- # -# This program is free software; you can redistribute it and/or modify it # -# under the terms of the GNU General Public License as published by the Free # -# Software Foundation; version 2 of the License. # -# # -# This program is distributed in the hope that it will be useful, but WITHOUT # -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # -# more details. # -# # -# You should have received a copy of the GNU General Public License along # -# with this program; if not, write to the Free Software Foundation, Inc., 59 # -# Temple Place, Suite 330, Boston, MA 02111-1307 USA # -############################################################################### -""" -The :mod:`crosswalk` module enables OpenLP to retrieve scripture from www.biblestudytools.com. 
-""" -import logging -import re -import socket -import urllib.parse -import urllib.error - -from openlp.core.common import RegistryProperties -from openlp.plugins.bibles.lib import SearchResults -from openlp.plugins.bibles.lib.http import get_soup_for_bible_ref, send_error_message - -FIX_PUNKCTUATION_REGEX = re.compile(r'[ ]+([.,;])') -REDUCE_SPACES_REGEX = re.compile(r'[ ]{2,}') - - -CROSSWALK_LANGUAGES = { - 'Portuguese': 'pt', - 'German': 'de', - 'Italian': 'it', - 'Español': 'es', - 'French': 'fr', - 'Dutch': 'nl' -} - -log = logging.getLogger(__name__) - - -class CWExtract(RegistryProperties): - """ - Extract verses from CrossWalk/BibleStudyTools - """ - def __init__(self, proxy_url=None): - log.debug('CWExtract.init("{url}")'.format(url=proxy_url)) - self.proxy_url = proxy_url - socket.setdefaulttimeout(30) - - def get_bible_chapter(self, version, book_name, chapter): - """ - Access and decode bibles via the Crosswalk website - - :param version: The version of the Bible like niv for New International Version - :param book_name: Text name of in english e.g. 
'gen' for Genesis - :param chapter: Chapter number - """ - log.debug('CWExtract.get_bible_chapter("{version}", "{book}", "{chapter}")'.format(version=version, - book=book_name, - chapter=chapter)) - url_book_name = book_name.replace(' ', '-') - url_book_name = url_book_name.lower() - url_book_name = urllib.parse.quote(url_book_name.encode("utf-8")) - chapter_url = 'http://www.biblestudytools.com/{version}/{book}/{chapter}.html'.format(version=version, - book=url_book_name, - chapter=chapter) - soup = get_soup_for_bible_ref(chapter_url) - if not soup: - return None - self.application.process_events() - verses_div = soup.find_all('div', 'verse') - if not verses_div: - log.error('No verses found in the CrossWalk response.') - send_error_message('parse') - return None - verses = {} - for verse in verses_div: - self.application.process_events() - verse_number = int(verse.find('strong').contents[0]) - verse_span = verse.find('span') - tags_to_remove = verse_span.find_all(['a', 'sup']) - for tag in tags_to_remove: - tag.decompose() - verse_text = verse_span.get_text() - self.application.process_events() - # Fix up leading and trailing spaces, multiple spaces, and spaces between text and , and . - verse_text = verse_text.strip('\n\r\t ') - verse_text = REDUCE_SPACES_REGEX.sub(' ', verse_text) - verse_text = FIX_PUNKCTUATION_REGEX.sub(r'\1', verse_text) - verses[verse_number] = verse_text - return SearchResults(book_name, chapter, verses) - - def get_books_from_http(self, version): - """ - Load a list of all books a Bible contain from the Crosswalk website. 
- - :param version: The version of the bible like NIV for New International Version - """ - log.debug('CWExtract.get_books_from_http("{version}")'.format(version=version)) - chapter_url = 'http://www.biblestudytools.com/{version}/'.format(version=version) - soup = get_soup_for_bible_ref(chapter_url) - if not soup: - return None - content = soup.find_all('h4', {'class': 'small-header'}) - if not content: - log.error('No books found in the Crosswalk response.') - send_error_message('parse') - return None - books = [] - for book in content: - books.append(book.contents[0]) - return books - - def get_bibles_from_http(self): - """ - Load a list of bibles from Crosswalk website. - returns a list in the form [(biblename, biblekey, language_code)] - """ - log.debug('CWExtract.get_bibles_from_http') - bible_url = 'http://www.biblestudytools.com/bible-versions/' - soup = get_soup_for_bible_ref(bible_url) - if not soup: - return None - h4_tags = soup.find_all('h4', {'class': 'small-header'}) - if not h4_tags: - log.debug('No h4 tags found - did site change?') - return None - bibles = [] - for h4t in h4_tags: - short_name = None - if h4t.span: - short_name = h4t.span.get_text().strip().lower() - else: - log.error('No span tag found - did site change?') - return None - if not short_name: - continue - h4t.span.extract() - tag_text = h4t.get_text().strip() - # The names of non-english bibles has their language in parentheses at the end - if tag_text.endswith(')'): - language = tag_text[tag_text.rfind('(') + 1:-1] - if language in CROSSWALK_LANGUAGES: - language_code = CROSSWALK_LANGUAGES[language] - else: - language_code = '' - # ... except for those that don't... 
- elif 'latin' in tag_text.lower(): - language_code = 'la' - elif 'la biblia' in tag_text.lower() or 'nueva' in tag_text.lower(): - language_code = 'es' - elif 'chinese' in tag_text.lower(): - language_code = 'zh' - elif 'greek' in tag_text.lower(): - language_code = 'el' - elif 'nova' in tag_text.lower(): - language_code = 'pt' - else: - language_code = 'en' - bibles.append((tag_text, short_name, language_code)) - return bibles diff --git a/tests/functional/openlp_plugins/bibles/test_bibleserver.py b/tests/functional/openlp_plugins/bibles/test_bibleserver.py index 0849a63e3..839c81008 100644 --- a/tests/functional/openlp_plugins/bibles/test_bibleserver.py +++ b/tests/functional/openlp_plugins/bibles/test_bibleserver.py @@ -20,13 +20,41 @@ # Temple Place, Suite 330, Boston, MA 02111-1307 USA # ############################################################################### """ -This module contains tests for the bibleserver module of the Bibles plugin. +This module contains tests for the http module of the Bibles plugin. 
""" from unittest import TestCase from bs4 import BeautifulSoup from tests.functional import patch, MagicMock -from openlp.plugins.bibles.lib.importers.bibleserver import BSExtract +from openlp.plugins.bibles.lib.importers.http import BSExtract + +# TODO: Items left to test +# BGExtract +# __init__ +# _remove_elements +# _extract_verse +# _clean_soup +# _extract_verses +# _extract_verses_old +# get_bible_chapter +# get_books_from_http +# _get_application +# CWExtract +# __init__ +# get_bible_chapter +# get_books_from_http +# _get_application +# HTTPBible +# __init__ +# do_import +# get_verses +# get_chapter +# get_books +# get_chapter_count +# get_verse_count +# _get_application +# get_soup_for_bible_ref +# send_error_message class TestBSExtract(TestCase): @@ -40,12 +68,11 @@ class TestBSExtract(TestCase): # get_books_from_http # _get_application def setUp(self): - self.get_soup_for_bible_ref_patcher = patch( - 'openlp.plugins.bibles.lib.importers.bibleserver.get_soup_for_bible_ref') - self.log_patcher = patch('openlp.plugins.bibles.lib.importers.bibleserver.log') - self.send_error_message_patcher = patch('openlp.plugins.bibles.lib.importers.bibleserver.send_error_message') - self.socket_patcher = patch('openlp.plugins.bibles.lib.http.socket') - self.urllib_patcher = patch('openlp.plugins.bibles.lib.importers.bibleserver.urllib') + self.get_soup_for_bible_ref_patcher = patch('openlp.plugins.bibles.lib.importers.http.get_soup_for_bible_ref') + self.log_patcher = patch('openlp.plugins.bibles.lib.importers.http.log') + self.send_error_message_patcher = patch('openlp.plugins.bibles.lib.importers.http.send_error_message') + self.socket_patcher = patch('openlp.plugins.bibles.lib.importers.http.socket') + self.urllib_patcher = patch('openlp.plugins.bibles.lib.importers.http.urllib') self.mock_get_soup_for_bible_ref = self.get_soup_for_bible_ref_patcher.start() self.mock_log = self.log_patcher.start() diff --git a/tests/interfaces/openlp_plugins/bibles/test_lib_http.py 
b/tests/interfaces/openlp_plugins/bibles/test_lib_http.py index fd557eece..084bfa476 100644 --- a/tests/interfaces/openlp_plugins/bibles/test_lib_http.py +++ b/tests/interfaces/openlp_plugins/bibles/test_lib_http.py @@ -25,9 +25,7 @@ from unittest import TestCase, skip from openlp.core.common import Registry -from openlp.plugins.bibles.lib.importers.biblegateway import BGExtract -from openlp.plugins.bibles.lib.importers.bibleserver import BSExtract -from openlp.plugins.bibles.lib.importers.crosswalk import CWExtract +from openlp.plugins.bibles.lib.importers.http import BGExtract, CWExtract, BSExtract from tests.interfaces import MagicMock From aefcd48cc3d92a1149ee0f1228111379c15d8fcd Mon Sep 17 00:00:00 2001 From: Philip Ridout Date: Sat, 20 Aug 2016 21:32:25 +0100 Subject: [PATCH 09/11] reverted changes to bibles http.py --- openlp/plugins/bibles/lib/{ => importers}/http.py | 0 openlp/plugins/bibles/lib/manager.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename openlp/plugins/bibles/lib/{ => importers}/http.py (100%) diff --git a/openlp/plugins/bibles/lib/http.py b/openlp/plugins/bibles/lib/importers/http.py similarity index 100% rename from openlp/plugins/bibles/lib/http.py rename to openlp/plugins/bibles/lib/importers/http.py diff --git a/openlp/plugins/bibles/lib/manager.py b/openlp/plugins/bibles/lib/manager.py index 2734411f5..d2286bed2 100644 --- a/openlp/plugins/bibles/lib/manager.py +++ b/openlp/plugins/bibles/lib/manager.py @@ -27,7 +27,7 @@ from openlp.core.common import RegistryProperties, AppLocation, Settings, transl from openlp.plugins.bibles.lib import parse_reference, LanguageSelection from openlp.plugins.bibles.lib.db import BibleDB, BibleMeta from .importers.csvbible import CSVBible -from .http import HTTPBible +from .importers.http import HTTPBible from .importers.opensong import OpenSongBible from .importers.osis import OSISBible from .importers.zefania import ZefaniaBible From 3c29427866a37877c21c809318464bcada9df9e0 Mon Sep 17 
00:00:00 2001 From: Philip Ridout Date: Sun, 21 Aug 2016 08:39:40 +0100 Subject: [PATCH 10/11] Added some doc strings which I missed --- .../plugins/bibles/lib/importers/opensong.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/openlp/plugins/bibles/lib/importers/opensong.py b/openlp/plugins/bibles/lib/importers/opensong.py index c0a82a4ff..201bea1da 100644 --- a/openlp/plugins/bibles/lib/importers/opensong.py +++ b/openlp/plugins/bibles/lib/importers/opensong.py @@ -42,6 +42,7 @@ class OpenSongBible(BibleImport): Recursively get all text in an objectify element and its child elements. :param element: An objectify element to get the text from + :return: The text content of the element (str) """ verse_text = '' if element.text: @@ -110,6 +111,12 @@ class OpenSongBible(BibleImport): return True def process_books(self, books): + """ + Extract and create the books from the objectified xml + + :param books: Objectified xml + :return: None + """ for book in books: if self.stop_import_flag: break @@ -118,6 +125,13 @@ class OpenSongBible(BibleImport): self.session.commit() def process_chapters(self, book, chapters): + """ + Extract and create the chapters from the objectified xml for the book `book` + + :param book: A database Book object to add the chapters to + :param chapters: Objectified xml containing chapters + :return: None + """ chapter_number = 0 for chapter in chapters: if self.stop_import_flag: @@ -129,6 +143,14 @@ class OpenSongBible(BibleImport): ).format(name=book.name, chapter=chapter_number)) def process_verses(self, book, chapter_number, verses): + """ + Extract and create the verses from the objectified xml + + :param book: A database Book object + :param chapter_number: The chapter number to add the verses to (int) + :param verses: Objectified xml containing verses + :return: None + """ verse_number = 0 for verse in verses: if self.stop_import_flag: @@ -139,6 +161,9 @@ class OpenSongBible(BibleImport): def do_import(self, 
bible_name=None): """ Loads an Open Song Bible from a file. + + :param bible_name: The name of the bible being imported + :return: True if import completed, False if import was unsuccessful """ log.debug('Starting OpenSong import from "{name}"'.format(name=self.filename)) try: From 28d94b7d10162a9c6f4a2f6830b47529ed015f49 Mon Sep 17 00:00:00 2001 From: Philip Ridout Date: Sun, 21 Aug 2016 21:36:59 +0100 Subject: [PATCH 11/11] moved some static methods to module level --- .../plugins/bibles/lib/importers/opensong.py | 153 +++++++++--------- .../bibles/test_opensongimport.py | 52 +++--- 2 files changed, 103 insertions(+), 102 deletions(-) diff --git a/openlp/plugins/bibles/lib/importers/opensong.py b/openlp/plugins/bibles/lib/importers/opensong.py index 201bea1da..01be8407e 100644 --- a/openlp/plugins/bibles/lib/importers/opensong.py +++ b/openlp/plugins/bibles/lib/importers/opensong.py @@ -32,84 +32,62 @@ from openlp.plugins.bibles.lib.bibleimport import BibleImport log = logging.getLogger(__name__) +def get_text(element): + """ + Recursively get all text in an objectify element and its child elements. + + :param element: An objectify element to get the text from + :return: The text content of the element (str) + """ + verse_text = '' + if element.text: + verse_text = element.text + for sub_element in element.iterchildren(): + verse_text += get_text(sub_element) + if element.tail: + verse_text += element.tail + return verse_text + + +def parse_chapter_number(number, previous_number): + """ + Parse the chapter number + + :param number: The raw data from the xml + :param previous_number: The previous chapter number + :return: Number of current chapter. 
(Int) + """ + if number: + return int(number.split()[-1]) + return previous_number + 1 + + +def parse_verse_number(number, previous_number): + """ + Parse the verse number retrieved from the xml + + :param number: The raw data from the xml + :param previous_number: The previous verse number + :return: Number of current verse. (Int) + """ + if not number: + return previous_number + 1 + try: + return int(number) + except ValueError: + verse_parts = number.split('-') + if len(verse_parts) > 1: + number = int(verse_parts[0]) + return number + except TypeError: + log.warning('Illegal verse number: {verse_no}'.format(verse_no=str(number))) + return previous_number + 1 + + class OpenSongBible(BibleImport): """ OpenSong Bible format importer class. This class is used to import Bibles from OpenSong's XML format. """ - @staticmethod - def get_text(element): - """ - Recursively get all text in an objectify element and its child elements. - - :param element: An objectify element to get the text from - :return: The text content of the element (str) - """ - verse_text = '' - if element.text: - verse_text = element.text - for sub_element in element.iterchildren(): - verse_text += OpenSongBible.get_text(sub_element) - if element.tail: - verse_text += element.tail - return verse_text - - @staticmethod - def parse_chapter_number(number, previous_number): - """ - Parse the chapter number - - :param number: The raw data from the xml - :param previous_number: The previous chapter number - :return: Number of current chapter. (Int) - """ - if number: - return int(number.split()[-1]) - return previous_number + 1 - - @staticmethod - def parse_verse_number(number, previous_number): - """ - Parse the verse number retrieved from the xml - - :param number: The raw data from the xml - :param previous_number: The previous verse number - :return: Number of current verse. 
(Int) - """ - if not number: - return previous_number + 1 - try: - return int(number) - except ValueError: - verse_parts = number.split('-') - if len(verse_parts) > 1: - number = int(verse_parts[0]) - return number - except TypeError: - log.warning('Illegal verse number: {verse_no}'.format(verse_no=str(number))) - return previous_number + 1 - - @staticmethod - def validate_file(filename): - """ - Validate the supplied file - - :param filename: The supplied file - :return: True if valid. ValidationError is raised otherwise. - """ - if BibleImport.is_compressed(filename): - raise ValidationError(msg='Compressed file') - bible = BibleImport.parse_xml(filename, use_objectify=True) - root_tag = bible.tag.lower() - if root_tag != 'bible': - if root_tag == 'xmlbible': - # Zefania bibles have a root tag of XMLBIBLE". Sometimes these bibles are referred to as 'OpenSong' - critical_error_message_box( - message=translate('BiblesPlugin.OpenSongImport', - 'Incorrect Bible file type supplied. This looks like a Zefania XML bible, ' - 'please use the Zefania import option.')) - raise ValidationError(msg='Invalid xml.') - return True - def process_books(self, books): """ Extract and create the books from the objectified xml @@ -136,7 +114,7 @@ class OpenSongBible(BibleImport): for chapter in chapters: if self.stop_import_flag: break - chapter_number = self.parse_chapter_number(chapter.attrib['n'], chapter_number) + chapter_number = parse_chapter_number(chapter.attrib['n'], chapter_number) self.process_verses(book, chapter_number, chapter.v) self.wizard.increment_progress_bar(translate('BiblesPlugin.Opensong', 'Importing {name} {chapter}...' 
@@ -155,8 +133,29 @@ class OpenSongBible(BibleImport): for verse in verses: if self.stop_import_flag: break - verse_number = self.parse_verse_number(verse.attrib['n'], verse_number) - self.create_verse(book.id, chapter_number, verse_number, self.get_text(verse)) + verse_number = parse_verse_number(verse.attrib['n'], verse_number) + self.create_verse(book.id, chapter_number, verse_number, get_text(verse)) + + def validate_file(self, filename): + """ + Validate the supplied file + + :param filename: The supplied file + :return: True if valid. ValidationError is raised otherwise. + """ + if BibleImport.is_compressed(filename): + raise ValidationError(msg='Compressed file') + bible = self.parse_xml(filename, use_objectify=True) + root_tag = bible.tag.lower() + if root_tag != 'bible': + if root_tag == 'xmlbible': + # Zefania bibles have a root tag of XMLBIBLE". Sometimes these bibles are referred to as 'OpenSong' + critical_error_message_box( + message=translate('BiblesPlugin.OpenSongImport', + 'Incorrect Bible file type supplied. 
This looks like a Zefania XML bible, ' + 'please use the Zefania import option.')) + raise ValidationError(msg='Invalid xml.') + return True def do_import(self, bible_name=None): """ diff --git a/tests/functional/openlp_plugins/bibles/test_opensongimport.py b/tests/functional/openlp_plugins/bibles/test_opensongimport.py index ee4e794c0..0f5c404ac 100644 --- a/tests/functional/openlp_plugins/bibles/test_opensongimport.py +++ b/tests/functional/openlp_plugins/bibles/test_opensongimport.py @@ -33,7 +33,8 @@ from tests.functional import MagicMock, patch, call from tests.helpers.testmixin import TestMixin from openlp.core.common import Registry from openlp.core.lib.exceptions import ValidationError -from openlp.plugins.bibles.lib.importers.opensong import OpenSongBible +from openlp.plugins.bibles.lib.importers.opensong import OpenSongBible, get_text, parse_chapter_number,\ + parse_verse_number from openlp.plugins.bibles.lib.bibleimport import BibleImport TEST_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), @@ -75,7 +76,7 @@ class TestOpenSongImport(TestCase, TestMixin): test_data = objectify.fromstring('') # WHEN: Calling get_text - result = OpenSongBible.get_text(test_data) + result = get_text(test_data) # THEN: A blank string should be returned self.assertEqual(result, '') @@ -92,7 +93,7 @@ class TestOpenSongImport(TestCase, TestMixin): 'sub_tail tail') # WHEN: Calling get_text - result = OpenSongBible.get_text(test_data) + result = get_text(test_data) # THEN: The text returned should be as expected self.assertEqual(result, 'Element text sub_text_tail text sub_text_tail tail sub_text text sub_tail tail') @@ -103,7 +104,7 @@ class TestOpenSongImport(TestCase, TestMixin): """ # GIVEN: The number 10 represented as a string # WHEN: Calling parse_chapter_nnumber - result = OpenSongBible.parse_chapter_number('10', 0) + result = parse_chapter_number('10', 0) # THEN: The 10 should be returned as an Int self.assertEqual(result, 10) @@ -114,7 +115,7 @@ class 
TestOpenSongImport(TestCase, TestMixin): """ # GIVEN: An empty string, and the previous chapter number set as 12 and an instance of OpenSongBible # WHEN: Calling parse_chapter_number - result = OpenSongBible.parse_chapter_number('', 12) + result = parse_chapter_number('', 12) # THEN: parse_chapter_number should increment the previous verse number self.assertEqual(result, 13) @@ -125,7 +126,7 @@ class TestOpenSongImport(TestCase, TestMixin): """ # GIVEN: The number 15 represented as a string and an instance of OpenSongBible # WHEN: Calling parse_verse_number - result = OpenSongBible.parse_verse_number('15', 0) + result = parse_verse_number('15', 0) # THEN: parse_verse_number should return the verse number self.assertEqual(result, 15) @@ -136,7 +137,7 @@ class TestOpenSongImport(TestCase, TestMixin): """ # GIVEN: The range 24-26 represented as a string # WHEN: Calling parse_verse_number - result = OpenSongBible.parse_verse_number('24-26', 0) + result = parse_verse_number('24-26', 0) # THEN: parse_verse_number should return the first verse number in the range self.assertEqual(result, 24) @@ -147,7 +148,7 @@ class TestOpenSongImport(TestCase, TestMixin): """ # GIVEN: An non numeric string represented as a string # WHEN: Calling parse_verse_number - result = OpenSongBible.parse_verse_number('invalid', 41) + result = parse_verse_number('invalid', 41) # THEN: parse_verse_number should increment the previous verse number self.assertEqual(result, 42) @@ -158,7 +159,7 @@ class TestOpenSongImport(TestCase, TestMixin): """ # GIVEN: An empty string, and the previous verse number set as 14 # WHEN: Calling parse_verse_number - result = OpenSongBible.parse_verse_number('', 14) + result = parse_verse_number('', 14) # THEN: parse_verse_number should increment the previous verse number self.assertEqual(result, 15) @@ -170,7 +171,7 @@ class TestOpenSongImport(TestCase, TestMixin): """ # GIVEN: A mocked out log, a Tuple, and the previous verse number set as 12 # WHEN: Calling 
parse_verse_number - result = OpenSongBible.parse_verse_number((1, 2, 3), 12) + result = parse_verse_number((1, 2, 3), 12) # THEN: parse_verse_number should log the verse number it was called with increment the previous verse number mocked_log.warning.assert_called_once_with('Illegal verse number: (1, 2, 3)') @@ -236,14 +237,13 @@ class TestOpenSongImport(TestCase, TestMixin): self.assertFalse(importer.parse_chapter_number.called) @patch('openlp.plugins.bibles.lib.importers.opensong.translate', **{'side_effect': lambda x, y: y}) - def process_chapters_completes_test(self, mocked_translate): + @patch('openlp.plugins.bibles.lib.importers.opensong.parse_chapter_number', **{'side_effect': [1, 2]}) + def process_chapters_completes_test(self, mocked_parse_chapter_number, mocked_translate): """ Test process_chapters when it completes """ # GIVEN: An instance of OpenSongBible importer = OpenSongBible(MagicMock(), path='.', name='.', filename='') - importer.parse_chapter_number = MagicMock() - importer.parse_chapter_number.side_effect = [1, 2] importer.wizard = MagicMock() # WHEN: called with some valid data @@ -263,7 +263,7 @@ class TestOpenSongImport(TestCase, TestMixin): importer.process_chapters(book, [chapter1, chapter2]) # THEN: parse_chapter_number, process_verses and increment_process_bar should have been called - self.assertEqual(importer.parse_chapter_number.call_args_list, [call('1', 0), call('2', 1)]) + self.assertEqual(mocked_parse_chapter_number.call_args_list, [call('1', 0), call('2', 1)]) self.assertEqual( importer.process_verses.call_args_list, [call(book, 1, ['Chapter1 Verses']), call(book, 2, ['Chapter2 Verses'])]) @@ -285,16 +285,14 @@ class TestOpenSongImport(TestCase, TestMixin): # THEN: importer.parse_verse_number not have been called self.assertFalse(importer.parse_verse_number.called) - def process_verses_completes_test(self): + @patch('openlp.plugins.bibles.lib.importers.opensong.parse_verse_number', **{'side_effect': [1, 2]}) + 
@patch('openlp.plugins.bibles.lib.importers.opensong.get_text', **{'side_effect': ['Verse1 Text', 'Verse2 Text']}) + def process_verses_completes_test(self, mocked_get_text, mocked_parse_verse_number): """ Test process_verses when it completes """ # GIVEN: An instance of OpenSongBible importer = OpenSongBible(MagicMock(), path='.', name='.', filename='') - importer.get_text = MagicMock() - importer.get_text.side_effect = ['Verse1 Text', 'Verse2 Text'] - importer.parse_verse_number = MagicMock() - importer.parse_verse_number.side_effect = [1, 2] importer.wizard = MagicMock() # WHEN: called with some valid data @@ -314,8 +312,8 @@ class TestOpenSongImport(TestCase, TestMixin): importer.process_verses(book, 1, [verse1, verse2]) # THEN: parse_chapter_number, process_verses and increment_process_bar should have been called - self.assertEqual(importer.parse_verse_number.call_args_list, [call('1', 0), call('2', 1)]) - self.assertEqual(importer.get_text.call_args_list, [call(verse1), call(verse2)]) + self.assertEqual(mocked_parse_verse_number.call_args_list, [call('1', 0), call('2', 1)]) + self.assertEqual(mocked_get_text.call_args_list, [call(verse1), call(verse2)]) self.assertEqual( importer.create_verse.call_args_list, [call(1, 1, 1, 'Verse1 Text'), call(1, 1, 2, 'Verse2 Text')]) @@ -327,11 +325,12 @@ class TestOpenSongImport(TestCase, TestMixin): """ # GIVEN: A mocked is_compressed method which returns True mocked_is_compressed.return_value = True + importer = OpenSongBible(MagicMock(), path='.', name='.', filename='') # WHEN: Calling validate_file # THEN: ValidationError should be raised with self.assertRaises(ValidationError) as context: - OpenSongBible.validate_file('file.name') + importer.validate_file('file.name') self.assertEqual(context.exception.msg, 'Compressed file') @patch('openlp.plugins.bibles.lib.importers.opensong.BibleImport.parse_xml') @@ -342,9 +341,10 @@ class TestOpenSongImport(TestCase, TestMixin): """ # GIVEN: Some test data with an OpenSong Bible 
"bible" root tag mocked_parse_xml.return_value = objectify.fromstring('') + importer = OpenSongBible(MagicMock(), path='.', name='.', filename='') # WHEN: Calling validate_file - result = OpenSongBible.validate_file('file.name') + result = importer.validate_file('file.name') # THEN: A True should be returned self.assertTrue(result) @@ -358,11 +358,12 @@ class TestOpenSongImport(TestCase, TestMixin): """ # GIVEN: Some test data with a Zefinia "XMLBIBLE" root tag mocked_parse_xml.return_value = objectify.fromstring('') + importer = OpenSongBible(MagicMock(), path='.', name='.', filename='') # WHEN: Calling validate_file # THEN: critical_error_message_box should be called and an ValidationError should be raised with self.assertRaises(ValidationError) as context: - OpenSongBible.validate_file('file.name') + importer.validate_file('file.name') self.assertEqual(context.exception.msg, 'Invalid xml.') mocked_message_box.assert_called_once_with( message='Incorrect Bible file type supplied. This looks like a Zefania XML bible, please use the ' @@ -377,11 +378,12 @@ class TestOpenSongImport(TestCase, TestMixin): """ # GIVEN: Some test data with an invalid root tag and an instance of OpenSongBible mocked_parse_xml.return_value = objectify.fromstring('') + importer = OpenSongBible(MagicMock(), path='.', name='.', filename='') # WHEN: Calling validate_file # THEN: ValidationError should be raised, and the critical error message box should not have been called with self.assertRaises(ValidationError) as context: - OpenSongBible.validate_file('file.name') + importer.validate_file('file.name') self.assertEqual(context.exception.msg, 'Invalid xml.') self.assertFalse(mocked_message_box.called)