From 3101d72b483db08042f44f74e8fc2e82747062c4 Mon Sep 17 00:00:00 2001 From: Tomas Groth Date: Sun, 24 Aug 2014 15:40:45 +0100 Subject: [PATCH 1/4] Rewrote OSIS import, added tests. --- openlp/plugins/bibles/lib/opensong.py | 3 +- openlp/plugins/bibles/lib/osis.py | 255 +++++++++--------- .../openlp_plugins/bibles/test_osisimport.py | 162 +++++++++++ tests/resources/bibles/dk1933.json | 16 ++ tests/resources/bibles/kjv.json | 16 ++ tests/resources/bibles/osis-dk1933.xml | 32 +++ tests/resources/bibles/osis-kjv.xml | 41 +++ tests/resources/bibles/osis-web.xml | 109 ++++++++ tests/resources/bibles/web.json | 16 ++ 9 files changed, 522 insertions(+), 128 deletions(-) create mode 100644 tests/functional/openlp_plugins/bibles/test_osisimport.py create mode 100644 tests/resources/bibles/dk1933.json create mode 100644 tests/resources/bibles/kjv.json create mode 100644 tests/resources/bibles/osis-dk1933.xml create mode 100644 tests/resources/bibles/osis-kjv.xml create mode 100644 tests/resources/bibles/osis-web.xml create mode 100644 tests/resources/bibles/web.json diff --git a/openlp/plugins/bibles/lib/opensong.py b/openlp/plugins/bibles/lib/opensong.py index fa8323d7f..dccdbf2cf 100644 --- a/openlp/plugins/bibles/lib/opensong.py +++ b/openlp/plugins/bibles/lib/opensong.py @@ -30,7 +30,7 @@ import logging from lxml import etree, objectify -from openlp.core.common import translate +from openlp.core.common import translate, trace_error_handler from openlp.core.lib.ui import critical_error_message_box from openlp.plugins.bibles.lib.db import BibleDB, BiblesResourcesDB @@ -134,6 +134,7 @@ class OpenSongBible(BibleDB): self.session.commit() self.application.process_events() except etree.XMLSyntaxError as inst: + trace_error_handler(log) critical_error_message_box( message=translate('BiblesPlugin.OpenSongImport', 'Incorrect Bible file type supplied. 
OpenSong Bibles may be ' diff --git a/openlp/plugins/bibles/lib/osis.py b/openlp/plugins/bibles/lib/osis.py index 4f85bef1a..851db39c4 100644 --- a/openlp/plugins/bibles/lib/osis.py +++ b/openlp/plugins/bibles/lib/osis.py @@ -27,14 +27,12 @@ # Temple Place, Suite 330, Boston, MA 02111-1307 USA # ############################################################################### -import os import logging -import chardet -import codecs -import re +from lxml import etree -from openlp.core.common import AppLocation, translate +from openlp.core.common import translate, trace_error_handler from openlp.plugins.bibles.lib.db import BibleDB, BiblesResourcesDB +from openlp.core.lib.ui import critical_error_message_box log = logging.getLogger(__name__) @@ -53,143 +51,146 @@ class OSISBible(BibleDB): log.debug(self.__class__.__name__) BibleDB.__init__(self, parent, **kwargs) self.filename = kwargs['filename'] - self.language_regex = re.compile(r'(.*?)') - self.verse_regex = re.compile(r'(.*?)') - self.note_regex = re.compile(r'(.*?)') - self.title_regex = re.compile(r'(.*?)') - self.milestone_regex = re.compile(r'') - self.fi_regex = re.compile(r'(.*?)') - self.rf_regex = re.compile(r'(.*?)') - self.lb_regex = re.compile(r'') - self.lg_regex = re.compile(r'') - self.l_regex = re.compile(r'') - self.w_regex = re.compile(r'') - self.q_regex = re.compile(r'') - self.q1_regex = re.compile(r'') - self.q2_regex = re.compile(r'') - self.trans_regex = re.compile(r'(.*?)') - self.divine_name_regex = re.compile(r'(.*?)') - self.spaces_regex = re.compile(r'([ ]{2,})') - filepath = os.path.join( - AppLocation.get_directory(AppLocation.PluginsDir), 'bibles', 'resources', 'osisbooks.csv') def do_import(self, bible_name=None): """ Loads a Bible from file. 
""" log.debug('Starting OSIS import from "%s"' % self.filename) - detect_file = None - db_book = None - osis = None + if not isinstance(self.filename, str): + self.filename = str(self.filename, 'utf8') + import_file = None success = True - last_chapter = 0 - match_count = 0 - self.wizard.increment_progress_bar( - translate('BiblesPlugin.OsisImport', 'Detecting encoding (this may take a few minutes)...')) try: - detect_file = open(self.filename, 'r') - details = chardet.detect(detect_file.read(1048576)) - detect_file.seek(0) - lines_in_file = int(len(detect_file.readlines())) - except IOError: - log.exception('Failed to detect OSIS file encoding') - return - finally: - if detect_file: - detect_file.close() - try: - osis = codecs.open(self.filename, 'r', details['encoding']) - repl = replacement - language_id = False - # Decide if the bible probably contains only NT or AT and NT or - # AT, NT and Apocrypha - if lines_in_file < 11500: - book_count = 27 - chapter_count = 260 - elif lines_in_file < 34200: - book_count = 66 - chapter_count = 1188 - else: - book_count = 67 - chapter_count = 1336 - for file_record in osis: + # NOTE: We don't need to do any of the normal encoding detection here, because lxml does it's own encoding + # detection, and the two mechanisms together interfere with each other. 
+ import_file = open(self.filename, 'rb') + language_id = self.get_language(bible_name) + if not language_id: + log.error('Importing books from "%s" failed' % self.filename) + return False + osis_bible_tree = etree.parse(import_file) + namespace = {'ns': 'http://www.bibletechnologies.net/2003/OSIS/namespace'} + num_books = int(osis_bible_tree.xpath("count(//ns:div[@type='book'])", namespaces=namespace)) + log.debug('number of books: %d' % num_books) + self.wizard.increment_progress_bar(translate('BiblesPlugin.OsisImport', + 'Removing unused tags (this may take a few minutes)...')) + # We strip unused tags from the XML, this should leave us with only chapter, verse and div tags. + # Strip tags we don't use - keep content + etree.strip_tags(osis_bible_tree, ('{http://www.bibletechnologies.net/2003/OSIS/namespace}p', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}l', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}lg', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}q', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}a', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}w', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}divineName', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}foreign', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}hi', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}inscription', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}mentioned', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}name', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}reference', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}seg', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}transChange', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}salute', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}signed', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}closer', + 
'{http://www.bibletechnologies.net/2003/OSIS/namespace}speech', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}speaker', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}list', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}item', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}table', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}head', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}row', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}cell', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}caption')) + # Strip tags we don't use - remove content + etree.strip_elements(osis_bible_tree, ('{http://www.bibletechnologies.net/2003/OSIS/namespace}note', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}milestone', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}title', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}abbr', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}catchWord', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}index', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}rdg', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}rdgGroup', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}figure'), + with_tail=False) + # Precompile a few xpath-querys + verse_in_chapter = etree.XPath('count(//ns:chapter[1]/ns:verse)', namespaces=namespace) + text_in_verse = etree.XPath('count(//ns:verse[1]/text())', namespaces=namespace) + # Find books in the bible + bible_books = osis_bible_tree.xpath("//ns:div[@type='book']", namespaces=namespace) + for book in bible_books: if self.stop_import_flag: break - # Try to find the bible language - if not language_id: - language_match = self.language_regex.search(file_record) - if language_match: - language = BiblesResourcesDB.get_language( - language_match.group(1)) - if language: - language_id = language['id'] - self.save_meta('language_id', language_id) - continue - match = 
self.verse_regex.search(file_record) - if match: - # Set meta language_id if not detected till now - if not language_id: - language_id = self.get_language(bible_name) - if not language_id: - log.error('Importing books from "%s" failed' % self.filename) - return False - match_count += 1 - book = str(match.group(1)) - chapter = int(match.group(2)) - verse = int(match.group(3)) - verse_text = match.group(4) - book_ref_id = self.get_book_ref_id_by_name(book, book_count, language_id) - if not book_ref_id: - log.error('Importing books from "%s" failed' % self.filename) - return False - book_details = BiblesResourcesDB.get_book_by_id(book_ref_id) - if not db_book or db_book.name != book_details['name']: - log.debug('New book: "%s"' % book_details['name']) - db_book = self.create_book( - book_details['name'], - book_ref_id, - book_details['testament_id']) - if last_chapter == 0: - self.wizard.progress_bar.setMaximum(chapter_count) - if last_chapter != chapter: - if last_chapter != 0: - self.session.commit() + # Remove div-tags in the book + etree.strip_tags(book, ('{http://www.bibletechnologies.net/2003/OSIS/namespace}div')) + book_ref_id = self.get_book_ref_id_by_name(book.get('osisID'), num_books) + if not book_ref_id: + book_ref_id = self.get_book_ref_id_by_localised_name(book.get('osisID')) + if not book_ref_id: + log.error('Importing books from "%s" failed' % self.filename) + return False + book_details = BiblesResourcesDB.get_book_by_id(book_ref_id) + db_book = self.create_book(book_details['name'], book_ref_id, book_details['testament_id']) + # Find out if chapter-tags contains the verses, or if it is used as milestone/anchor + if int(verse_in_chapter(book)) > 0: + # The chapter tags contains the verses + for chapter in book: + chapter_number = chapter.get("osisID").split('.')[1] + # Find out if verse-tags contains the text, or if it is used as milestone/anchor + if int(text_in_verse(chapter)) == 0: + # verse-tags are used as milestone + for verse in chapter: + # If 
this tag marks the start of a verse, the verse text is between this tag and + # the next tag, which the "tail" attribute gives us. + if verse.get('sID'): + verse_number = verse.get("osisID").split('.')[2] + verse_text = verse.tail + if verse_text: + self.create_verse(db_book.id, chapter_number, verse_number, verse_text.strip()) + else: + # Verse-tags contains the text + for verse in chapter: + verse_number = verse.get("osisID").split('.')[2] + self.create_verse(db_book.id, chapter_number, verse_number, verse.text.strip()) self.wizard.increment_progress_bar( - translate('BiblesPlugin.OsisImport', 'Importing %s %s...', - 'Importing ...') % (book_details['name'], chapter)) - last_chapter = chapter - # All of this rigmarole below is because the mod2osis tool from the Sword library embeds XML in the - # OSIS but neglects to enclose the verse text (with XML) in <[CDATA[ ]]> tags. - verse_text = self.note_regex.sub('', verse_text) - verse_text = self.title_regex.sub('', verse_text) - verse_text = self.milestone_regex.sub('', verse_text) - verse_text = self.fi_regex.sub('', verse_text) - verse_text = self.rf_regex.sub('', verse_text) - verse_text = self.lb_regex.sub(' ', verse_text) - verse_text = self.lg_regex.sub('', verse_text) - verse_text = self.l_regex.sub(' ', verse_text) - verse_text = self.w_regex.sub('', verse_text) - verse_text = self.q1_regex.sub('"', verse_text) - verse_text = self.q2_regex.sub('\'', verse_text) - verse_text = self.q_regex.sub('', verse_text) - verse_text = self.divine_name_regex.sub(repl, verse_text) - verse_text = self.trans_regex.sub('', verse_text) - verse_text = verse_text.replace('', '') \ - .replace('', '').replace('', '') \ - .replace('', '').replace('', '') \ - .replace('', '').replace('', '') - verse_text = self.spaces_regex.sub(' ', verse_text) - self.create_verse(db_book.id, chapter, verse, verse_text) - self.application.process_events() - self.session.commit() - if match_count == 0: - success = False + 
translate('BiblesPlugin.OsisImport', 'Importing %(bookname)s %(chapter)s...' % + {'bookname': db_book.name, 'chapter': chapter_number})) + else: + log.debug('chapters are milestones') + # The chapter tags is used as milestones. For now we assume verses is also milestones + chapter_number = 0 + for element in book: + if element.tag == '{http://www.bibletechnologies.net/2003/OSIS/namespace}chapter' \ + and element.get('sID'): + chapter_number = element.get("osisID").split('.')[1] + self.wizard.increment_progress_bar( + translate('BiblesPlugin.OsisImport', 'Importing %(bookname)s %(chapter)s...' % + {'bookname': db_book.name, 'chapter': chapter_number})) + elif element.tag == '{http://www.bibletechnologies.net/2003/OSIS/namespace}verse' \ + and element.get('sID'): + # If this tag marks the start of a verse, the verse text is between this tag and + # the next tag, which the "tail" attribute gives us. + verse_number = element.get("osisID").split('.')[2] + verse_text = element.tail + if verse_text: + self.create_verse(db_book.id, chapter_number, verse_number, verse_text.strip()) + self.session.commit() + self.application.process_events() except (ValueError, IOError): log.exception('Loading bible from OSIS file failed') + trace_error_handler(log) success = False + except etree.XMLSyntaxError as e: + log.exception('Loading bible from OSIS file failed') + trace_error_handler(log) + success = False + critical_error_message_box(message=translate('BiblesPlugin.OsisImport', + 'The file is not a valid OSIS-XML file: \n%s' % e.msg)) finally: - if osis: - osis.close() + if import_file: + import_file.close() if self.stop_import_flag: return False else: diff --git a/tests/functional/openlp_plugins/bibles/test_osisimport.py b/tests/functional/openlp_plugins/bibles/test_osisimport.py new file mode 100644 index 000000000..af437c267 --- /dev/null +++ b/tests/functional/openlp_plugins/bibles/test_osisimport.py @@ -0,0 +1,162 @@ +# -*- coding: utf-8 -*- +# vim: autoindent shiftwidth=4 
expandtab textwidth=120 tabstop=4 softtabstop=4 + +############################################################################### +# OpenLP - Open Source Lyrics Projection # +# --------------------------------------------------------------------------- # +# Copyright (c) 2008-2014 Raoul Snyman # +# Portions copyright (c) 2008-2014 Tim Bentley, Gerald Britton, Jonathan # +# Corwin, Samuel Findlay, Michael Gorven, Scott Guerrieri, Matthias Hub, # +# Meinert Jordan, Armin Köhler, Erik Lundin, Edwin Lunando, Brian T. Meyer. # +# Joshua Miller, Stevan Pettit, Andreas Preikschat, Mattias Põldaru, # +# Christian Richter, Philip Ridout, Simon Scudder, Jeffrey Smith, # +# Maikel Stuivenberg, Martin Thompson, Jon Tibble, Dave Warnock, # +# Frode Woldsund, Martin Zibricky, Patrick Zimmermann # +# --------------------------------------------------------------------------- # +# This program is free software; you can redistribute it and/or modify it # +# under the terms of the GNU General Public License as published by the Free # +# Software Foundation; version 2 of the License. # +# # +# This program is distributed in the hope that it will be useful, but WITHOUT # +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # +# more details. # +# # +# You should have received a copy of the GNU General Public License along # +# with this program; if not, write to the Free Software Foundation, Inc., 59 # +# Temple Place, Suite 330, Boston, MA 02111-1307 USA # +############################################################################### +""" +This module contains tests for the OSIS Bible importer. 
+""" + +import os +import json +from unittest import TestCase + +from tests.functional import MagicMock, patch +from openlp.plugins.bibles.lib.osis import OSISBible +from openlp.plugins.bibles.lib.db import BibleDB + +TEST_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), + '..', '..', '..', 'resources', 'bibles')) + + +class TestOsisImport(TestCase): + """ + Test the functions in the :mod:`osisimport` module. + """ + + def setUp(self): + self.registry_patcher = patch('openlp.plugins.bibles.lib.db.Registry') + self.registry_patcher.start() + self.manager_patcher = patch('openlp.plugins.bibles.lib.db.Manager') + self.manager_patcher.start() + + def tearDown(self): + self.registry_patcher.stop() + self.manager_patcher.stop() + + def create_importer_test(self): + """ + Test creating an instance of the OSIS file importer + """ + # GIVEN: A mocked out "manager" + mocked_manager = MagicMock() + + # WHEN: An importer object is created + importer = OSISBible(mocked_manager, path='.', name='.', filename='') + + # THEN: The importer should be an instance of BibleDB + self.assertIsInstance(importer, BibleDB) + + def file_import_nested_tags_test(self): + """ + Test the actual import of OSIS Bible file, with nested chapter and verse tags + """ + # GIVEN: Test files with a mocked out "manager", "import_wizard", and mocked functions + # get_book_ref_id_by_name, create_verse, create_book, session and get_language. 
+ result_file = open(os.path.join(TEST_PATH, 'dk1933.json'), 'rb') + test_data = json.loads(result_file.read().decode()) + bible_file = 'osis-dk1933.xml' + with patch('openlp.plugins.bibles.lib.osis.OSISBible.application'): + mocked_manager = MagicMock() + mocked_import_wizard = MagicMock() + importer = OSISBible(mocked_manager, path='.', name='.', filename='') + importer.wizard = mocked_import_wizard + importer.get_book_ref_id_by_name = MagicMock() + importer.create_verse = MagicMock() + importer.create_book = MagicMock() + importer.session = MagicMock() + importer.get_language = MagicMock() + importer.get_language.return_value = 'Danish' + + # WHEN: Importing bible file + importer.filename = os.path.join(TEST_PATH, bible_file) + importer.do_import() + + # THEN: The create_verse() method should have been called with each verse in the file. + self.assertTrue(importer.create_verse.called) + for verse_tag, verse_text in test_data['verses']: + importer.create_verse.assert_any_call(importer.create_book().id, '1', verse_tag, verse_text) + + def file_import_mixed_tags_test(self): + """ + Test the actual import of OSIS Bible file, with nested chapter and milestone verse tags. + """ + # GIVEN: Test files with a mocked out "manager", "import_wizard", and mocked functions + # get_book_ref_id_by_name, create_verse, create_book, session and get_language. 
+ result_file = open(os.path.join(TEST_PATH, 'kjv.json'), 'rb') + test_data = json.loads(result_file.read().decode()) + bible_file = 'osis-kjv.xml' + with patch('openlp.plugins.bibles.lib.osis.OSISBible.application'): + mocked_manager = MagicMock() + mocked_import_wizard = MagicMock() + importer = OSISBible(mocked_manager, path='.', name='.', filename='') + importer.wizard = mocked_import_wizard + importer.get_book_ref_id_by_name = MagicMock() + importer.create_verse = MagicMock() + importer.create_book = MagicMock() + importer.session = MagicMock() + importer.get_language = MagicMock() + importer.get_language.return_value = 'English' + + # WHEN: Importing bible file + importer.filename = os.path.join(TEST_PATH, bible_file) + importer.do_import() + + # THEN: The create_verse() method should have been called with each verse in the file. + self.assertTrue(importer.create_verse.called) + for verse_tag, verse_text in test_data['verses']: + importer.create_verse.assert_any_call(importer.create_book().id, '1', verse_tag, verse_text) + + def file_import_milestone_tags_test(self): + """ + Test the actual import of OSIS Bible file, with milestone chapter and verse tags. + """ + # GIVEN: Test files with a mocked out "manager", "import_wizard", and mocked functions + # get_book_ref_id_by_name, create_verse, create_book, session and get_language. 
+ result_file = open(os.path.join(TEST_PATH, 'web.json'), 'rb') + test_data = json.loads(result_file.read().decode()) + bible_file = 'osis-web.xml' + with patch('openlp.plugins.bibles.lib.osis.OSISBible.application'): + mocked_manager = MagicMock() + mocked_import_wizard = MagicMock() + importer = OSISBible(mocked_manager, path='.', name='.', filename='') + importer.wizard = mocked_import_wizard + importer.get_book_ref_id_by_name = MagicMock() + importer.create_verse = MagicMock() + importer.create_book = MagicMock() + importer.session = MagicMock() + importer.get_language = MagicMock() + importer.get_language.return_value = 'English' + + # WHEN: Importing bible file + importer.filename = os.path.join(TEST_PATH, bible_file) + importer.do_import() + + # THEN: The create_verse() method should have been called with each verse in the file. + self.assertTrue(importer.create_verse.called) + print(importer.create_verse.call_list()) + for verse_tag, verse_text in test_data['verses']: + importer.create_verse.assert_any_call(importer.create_book().id, '1', verse_tag, verse_text) diff --git a/tests/resources/bibles/dk1933.json b/tests/resources/bibles/dk1933.json new file mode 100644 index 000000000..f364cb47e --- /dev/null +++ b/tests/resources/bibles/dk1933.json @@ -0,0 +1,16 @@ +{ + "book": "Genesis", + "chapter": 1, + "verses": [ + [ "1", "I Begyndelsen skabte Gud Himmelen og Jorden."], + [ "2", "Og Jorden var øde og tom, og der var Mørke over Verdensdybet. Men Guds Ånd svævede over Vandene." ], + [ "3", "Og Gud sagde: \"Der blive Lys!\" Og der blev Lys." ], + [ "4", "Og Gud så, at Lyset var godt, og Gud satte Skel mellem Lyset og Mørket," ], + [ "5", "og Gud kaldte Lyset Dag, og Mørket kaldte han Nat. Og det blev Aften, og det blev Morgen, første Dag." 
], + [ "6", "Derpå sagde Gud: \"Der blive en Hvælving midt i Vandene til at skille Vandene ad!\"" ], + [ "7", "Og således skete det: Gud gjorde Hvælvingen og skilte Vandet under Hvælvingen fra Vandet over Hvælvingen;" ], + [ "8", "og Gud kaldte Hvælvingen Himmel. Og det blev Aften, og det blev Morgen, anden Dag." ], + [ "9", "Derpå sagde Gud: \"Vandet under Himmelen samle sig på eet Sted, så det faste Land kommer til Syne!\" Og således skete det;" ], + [ "10", "og Gud kaldte det faste Land Jord, og Stedet, hvor Vandet samlede sig, kaldte han Hav. Og Gud så, at det var godt." ] + ] +} \ No newline at end of file diff --git a/tests/resources/bibles/kjv.json b/tests/resources/bibles/kjv.json new file mode 100644 index 000000000..a375a1b40 --- /dev/null +++ b/tests/resources/bibles/kjv.json @@ -0,0 +1,16 @@ +{ + "book": "Genesis", + "chapter": 1, + "verses": [ + [ "1", "In the beginning God created the heaven and the earth."], + [ "2", "And the earth was without form, and void; and darkness was upon the face of the deep. And the Spirit of God moved upon the face of the waters." ], + [ "3", "And God said, Let there be light: and there was light." ], + [ "4", "And God saw the light, that it was good: and God divided the light from the darkness." ], + [ "5", "And God called the light Day, and the darkness he called Night. And the evening and the morning were the first day." ], + [ "6", "And God said, Let there be a firmament in the midst of the waters, and let it divide the waters from the waters." ], + [ "7", "And God made the firmament, and divided the waters which were under the firmament from the waters which were above the firmament: and it was so." ], + [ "8", "And God called the firmament Heaven. And the evening and the morning were the second day." ], + [ "9", "And God said, Let the waters under the heaven be gathered together unto one place, and let the dry land appear: and it was so." 
], + [ "10", "And God called the dry land Earth; and the gathering together of the waters called he Seas: and God saw that it was good." ] + ] +} diff --git a/tests/resources/bibles/osis-dk1933.xml b/tests/resources/bibles/osis-dk1933.xml new file mode 100644 index 000000000..d51d073fe --- /dev/null +++ b/tests/resources/bibles/osis-dk1933.xml @@ -0,0 +1,32 @@ + + + + +
+ + Dette er Biblen + Bible.DanDetteBiblen + Bible.KJV + + + Bible.KJV + +
+
+
+ + I Begyndelsen skabte Gud Himmelen og Jorden. + Og Jorden var øde og tom, og der var Mørke over Verdensdybet. Men Guds Ånd svævede over Vandene.

+ Og Gud sagde: "Der blive Lys!" Og der blev Lys. + Og Gud så, at Lyset var godt, og Gud satte Skel mellem Lyset og Mørket, + og Gud kaldte Lyset Dag, og Mørket kaldte han Nat. Og det blev Aften, og det blev Morgen, første Dag.

+ Derpå sagde Gud: "Der blive en Hvælving midt i Vandene til at skille Vandene ad!" + Og således skete det: Gud gjorde Hvælvingen og skilte Vandet under Hvælvingen fra Vandet over Hvælvingen; + og Gud kaldte Hvælvingen Himmel. Og det blev Aften, og det blev Morgen, anden Dag.

+ Derpå sagde Gud: "Vandet under Himmelen samle sig på eet Sted, så det faste Land kommer til Syne!" Og således skete det; + og Gud kaldte det faste Land Jord, og Stedet, hvor Vandet samlede sig, kaldte han Hav. Og Gud så, at det var godt. + +

+
+
+
diff --git a/tests/resources/bibles/osis-kjv.xml b/tests/resources/bibles/osis-kjv.xml new file mode 100644 index 000000000..72e42e3aa --- /dev/null +++ b/tests/resources/bibles/osis-kjv.xml @@ -0,0 +1,41 @@ + + + +
+ + King James Version (1769) with Strongs Numbers and Morphology + Bible.KJV + Gen-Rev + Bible.KJV + + + Bible.KJV + + + Dict.Strongs + + + Dict.Robinsons + + + Dict.strongMorph + +
+
+THE FIRST BOOK OF MOSES CALLED GENESIS + +CHAPTER 1. +In the beginning God created the heaven and the earth. +And the earth was without form, and void; and darkness was upon the face of the deep. And the Spirit of God moved upon the face of the waters. +And God said, Let there be light: and there was light. +And God saw the light, that it was good: and God divided the light from the darkness.the light from…: Heb. between the light and between the darkness +And God called the light Day, and the darkness he called Night. And the evening and the morning were the first day.And the evening…: Heb. And the evening was, and the morning was etc. +And God said, Let there be a firmament in the midst of the waters, and let it divide the waters from the waters.firmament: Heb. expansion +And God made the firmament, and divided the waters which were under the firmament from the waters which were above the firmament: and it was so. +And God called the firmament Heaven. And the evening and the morning were the second day.And the evening…: Heb. And the evening was, and the morning was etc. +And God said, Let the waters under the heaven be gathered together unto one place, and let the dry land appear: and it was so. +And God called the dry land Earth; and the gathering together of the waters called he Seas: and God saw that it was good. + +
+
+
diff --git a/tests/resources/bibles/osis-web.xml b/tests/resources/bibles/osis-web.xml new file mode 100644 index 000000000..515739ea8 --- /dev/null +++ b/tests/resources/bibles/osis-web.xml @@ -0,0 +1,109 @@ + + + +
+ + 2007-08-26T08.23.41 +

This draft version of the World English Bible is +substantially complete in the New Testament, Genesis, Exodus, Job, Psalms, Proverbs, Ecclesiastes, Song of Solomon, and the “minor” prophets. Editing continues on the other books of the Old Testament. All WEB companion Apocrypha books are still in +rough draft form.

+

Converted web.gbf in GBF to web.osis.xml in +an XML format that is mostly compliant with OSIS 2.0 using gbf2osis.exe. +(Please see http://ebt.cx/translation/ for links to this software.)

+

GBF and OSIS metadata fields do not exactly correspond to each other, so +the conversion is not perfect in the metadata. However, the Scripture portion +should be correct.

+

No attempt was made to convert quotation marks to structural markers using q or +speech elements, because this would require language and style-dependent +processing, and because the current OSIS specification is deficient in that +quotation mark processing is not guaranteed to produce the correct results +for all languages and translations. In English texts, the hard part of the +conversion to markup is figuring out what ’ means. +The other difficulty is that OSIS in no way guarantees that these punctuation +marks would be reconstituted properly by software that reads OSIS files +for anything other than modern English, and even then, it does not +accommodate all styles of punctuation and all cases. +We strongly recommend that anyone using OSIS NOT replace quotation mark +punctuation in any existing text with q or speech elements. It is better +for multiple language processing capabilities to leave the quotation +punctuation as part of the text. If you need the q or speech markup, then you +may supplement those punctuation marks with those markup elements, but specify +the n='' parameter in those elements to indicate that no generation of any +punctuation from those markup elements is required or desired. That way you +can have BOTH correct punctuation already in the text AND markup so that you +can automatically determine when you are in a quotation or not, independent +of language. This may be useful for a search by speaker, for example.

+

The output of gbf2osis marks Jesus' words in a non-standard way using the q +element AND quotation marks if they were marked with FR/Fr markers in the GBF +file. The OSIS 2.0 specification requires that quotation marks be stripped out, +and reinserted by software that reads the OSIS files when q elements are used. +This is not acceptable for the reasons given above, and we choose not to do +that, but we used the q element with who='Jesus' to indicate Jesus' words. +Do not generate any additional punctuation due to these markers. The correct +punctuation is already in the text.

+

OSIS does not currently support footnote start anchors. Therefore, these +start anchors have been represented with milestone elements, in case someone +might like to use them, for example, to start an href element in a conversion +to HTML. (OSIS sort of supports the same idea by allowing a catchword to be +defined within a footnote, but I did not implement the processing to convert +to this different way of doing things, and it isn't exactly the same, anyway.)

+

Traditional psalm book titles are rendered as text rather than titles, because +the title element does not support containing transChange elements, as would be +required to encode the KJV text using OSIS title elements. This may actually be +a superior solution, anyway, in that the Masoretic text makes no such distinction +(even though many modern typeset Bibles do make a typographic distinction in this +case).

+

The schema location headers were modified to use local copies rather than the +standard locations so that these files could be validated and used without an +Internet connection active at all times (very important for the developer's +remote island location), but you may wish to change them back.

+
+ + World English Bible + WEB committee + 2007-08-26 + Rainbow Missions, Inc. + Bible + Bible.en.WEB.draft.2007-08-26 + http://eBible.org/web/ + ENG + Wherever English is spoken in the world. + The World English Bible is dedicated to the Public Domain by the translators and editors. It is not copyrighted. “World English Bible” and the World English Bible logo are a trademarks of Rainbow +Missions, Inc. They may only be used to identify this translation of the Holy Bible as published by Rainbow Missions, Inc., and faithful copies and quotations. “Faithful copies” include copies converted to other formats (i. e. HTML, PDF, etc.) or +typeset differently, without altering the text of the Scriptures, except that changing the spellings between preferred American and British usage is allowed. Use of the markings of direct quotes of Jesus Christ for different rendition (i. e. red text) +is optional. Comments and typo reports are welcome at http://eBible.org/cgi-bin/comment.cgi. Please see http://eBible.org/web/ for updates, revision status, free downloads, and printed edition purchase information. + Gen-Mal + Tob-AddEsth + Bar-EpJer + AddDan + Matt-Rev + Bible.WEB + +
+
+
+ Genesis + +

+ +In the beginning GodThe Hebrew word rendered “God” is “Elohim.” After “God,” the Hebrew has the two letters “Aleph Tav” (the first and last letters of the Hebrew alphabet) as a grammatical marker. created the heavens and the earth. +Now the earth was formless and empty. Darkness was on the surface of the deep. God’s Spirit was hovering over the surface of the waters.

+

+ +God said, “Let there be light,” and there was light. +God saw the light, and saw that it was good. God divided the light from the darkness. +God called the light “day,” and the darkness he called “night.” There was evening and there was morning, one day.

+

+ +God said, “Let there be an expanse in the middle of the waters, and let it divide the waters from the waters.” +God made the expanse, and divided the waters which were under the expanse from the waters which were above the expanse; and it was so. +God called the expanse “sky.” There was evening and there was morning, a second day.

+

+ +God said, “Let the waters under the sky be gathered together to one place, and let the dry land appear;” and it was so. +God called the dry land “earth,” and the gathering together of the waters he called “seas.” God saw that it was good. +

+
+
+
+
diff --git a/tests/resources/bibles/web.json b/tests/resources/bibles/web.json new file mode 100644 index 000000000..0fbc95669 --- /dev/null +++ b/tests/resources/bibles/web.json @@ -0,0 +1,16 @@ +{ + "book": "Genesis", + "chapter": "1", + "verses": [ + [ "1", "In the beginning God created the heavens and the earth."], + [ "2", "Now the earth was formless and empty. Darkness was on the surface of the deep. God’s Spirit was hovering over the surface of the waters." ], + [ "3", "God said, “Let there be light,” and there was light." ], + [ "4", "God saw the light, and saw that it was good. God divided the light from the darkness." ], + [ "5", "God called the light “day,” and the darkness he called “night.” There was evening and there was morning, one day." ], + [ "6", "God said, “Let there be an expanse in the middle of the waters, and let it divide the waters from the waters.”" ], + [ "7", "God made the expanse, and divided the waters which were under the expanse from the waters which were above the expanse; and it was so." ], + [ "8", "God called the expanse “sky.” There was evening and there was morning, a second day." ], + [ "9", "God said, “Let the waters under the sky be gathered together to one place, and let the dry land appear;” and it was so." ], + [ "10", "God called the dry land “earth,” and the gathering together of the waters he called “seas.” God saw that it was good." ] + ] +} From d48bf080940e67375d8459aa1421157cf719f05c Mon Sep 17 00:00:00 2001 From: Tomas Groth Date: Sun, 24 Aug 2014 16:04:42 +0100 Subject: [PATCH 2/4] Remove a few debug prints and move a few lines... 
--- openlp/plugins/bibles/lib/osis.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/openlp/plugins/bibles/lib/osis.py b/openlp/plugins/bibles/lib/osis.py index 851db39c4..5dd9d9d3e 100644 --- a/openlp/plugins/bibles/lib/osis.py +++ b/openlp/plugins/bibles/lib/osis.py @@ -72,10 +72,20 @@ class OSISBible(BibleDB): osis_bible_tree = etree.parse(import_file) namespace = {'ns': 'http://www.bibletechnologies.net/2003/OSIS/namespace'} num_books = int(osis_bible_tree.xpath("count(//ns:div[@type='book'])", namespaces=namespace)) - log.debug('number of books: %d' % num_books) self.wizard.increment_progress_bar(translate('BiblesPlugin.OsisImport', 'Removing unused tags (this may take a few minutes)...')) # We strip unused tags from the XML, this should leave us with only chapter, verse and div tags. + # Strip tags we don't use - remove content + etree.strip_elements(osis_bible_tree, ('{http://www.bibletechnologies.net/2003/OSIS/namespace}note', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}milestone', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}title', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}abbr', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}catchWord', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}index', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}rdg', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}rdgGroup', + '{http://www.bibletechnologies.net/2003/OSIS/namespace}figure'), + with_tail=False) # Strip tags we don't use - keep content etree.strip_tags(osis_bible_tree, ('{http://www.bibletechnologies.net/2003/OSIS/namespace}p', '{http://www.bibletechnologies.net/2003/OSIS/namespace}l', @@ -104,17 +114,6 @@ class OSISBible(BibleDB): '{http://www.bibletechnologies.net/2003/OSIS/namespace}row', '{http://www.bibletechnologies.net/2003/OSIS/namespace}cell', '{http://www.bibletechnologies.net/2003/OSIS/namespace}caption')) - # Strip tags we don't 
use - remove content - etree.strip_elements(osis_bible_tree, ('{http://www.bibletechnologies.net/2003/OSIS/namespace}note', - '{http://www.bibletechnologies.net/2003/OSIS/namespace}milestone', - '{http://www.bibletechnologies.net/2003/OSIS/namespace}title', - '{http://www.bibletechnologies.net/2003/OSIS/namespace}abbr', - '{http://www.bibletechnologies.net/2003/OSIS/namespace}catchWord', - '{http://www.bibletechnologies.net/2003/OSIS/namespace}index', - '{http://www.bibletechnologies.net/2003/OSIS/namespace}rdg', - '{http://www.bibletechnologies.net/2003/OSIS/namespace}rdgGroup', - '{http://www.bibletechnologies.net/2003/OSIS/namespace}figure'), - with_tail=False) # Precompile a few xpath-querys verse_in_chapter = etree.XPath('count(//ns:chapter[1]/ns:verse)', namespaces=namespace) text_in_verse = etree.XPath('count(//ns:verse[1]/text())', namespaces=namespace) @@ -158,7 +157,6 @@ class OSISBible(BibleDB): translate('BiblesPlugin.OsisImport', 'Importing %(bookname)s %(chapter)s...' % {'bookname': db_book.name, 'chapter': chapter_number})) else: - log.debug('chapters are milestones') # The chapter tags is used as milestones. For now we assume verses is also milestones chapter_number = 0 for element in book: From 71a1b26e1cf6f919b198f67f1a741952b361c079 Mon Sep 17 00:00:00 2001 From: Tomas Groth Date: Sun, 24 Aug 2014 16:14:29 +0100 Subject: [PATCH 3/4] Changed test description. --- tests/functional/openlp_plugins/bibles/test_osisimport.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/functional/openlp_plugins/bibles/test_osisimport.py b/tests/functional/openlp_plugins/bibles/test_osisimport.py index af437c267..540403f84 100644 --- a/tests/functional/openlp_plugins/bibles/test_osisimport.py +++ b/tests/functional/openlp_plugins/bibles/test_osisimport.py @@ -102,7 +102,7 @@ class TestOsisImport(TestCase): def file_import_mixed_tags_test(self): """ - Test the actual import of OSIS Bible file, with nested chapter and milestone verse tags. 
+ Test the actual import of OSIS Bible file, with chapter tags containing milestone verse tags. """ # GIVEN: Test files with a mocked out "manager", "import_wizard", and mocked functions # get_book_ref_id_by_name, create_verse, create_book, session and get_language. From 7fecaa1d70703d191a3c74491710347d973fc9f4 Mon Sep 17 00:00:00 2001 From: Tomas Groth Date: Wed, 27 Aug 2014 15:10:33 +0200 Subject: [PATCH 4/4] Added language detection when importing Fixes: https://launchpad.net/bugs/1214875 --- openlp/plugins/bibles/lib/opensong.py | 1 + openlp/plugins/bibles/lib/osis.py | 13 ++++++++++--- openlp/plugins/bibles/lib/zefania.py | 11 +++++++++-- .../openlp_plugins/bibles/test_osisimport.py | 1 - 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/openlp/plugins/bibles/lib/opensong.py b/openlp/plugins/bibles/lib/opensong.py index dccdbf2cf..420ace1ec 100644 --- a/openlp/plugins/bibles/lib/opensong.py +++ b/openlp/plugins/bibles/lib/opensong.py @@ -88,6 +88,7 @@ class OpenSongBible(BibleDB): 'Incorrect Bible file type supplied. This looks like a Zefania XML bible, ' 'please use the Zefania import option.')) return False + # No language info in the opensong format, so ask the user language_id = self.get_language(bible_name) if not language_id: log.error('Importing books from "%s" failed' % self.filename) diff --git a/openlp/plugins/bibles/lib/osis.py b/openlp/plugins/bibles/lib/osis.py index 5dd9d9d3e..9f0bb3801 100644 --- a/openlp/plugins/bibles/lib/osis.py +++ b/openlp/plugins/bibles/lib/osis.py @@ -65,12 +65,19 @@ class OSISBible(BibleDB): # NOTE: We don't need to do any of the normal encoding detection here, because lxml does it's own encoding # detection, and the two mechanisms together interfere with each other. 
import_file = open(self.filename, 'rb') - language_id = self.get_language(bible_name) + osis_bible_tree = etree.parse(import_file) + namespace = {'ns': 'http://www.bibletechnologies.net/2003/OSIS/namespace'} + # Find bible language + language_id = None + language = osis_bible_tree.xpath("//ns:osisText/@xml:lang", namespaces=namespace) + if language: + language_id = BiblesResourcesDB.get_language(language[0]) + # The language couldn't be detected, ask the user + if not language_id: + language_id = self.get_language(bible_name) if not language_id: log.error('Importing books from "%s" failed' % self.filename) return False - osis_bible_tree = etree.parse(import_file) - namespace = {'ns': 'http://www.bibletechnologies.net/2003/OSIS/namespace'} num_books = int(osis_bible_tree.xpath("count(//ns:div[@type='book'])", namespaces=namespace)) self.wizard.increment_progress_bar(translate('BiblesPlugin.OsisImport', 'Removing unused tags (this may take a few minutes)...')) diff --git a/openlp/plugins/bibles/lib/zefania.py b/openlp/plugins/bibles/lib/zefania.py index c52b58eae..81fb49eb5 100644 --- a/openlp/plugins/bibles/lib/zefania.py +++ b/openlp/plugins/bibles/lib/zefania.py @@ -64,11 +64,18 @@ class ZefaniaBible(BibleDB): # NOTE: We don't need to do any of the normal encoding detection here, because lxml does it's own encoding # detection, and the two mechanisms together interfere with each other. 
import_file = open(self.filename, 'rb') - language_id = self.get_language(bible_name) + zefania_bible_tree = etree.parse(import_file) + # Find bible language + language_id = None + language = zefania_bible_tree.xpath("/XMLBIBLE/INFORMATION/language/text()") + if language: + language_id = BiblesResourcesDB.get_language(language[0]) + # The language couldn't be detected, ask the user + if not language_id: + language_id = self.get_language(bible_name) if not language_id: log.error('Importing books from "%s" failed' % self.filename) return False - zefania_bible_tree = etree.parse(import_file) num_books = int(zefania_bible_tree.xpath("count(//BIBLEBOOK)")) # Strip tags we don't use - keep content etree.strip_tags(zefania_bible_tree, ('STYLE', 'GRAM', 'NOTE', 'SUP', 'XREF')) diff --git a/tests/functional/openlp_plugins/bibles/test_osisimport.py b/tests/functional/openlp_plugins/bibles/test_osisimport.py index 540403f84..ba23feba1 100644 --- a/tests/functional/openlp_plugins/bibles/test_osisimport.py +++ b/tests/functional/openlp_plugins/bibles/test_osisimport.py @@ -157,6 +157,5 @@ class TestOsisImport(TestCase): # THEN: The create_verse() method should have been called with each verse in the file. self.assertTrue(importer.create_verse.called) - print(importer.create_verse.call_list()) for verse_tag, verse_text in test_data['verses']: importer.create_verse.assert_any_call(importer.create_book().id, '1', verse_tag, verse_text)