From c20b7f53013303ea69ce9f58227e2f8b4cdd4bbc Mon Sep 17 00:00:00 2001 From: Tomas Groth Date: Fri, 3 Jan 2020 22:50:00 +0100 Subject: [PATCH] Fix bibleserver integration. --- .../plugins/bibles/forms/bibleimportform.py | 25 ++- openlp/plugins/bibles/lib/importers/http.py | 172 ++++++++++-------- .../openlp_plugins/bibles/test_bibleserver.py | 5 +- .../openlp_plugins/bibles/test_lib_http.py | 18 +- 4 files changed, 133 insertions(+), 87 deletions(-) diff --git a/openlp/plugins/bibles/forms/bibleimportform.py b/openlp/plugins/bibles/forms/bibleimportform.py index 7a37e6b90..451e61545 100644 --- a/openlp/plugins/bibles/forms/bibleimportform.py +++ b/openlp/plugins/bibles/forms/bibleimportform.py @@ -43,7 +43,7 @@ from openlp.core.lib.ui import critical_error_message_box from openlp.core.widgets.edits import PathEdit from openlp.core.widgets.wizard import OpenLPWizard, WizardStrings from openlp.plugins.bibles.lib.db import clean_filename -from openlp.plugins.bibles.lib.importers.http import BGExtract, CWExtract +from openlp.plugins.bibles.lib.importers.http import BGExtract, CWExtract, BSExtract from openlp.plugins.bibles.lib.manager import BibleFormat @@ -57,10 +57,9 @@ class WebDownload(object): Unknown = -1 Crosswalk = 0 BibleGateway = 1 - Bibleserver = 2 + BibleServer = 2 - # NOTE: BibleServer support has been disabled since we can't currently parse it. Re-add if/when fixed. - Names = ['Crosswalk', 'BibleGateway'] + Names = ['Crosswalk', 'BibleGateway', 'BibleServer'] class BibleImportForm(OpenLPWizard): @@ -228,8 +227,7 @@ class BibleImportForm(OpenLPWizard): self.web_bible_layout.setWidget(1, QtWidgets.QFormLayout.LabelRole, self.web_source_label) self.web_source_combo_box = QtWidgets.QComboBox(self.web_widget) self.web_source_combo_box.setObjectName('WebSourceComboBox') - # NOTE: Set to 2 items since BibleServer has been disabled. 
Set to 3 if/when fixed - self.web_source_combo_box.addItems(['', '']) + self.web_source_combo_box.addItems(['', '', '']) self.web_source_combo_box.setEnabled(False) self.web_bible_layout.setWidget(1, QtWidgets.QFormLayout.FieldRole, self.web_source_combo_box) self.web_translation_label = QtWidgets.QLabel(self.web_bible_tab) @@ -241,8 +239,7 @@ class BibleImportForm(OpenLPWizard): self.web_translation_combo_box.setEnabled(False) self.web_bible_layout.setWidget(2, QtWidgets.QFormLayout.FieldRole, self.web_translation_combo_box) self.web_progress_bar = QtWidgets.QProgressBar(self) - # NOTE: Set to 2 since BibleServer has been disabled. Set to 3 if/when fixed - self.web_progress_bar.setRange(0, 2) + self.web_progress_bar.setRange(0, 3) self.web_progress_bar.setObjectName('WebTranslationProgressBar') self.web_progress_bar.setVisible(False) self.web_bible_layout.setWidget(3, QtWidgets.QFormLayout.SpanningRole, self.web_progress_bar) @@ -403,9 +400,8 @@ class BibleImportForm(OpenLPWizard): 'Crosswalk')) self.web_source_combo_box.setItemText(WebDownload.BibleGateway, translate('BiblesPlugin.ImportWizardForm', 'BibleGateway')) - # NOTE: BibleServer support has been disabled since we can't currently parse it. Re-add if/when fixed. - # self.web_source_combo_box.setItemText(WebDownload.Bibleserver, translate('BiblesPlugin.ImportWizardForm', - # 'Bibleserver')) + self.web_source_combo_box.setItemText(WebDownload.BibleServer, translate('BiblesPlugin.ImportWizardForm', + 'Bibleserver')) self.web_translation_label.setText(translate('BiblesPlugin.ImportWizardForm', 'Bible:')) self.sword_bible_label.setText(translate('BiblesPlugin.ImportWizardForm', 'Bibles:')) self.sword_folder_label.setText(translate('BiblesPlugin.ImportWizardForm', 'SWORD data folder:')) @@ -584,7 +580,8 @@ class BibleImportForm(OpenLPWizard): # TODO: Where does critical_error_message_box get %s string from? # NOTE: BibleServer support has been disabled since we can't currently parse it. Re-add if/when fixed. 
for (download_type, extractor) in ((WebDownload.Crosswalk, CWExtract()), - (WebDownload.BibleGateway, BGExtract())): + (WebDownload.BibleGateway, BGExtract()), + (WebDownload.BibleServer, BSExtract())): try: bibles = extractor.get_bibles_from_http() except (urllib.error.URLError, ConnectionError): @@ -770,4 +767,6 @@ class BibleImportForm(OpenLPWizard): self.progress_label.setText(translate('BiblesPlugin.ImportWizardForm', 'Your Bible import failed.')) del self.manager.db_cache[importer.name] - delete_database(self.plugin.settings_section, importer.file) + # Don't delete the db if it wasn't created + if hasattr(importer, 'file'): + delete_database(self.plugin.settings_section, importer.file) diff --git a/openlp/plugins/bibles/lib/importers/http.py b/openlp/plugins/bibles/lib/importers/http.py index 7ca6cb79c..5fdf1ecac 100644 --- a/openlp/plugins/bibles/lib/importers/http.py +++ b/openlp/plugins/bibles/lib/importers/http.py @@ -52,28 +52,82 @@ UGLY_CHARS = { } VERSE_NUMBER_REGEX = re.compile(r'v(\d{1,2})(\d{3})(\d{3}) verse.*') + +# Manually extracted from https://www.bibleserver.com/webmasters +BIBLESERVER_TRANSLATIONS = { + 'ESV': {'name': 'English Standard Version', 'lang': 'English', 'sections': ['OT', 'NT']}, + 'NIV': {'name': 'New International Version', 'lang': 'English', 'sections': ['OT', 'NT']}, + 'NIRV': {'name': 'New Int. 
Readers Version', 'lang': 'English', 'sections': ['OT', 'NT']}, + 'KJV': {'name': 'King James Version', 'lang': 'English', 'sections': ['OT', 'NT']}, + 'LUT': {'name': 'Lutherbibel 2017', 'lang': 'German', 'sections': ['OT', 'NT', 'Apocrypha']}, + 'ELB': {'name': 'Elberfelder Bibel', 'lang': 'German', 'sections': ['OT', 'NT']}, + 'HFA': {'name': 'Hoffnung für Alle', 'lang': 'German', 'sections': ['OT', 'NT']}, + 'SLT': {'name': 'Schlachter 2000', 'lang': 'German', 'sections': ['OT', 'NT']}, + 'ZB': {'name': 'Zürcher Bibel', 'lang': 'German', 'sections': ['OT', 'NT']}, + 'NGÜ': {'name': 'Neue Genfer Übersetzung', 'lang': 'German', 'sections': ['OT', 'NT']}, + 'GNB': {'name': 'Gute Nachricht Bibel', 'lang': 'German', 'sections': ['OT', 'NT', 'Apocrypha']}, + 'EU': {'name': 'Einheitsübersetzung 2016', 'lang': 'German', 'sections': ['OT', 'NT', 'Apocrypha']}, + 'NLB': {'name': 'Neues Leben. Die Bibel', 'lang': 'German', 'sections': ['OT', 'NT']}, + 'NeÜ': {'name': 'Neue evangelistische Übersetzung', 'lang': 'German', 'sections': ['OT', 'NT']}, + 'MENG': {'name': 'Menge Bibel', 'lang': 'German', 'sections': ['OT', 'NT', 'Apocrypha']}, + 'BDS': {'name': 'Bible du Semeur', 'lang': 'French', 'sections': ['OT', 'NT']}, + 'S21': {'name': 'Segond 21', 'lang': 'French', 'sections': ['OT', 'NT']}, + 'ITA': {'name': 'La Parola è Vita', 'lang': 'Italian', 'sections': ['OT', 'NT']}, + 'NRS': {'name': 'Nuova Riveduta 2006', 'lang': 'Italian', 'sections': ['OT', 'NT']}, + 'HTB': {'name': 'Het Boek', 'lang': 'Dutch', 'sections': ['OT', 'NT']}, + 'LSG': {'name': 'Louis Segond 1910', 'lang': 'French', 'sections': ['OT', 'NT']}, + 'CST': {'name': 'Nueva Versión Internacional (Castilian) ', 'lang': 'Spanish', 'sections': ['OT', 'NT']}, + 'NVI': {'name': 'Nueva Versión Internacional', 'lang': 'Spanish', 'sections': ['OT', 'NT']}, + 'BTX': {'name': 'La Biblia Textual', 'lang': 'Spanish', 'sections': ['OT', 'NT']}, + 'PRT': {'name': 'O Livro', 'lang': 'Portuguese', 'sections': ['OT', 
'NT']}, + 'NOR': {'name': 'En Levende Bok', 'lang': 'Norwegian', 'sections': ['OT', 'NT']}, + 'BSV': {'name': 'Nya Levande Bibeln', 'lang': 'Swedish', 'sections': ['OT', 'NT']}, + 'DK': {'name': 'Bibelen på hverdagsdansk', 'lang': 'Danish', 'sections': ['OT', 'NT']}, + 'PSZ': {'name': 'Słowo Życia', 'lang': 'Polish', 'sections': ['OT', 'NT']}, + 'CEP': {'name': 'Český ekumenický překlad', 'lang': 'Czech', 'sections': ['OT', 'NT', 'Apocrypha']}, + 'SNC': {'name': 'Slovo na cestu', 'lang': 'Czech', 'sections': ['OT', 'NT']}, + 'B21': {'name': 'Bible, překlad 21. století', 'lang': 'Czech', 'sections': ['OT', 'NT', 'Apocrypha']}, + 'BKR': {'name': 'Bible Kralická', 'lang': 'Czech', 'sections': ['OT', 'NT']}, + 'NPK': {'name': 'Nádej pre kazdého', 'lang': 'Slovak', 'sections': ['OT', 'NT']}, + 'KAR': {'name': 'IBS-fordítás (Új Károli) ', 'lang': 'Hungarian', 'sections': ['OT']}, + 'HUN': {'name': 'Hungarian', 'lang': 'Hungarian', 'sections': ['OT', 'NT']}, + 'NTR': {'name': 'Noua traducere în limba românã', 'lang': 'Romanian', 'sections': ['OT', 'NT']}, + 'BGV': {'name': 'Верен', 'lang': 'Bulgarian', 'sections': ['OT', 'NT']}, + 'CBT': {'name': 'Библия, нов превод от оригиналните езици', 'lang': 'Bulgarian', 'sections': ['OT', 'NT', + 'Apocrypha']}, + 'CKK': {'name': 'Knjiga O Kristu', 'lang': 'Croatian', 'sections': ['OT', 'NT']}, + 'RSZ': {'name': 'Новый перевод на русский язык', 'lang': 'Russian', 'sections': ['OT', 'NT']}, + 'CARS': {'name': 'Священное Писание, Восточный перевод', 'lang': 'Russian', 'sections': ['OT', 'NT']}, + 'TR': {'name': 'Türkçe', 'lang': 'Turkish', 'sections': ['OT', 'NT']}, + 'NAV': {'name': 'Ketab El Hayat', 'lang': 'Arabic', 'sections': ['OT', 'NT']}, + 'FCB': {'name': 'کتاب مقدس، ترجمه تفسیری', 'lang': 'Persian', 'sections': ['OT', 'NT']}, + 'CUVS': {'name': '中文和合本(简体) ', 'lang': 'Chinese (Simplified)', 'sections': ['OT', 'NT']}, + 'CCBT': {'name': '聖經當代譯本修訂版', 'lang': 'Chinese (Simplified)', 'sections': ['OT', 'NT']}, +} + 
BIBLESERVER_LANGUAGE_CODE = { - 'fl_1': 'de', - 'fl_2': 'en', - 'fl_3': 'fr', - 'fl_4': 'it', - 'fl_5': 'es', - 'fl_6': 'pt', - 'fl_7': 'ru', - 'fl_8': 'sv', - 'fl_9': 'no', - 'fl_10': 'nl', - 'fl_11': 'cs', - 'fl_12': 'sk', - 'fl_13': 'ro', - 'fl_14': 'hr', - 'fl_15': 'hu', - 'fl_16': 'bg', - 'fl_17': 'ar', - 'fl_18': 'tr', - 'fl_19': 'pl', - 'fl_20': 'da', - 'fl_21': 'zh' + 'German': 'de', + 'English': 'en', + 'French': 'fr', + 'Italian': 'it', + 'Spanish': 'es', + 'Portuguese': 'pt', + 'Russian': 'ru', + 'Swedish': 'sv', + 'Norwegian': 'no', + 'Dutch': 'nl', + 'Czech': 'cs', + 'Slovak': 'sk', + 'Romanian': 'ro', + 'Croatian': 'hr', + 'Hungarian': 'hu', + 'Bulgarian': 'bg', + 'Arabic': 'ar', + 'Turkish': 'tr', + 'Polish': 'pl', + 'Danish': 'da', + 'Chinese (Simplified)': 'zh', + 'Persian': 'fa' } CROSSWALK_LANGUAGES = { @@ -363,7 +417,7 @@ class BSExtract(RegistryProperties): def get_bible_chapter(self, version, book_name, chapter): """ - Access and decode bibles via Bibleserver mobile website + Access and decode bibles via Bibleserver AMP website :param version: The version of the bible like NIV for New International Version :param book_name: Text name of bible book e.g. Genesis, 1. 
John, 1John or Offenbarung @@ -374,46 +428,44 @@ class BSExtract(RegistryProperties): chapter=chapter)) url_version = urllib.parse.quote(version.encode("utf-8")) url_book_name = urllib.parse.quote(book_name.encode("utf-8")) - chapter_url = 'http://m.bibleserver.com/text/{version}/{name}{chapter:d}'.format(version=url_version, - name=url_book_name, - chapter=chapter) - header = ('Accept-Language', 'en') - soup = get_soup_for_bible_ref(chapter_url, header) + chapter_url = 'https://bibleserver.com/amp/{version}/{name}{chapter:d}'.format(version=url_version, + name=url_book_name, + chapter=chapter) + soup = get_soup_for_bible_ref(chapter_url) if not soup: return None self.application.process_events() - content = soup.find('div', 'content') + content = soup.find_all('span', 'chapter-wrapper__verse') if not content: log.error('No verses found in the Bibleserver response.') send_error_message('parse') return None - content = content.find('div').find_all('div') verses = {} for verse in content: self.application.process_events() - versenumber = int(VERSE_NUMBER_REGEX.sub(r'\3', ' '.join(verse['class']))) - verses[versenumber] = verse.contents[1].rstrip('\n') + versenumber = int(verse.find('span', 'chapter-wrapper__verse__number').get_text()) + verses[versenumber] = verse.find('span', 'chapter-wrapper__verse__content').get_text() return SearchResults(book_name, chapter, verses) def get_books_from_http(self, version): """ - Load a list of all books a Bible contains from Bibleserver mobile website. + Load a list of all books a Bible contains from BiblesResourcesDB. 
:param version: The version of the Bible like NIV for New International Version """ log.debug('BSExtract.get_books_from_http("{version}")'.format(version=version)) - url_version = urllib.parse.quote(version.encode("utf-8")) - chapter_url = 'http://m.bibleserver.com/overlay/selectBook?translation={version}'.format(version=url_version) - soup = get_soup_for_bible_ref(chapter_url) - if not soup: - return None - content = soup.find('ul') - if not content: - log.error('No books found in the Bibleserver response.') - send_error_message('parse') - return None - content = content.find_all('li') - return [book.contents[0].contents[0] for book in content if len(book.contents[0].contents)] + # Parsing the book list from the website is near impossible, so we use the list from BiblesResourcesDB + bible = BIBLESERVER_TRANSLATIONS[version] + all_books = BiblesResourcesDB.get_books() + books = [] + for book in all_books: + if book['testament_id'] == 1 and 'OT' in bible['sections']: + books.append(book['name']) + elif book['testament_id'] == 2 and 'NT' in bible['sections']: + books.append(book['name']) + elif book['testament_id'] == 3 and 'Apocrypha' in bible['sections']: + books.append(book['name']) + return books def get_bibles_from_http(self): """ @@ -422,34 +474,12 @@ class BSExtract(RegistryProperties): returns a list in the form [(biblename, biblekey, language_code)] """ log.debug('BSExtract.get_bibles_from_http') - bible_url = 'http://www.bibleserver.com/index.php?language=2' - soup = get_soup_for_bible_ref(bible_url) - if not soup: - return None - bible_links = soup.find_all('a', {'class': 'trlCell'}) - if not bible_links: - log.debug('No a tags found - did site change?') - return None + # we need to cheat a bit and load it from a hardcoded list since the website is not parsable anymore... 
bibles = [] - for link in bible_links: - bible_name = link.get_text() - # Skip any audio - if 'audio' in bible_name.lower(): - continue - try: - bible_link = link['href'] - bible_key = bible_link[bible_link.rfind('/') + 1:] - css_classes = link['class'] - except KeyError: - log.debug('No href/class attribute found - did site change?') - language_code = '' - for css_class in css_classes: - if css_class.startswith('fl_'): - try: - language_code = BIBLESERVER_LANGUAGE_CODE[css_class] - except KeyError: - language_code = '' - bibles.append((bible_name, bible_key, language_code)) + for bible in BIBLESERVER_TRANSLATIONS.keys(): + bible_item = BIBLESERVER_TRANSLATIONS[bible] + bible_tuple = (bible_item['name'], bible, BIBLESERVER_LANGUAGE_CODE[bible_item['lang']]) + bibles.append(bible_tuple) return bibles diff --git a/tests/functional/openlp_plugins/bibles/test_bibleserver.py b/tests/functional/openlp_plugins/bibles/test_bibleserver.py index 0bc81ebf2..accff1c0b 100644 --- a/tests/functional/openlp_plugins/bibles/test_bibleserver.py +++ b/tests/functional/openlp_plugins/bibles/test_bibleserver.py @@ -21,7 +21,7 @@ """ This module contains tests for the http module of the Bibles plugin. 
""" -from unittest import TestCase +from unittest import TestCase, skip from unittest.mock import MagicMock, patch from bs4 import BeautifulSoup @@ -54,6 +54,7 @@ class TestBSExtract(TestCase): self.socket_patcher.stop() self.urllib_patcher.stop() + @skip('BSExtract does not currently use http for books') def test_get_books_from_http_no_soup(self): """ Test the get_books_from_http method when get_soup_for_bible_ref returns a falsey value @@ -77,6 +78,7 @@ class TestBSExtract(TestCase): assert result is None, \ 'BSExtract.get_books_from_http should return None when get_soup_for_bible_ref returns a false value' + @skip('BSExtract does not currently use http for books') def test_get_books_from_http_no_content(self): """ Test the get_books_from_http method when the specified element cannot be found in the tag object returned from @@ -107,6 +109,7 @@ class TestBSExtract(TestCase): assert result is None, \ 'BSExtract.get_books_from_http should return None when get_soup_for_bible_ref returns a false value' + @skip('BSExtract does not currently use http for books') def test_get_books_from_http_content(self): """ Test the get_books_from_http method with sample HTML diff --git a/tests/interfaces/openlp_plugins/bibles/test_lib_http.py b/tests/interfaces/openlp_plugins/bibles/test_lib_http.py index 5e396978b..cf62a748b 100644 --- a/tests/interfaces/openlp_plugins/bibles/test_lib_http.py +++ b/tests/interfaces/openlp_plugins/bibles/test_lib_http.py @@ -22,7 +22,7 @@ Package to test the openlp.plugin.bible.lib.https package. 
""" import os -from unittest import TestCase, skipIf, skip +from unittest import TestCase, skipIf from unittest.mock import MagicMock from openlp.core.common.registry import Registry @@ -122,7 +122,6 @@ class TestBibleHTTP(TestCase): # THEN: We should get back a valid service item assert len(results.verse_list) == 36, 'The book of John should not have had any verses added or removed' - @skip("We can't currently parse BibelServer") def test_bibleserver_get_bibles(self): """ Test getting list of bibles from BibleServer.com @@ -138,6 +137,21 @@ class TestBibleHTTP(TestCase): assert ('New Int. Readers Version', 'NIRV', 'en') in bibles assert ('Священное Писание, Восточный перевод', 'CARS', 'ru') in bibles + def test_bibleserver_get_verse_text(self): + """ + Test verse text from bibleserver.com + """ + # GIVEN: A new BibleServer extraction class + handler = BSExtract() + + # WHEN: downloading NIV Genesis from BibleServer + niv_genesis_chapter_one = handler.get_bible_chapter('NIV', 'Genesis', 1) + + # THEN: The verse list should contain the verses + assert niv_genesis_chapter_one.has_verse_list() is True + assert 'In the beginning God created the heavens and the earth.' == niv_genesis_chapter_one.verse_list[1], \ + 'The first chapter of genesis should have been fetched.' + def test_biblegateway_get_bibles(self): """ Test getting list of bibles from BibleGateway.com