Merge branch 'fix-bibleserver' into 'master'

Fix bibleserver integration

See merge request openlp/openlp!117
This commit is contained in:
Tim Bentley 2020-01-04 07:10:13 +00:00
commit 04c48313d0
4 changed files with 133 additions and 87 deletions

View File

@ -43,7 +43,7 @@ from openlp.core.lib.ui import critical_error_message_box
from openlp.core.widgets.edits import PathEdit from openlp.core.widgets.edits import PathEdit
from openlp.core.widgets.wizard import OpenLPWizard, WizardStrings from openlp.core.widgets.wizard import OpenLPWizard, WizardStrings
from openlp.plugins.bibles.lib.db import clean_filename from openlp.plugins.bibles.lib.db import clean_filename
from openlp.plugins.bibles.lib.importers.http import BGExtract, CWExtract from openlp.plugins.bibles.lib.importers.http import BGExtract, CWExtract, BSExtract
from openlp.plugins.bibles.lib.manager import BibleFormat from openlp.plugins.bibles.lib.manager import BibleFormat
@ -57,10 +57,9 @@ class WebDownload(object):
Unknown = -1 Unknown = -1
Crosswalk = 0 Crosswalk = 0
BibleGateway = 1 BibleGateway = 1
Bibleserver = 2 BibleServer = 2
# NOTE: BibleServer support has been disabled since we can't currently parse it. Re-add if/when fixed. Names = ['Crosswalk', 'BibleGateway', 'BibleServer']
Names = ['Crosswalk', 'BibleGateway']
class BibleImportForm(OpenLPWizard): class BibleImportForm(OpenLPWizard):
@ -228,8 +227,7 @@ class BibleImportForm(OpenLPWizard):
self.web_bible_layout.setWidget(1, QtWidgets.QFormLayout.LabelRole, self.web_source_label) self.web_bible_layout.setWidget(1, QtWidgets.QFormLayout.LabelRole, self.web_source_label)
self.web_source_combo_box = QtWidgets.QComboBox(self.web_widget) self.web_source_combo_box = QtWidgets.QComboBox(self.web_widget)
self.web_source_combo_box.setObjectName('WebSourceComboBox') self.web_source_combo_box.setObjectName('WebSourceComboBox')
# NOTE: Set to 2 items since BibleServer has been disabled. Set to 3 if/when fixed self.web_source_combo_box.addItems(['', '', ''])
self.web_source_combo_box.addItems(['', ''])
self.web_source_combo_box.setEnabled(False) self.web_source_combo_box.setEnabled(False)
self.web_bible_layout.setWidget(1, QtWidgets.QFormLayout.FieldRole, self.web_source_combo_box) self.web_bible_layout.setWidget(1, QtWidgets.QFormLayout.FieldRole, self.web_source_combo_box)
self.web_translation_label = QtWidgets.QLabel(self.web_bible_tab) self.web_translation_label = QtWidgets.QLabel(self.web_bible_tab)
@ -241,8 +239,7 @@ class BibleImportForm(OpenLPWizard):
self.web_translation_combo_box.setEnabled(False) self.web_translation_combo_box.setEnabled(False)
self.web_bible_layout.setWidget(2, QtWidgets.QFormLayout.FieldRole, self.web_translation_combo_box) self.web_bible_layout.setWidget(2, QtWidgets.QFormLayout.FieldRole, self.web_translation_combo_box)
self.web_progress_bar = QtWidgets.QProgressBar(self) self.web_progress_bar = QtWidgets.QProgressBar(self)
# NOTE: Set to 2 since BibleServer has been disabled. Set to 3 if/when fixed self.web_progress_bar.setRange(0, 3)
self.web_progress_bar.setRange(0, 2)
self.web_progress_bar.setObjectName('WebTranslationProgressBar') self.web_progress_bar.setObjectName('WebTranslationProgressBar')
self.web_progress_bar.setVisible(False) self.web_progress_bar.setVisible(False)
self.web_bible_layout.setWidget(3, QtWidgets.QFormLayout.SpanningRole, self.web_progress_bar) self.web_bible_layout.setWidget(3, QtWidgets.QFormLayout.SpanningRole, self.web_progress_bar)
@ -403,9 +400,8 @@ class BibleImportForm(OpenLPWizard):
'Crosswalk')) 'Crosswalk'))
self.web_source_combo_box.setItemText(WebDownload.BibleGateway, translate('BiblesPlugin.ImportWizardForm', self.web_source_combo_box.setItemText(WebDownload.BibleGateway, translate('BiblesPlugin.ImportWizardForm',
'BibleGateway')) 'BibleGateway'))
# NOTE: BibleServer support has been disabled since we can't currently parse it. Re-add if/when fixed. self.web_source_combo_box.setItemText(WebDownload.BibleServer, translate('BiblesPlugin.ImportWizardForm',
# self.web_source_combo_box.setItemText(WebDownload.Bibleserver, translate('BiblesPlugin.ImportWizardForm', 'Bibleserver'))
# 'Bibleserver'))
self.web_translation_label.setText(translate('BiblesPlugin.ImportWizardForm', 'Bible:')) self.web_translation_label.setText(translate('BiblesPlugin.ImportWizardForm', 'Bible:'))
self.sword_bible_label.setText(translate('BiblesPlugin.ImportWizardForm', 'Bibles:')) self.sword_bible_label.setText(translate('BiblesPlugin.ImportWizardForm', 'Bibles:'))
self.sword_folder_label.setText(translate('BiblesPlugin.ImportWizardForm', 'SWORD data folder:')) self.sword_folder_label.setText(translate('BiblesPlugin.ImportWizardForm', 'SWORD data folder:'))
@ -584,7 +580,8 @@ class BibleImportForm(OpenLPWizard):
# TODO: Where does critical_error_message_box get %s string from? # TODO: Where does critical_error_message_box get %s string from?
# NOTE: BibleServer support has been disabled since we can't currently parse it. Re-add if/when fixed. # NOTE: BibleServer support has been disabled since we can't currently parse it. Re-add if/when fixed.
for (download_type, extractor) in ((WebDownload.Crosswalk, CWExtract()), for (download_type, extractor) in ((WebDownload.Crosswalk, CWExtract()),
(WebDownload.BibleGateway, BGExtract())): (WebDownload.BibleGateway, BGExtract()),
(WebDownload.BibleServer, BSExtract())):
try: try:
bibles = extractor.get_bibles_from_http() bibles = extractor.get_bibles_from_http()
except (urllib.error.URLError, ConnectionError): except (urllib.error.URLError, ConnectionError):
@ -770,4 +767,6 @@ class BibleImportForm(OpenLPWizard):
self.progress_label.setText(translate('BiblesPlugin.ImportWizardForm', 'Your Bible import failed.')) self.progress_label.setText(translate('BiblesPlugin.ImportWizardForm', 'Your Bible import failed.'))
del self.manager.db_cache[importer.name] del self.manager.db_cache[importer.name]
# Don't delete the db if it wasn't created
if hasattr(importer, 'file'):
delete_database(self.plugin.settings_section, importer.file) delete_database(self.plugin.settings_section, importer.file)

View File

@ -52,28 +52,82 @@ UGLY_CHARS = {
} }
VERSE_NUMBER_REGEX = re.compile(r'v(\d{1,2})(\d{3})(\d{3}) verse.*') VERSE_NUMBER_REGEX = re.compile(r'v(\d{1,2})(\d{3})(\d{3}) verse.*')
# Manually extracted from https://www.bibleserver.com/webmasters
BIBLESERVER_TRANSLATIONS = {
'ESV': {'name': 'English Standard Version', 'lang': 'English', 'sections': ['OT', 'NT']},
'NIV': {'name': 'New International Version', 'lang': 'English', 'sections': ['OT', 'NT']},
'NIRV': {'name': 'New Int. Readers Version', 'lang': 'English', 'sections': ['OT', 'NT']},
'KJV': {'name': 'King James Version', 'lang': 'English', 'sections': ['OT', 'NT']},
'LUT': {'name': 'Lutherbibel 2017', 'lang': 'German', 'sections': ['OT', 'NT', 'Apocrypha']},
'ELB': {'name': 'Elberfelder Bibel', 'lang': 'German', 'sections': ['OT', 'NT']},
'HFA': {'name': 'Hoffnung für Alle', 'lang': 'German', 'sections': ['OT', 'NT']},
'SLT': {'name': 'Schlachter 2000', 'lang': 'German', 'sections': ['OT', 'NT']},
'ZB': {'name': 'Zürcher Bibel', 'lang': 'German', 'sections': ['OT', 'NT']},
'NGÜ': {'name': 'Neue Genfer Übersetzung', 'lang': 'German', 'sections': ['OT', 'NT']},
'GNB': {'name': 'Gute Nachricht Bibel', 'lang': 'German', 'sections': ['OT', 'NT', 'Apocrypha']},
'EU': {'name': 'Einheitsübersetzung 2016', 'lang': 'German', 'sections': ['OT', 'NT', 'Apocrypha']},
'NLB': {'name': 'Neues Leben. Die Bibel', 'lang': 'German', 'sections': ['OT', 'NT']},
'NeÜ': {'name': 'Neue evangelistische Übersetzung', 'lang': 'German', 'sections': ['OT', 'NT']},
'MENG': {'name': 'Menge Bibel', 'lang': 'German', 'sections': ['OT', 'NT', 'Apocrypha']},
'BDS': {'name': 'Bible du Semeur', 'lang': 'French', 'sections': ['OT', 'NT']},
'S21': {'name': 'Segond 21', 'lang': 'French', 'sections': ['OT', 'NT']},
'ITA': {'name': 'La Parola è Vita', 'lang': 'Italian', 'sections': ['OT', 'NT']},
'NRS': {'name': 'Nuova Riveduta 2006', 'lang': 'Italian', 'sections': ['OT', 'NT']},
'HTB': {'name': 'Het Boek', 'lang': 'Dutch', 'sections': ['OT', 'NT']},
'LSG': {'name': 'Louis Segond 1910', 'lang': 'French', 'sections': ['OT', 'NT']},
'CST': {'name': 'Nueva Versión Internacional (Castilian) ', 'lang': 'Spanish', 'sections': ['OT', 'NT']},
'NVI': {'name': 'Nueva Versión Internacional', 'lang': 'Spanish', 'sections': ['OT', 'NT']},
'BTX': {'name': 'La Biblia Textual', 'lang': 'Spanish', 'sections': ['OT', 'NT']},
'PRT': {'name': 'O Livro', 'lang': 'Portuguese', 'sections': ['OT', 'NT']},
'NOR': {'name': 'En Levende Bok', 'lang': 'Norwegian', 'sections': ['OT', 'NT']},
'BSV': {'name': 'Nya Levande Bibeln', 'lang': 'Swedish', 'sections': ['OT', 'NT']},
'DK': {'name': 'Bibelen på hverdagsdansk', 'lang': 'Danish', 'sections': ['OT', 'NT']},
'PSZ': {'name': 'Słowo Życia', 'lang': 'Polish', 'sections': ['OT', 'NT']},
'CEP': {'name': 'Český ekumenický překlad', 'lang': 'Czech', 'sections': ['OT', 'NT', 'Apocrypha']},
'SNC': {'name': 'Slovo na cestu', 'lang': 'Czech', 'sections': ['OT', 'NT']},
'B21': {'name': 'Bible, překlad 21. století', 'lang': 'Czech', 'sections': ['OT', 'NT', 'Apocrypha']},
'BKR': {'name': 'Bible Kralická', 'lang': 'Czech', 'sections': ['OT', 'NT']},
'NPK': {'name': 'Nádej pre kazdého', 'lang': 'Slovak', 'sections': ['OT', 'NT']},
'KAR': {'name': 'IBS-fordítás (Új Károli) ', 'lang': 'Hungarian', 'sections': ['OT']},
'HUN': {'name': 'Hungarian', 'lang': 'Hungarian', 'sections': ['OT', 'NT']},
'NTR': {'name': 'Noua traducere în limba românã', 'lang': 'Romanian', 'sections': ['OT', 'NT']},
'BGV': {'name': 'Верен', 'lang': 'Bulgarian', 'sections': ['OT', 'NT']},
'CBT': {'name': 'Библия, нов превод от оригиналните езици', 'lang': 'Bulgarian', 'sections': ['OT', 'NT',
'Apocrypha']},
'CKK': {'name': 'Knjiga O Kristu', 'lang': 'Croatian', 'sections': ['OT', 'NT']},
'RSZ': {'name': 'Новый перевод на русский язык', 'lang': 'Russian', 'sections': ['OT', 'NT']},
'CARS': {'name': 'Священное Писание, Восточный перевод', 'lang': 'Russian', 'sections': ['OT', 'NT']},
'TR': {'name': 'Türkçe', 'lang': 'Turkish', 'sections': ['OT', 'NT']},
'NAV': {'name': 'Ketab El Hayat', 'lang': 'Arabic', 'sections': ['OT', 'NT']},
'FCB': {'name': 'کتاب مقدس، ترجمه تفسیری', 'lang': 'Persian', 'sections': ['OT', 'NT']},
'CUVS': {'name': '中文和合本(简体) ', 'lang': 'Chinese (Simplified)', 'sections': ['OT', 'NT']},
'CCBT': {'name': '聖經當代譯本修訂版', 'lang': 'Chinese (Simplified)', 'sections': ['OT', 'NT']},
}
BIBLESERVER_LANGUAGE_CODE = { BIBLESERVER_LANGUAGE_CODE = {
'fl_1': 'de', 'German': 'de',
'fl_2': 'en', 'English': 'en',
'fl_3': 'fr', 'French': 'fr',
'fl_4': 'it', 'Italian': 'it',
'fl_5': 'es', 'Spanish': 'es',
'fl_6': 'pt', 'Portuguese': 'pt',
'fl_7': 'ru', 'Russian': 'ru',
'fl_8': 'sv', 'Swedish': 'sv',
'fl_9': 'no', 'Norwegian': 'no',
'fl_10': 'nl', 'Dutch': 'nl',
'fl_11': 'cs', 'Czech': 'cs',
'fl_12': 'sk', 'Slovak': 'sk',
'fl_13': 'ro', 'Romanian': 'ro',
'fl_14': 'hr', 'Croatian': 'hr',
'fl_15': 'hu', 'Hungarian': 'hu',
'fl_16': 'bg', 'Bulgarian': 'bg',
'fl_17': 'ar', 'Arabic': 'ar',
'fl_18': 'tr', 'Turkish': 'tr',
'fl_19': 'pl', 'Polish': 'pl',
'fl_20': 'da', 'Danish': 'da',
'fl_21': 'zh' 'Chinese (Simplified)': 'zh',
'Persian': 'fa'
} }
CROSSWALK_LANGUAGES = { CROSSWALK_LANGUAGES = {
@ -363,7 +417,7 @@ class BSExtract(RegistryProperties):
def get_bible_chapter(self, version, book_name, chapter): def get_bible_chapter(self, version, book_name, chapter):
""" """
Access and decode bibles via Bibleserver mobile website Access and decode bibles via Bibleserver AMP website
:param version: The version of the bible like NIV for New International Version :param version: The version of the bible like NIV for New International Version
:param book_name: Text name of bible book e.g. Genesis, 1. John, 1John or Offenbarung :param book_name: Text name of bible book e.g. Genesis, 1. John, 1John or Offenbarung
@ -374,46 +428,44 @@ class BSExtract(RegistryProperties):
chapter=chapter)) chapter=chapter))
url_version = urllib.parse.quote(version.encode("utf-8")) url_version = urllib.parse.quote(version.encode("utf-8"))
url_book_name = urllib.parse.quote(book_name.encode("utf-8")) url_book_name = urllib.parse.quote(book_name.encode("utf-8"))
chapter_url = 'http://m.bibleserver.com/text/{version}/{name}{chapter:d}'.format(version=url_version, chapter_url = 'https://bibleserver.com/amp/{version}/{name}{chapter:d}'.format(version=url_version,
name=url_book_name, name=url_book_name,
chapter=chapter) chapter=chapter)
header = ('Accept-Language', 'en') soup = get_soup_for_bible_ref(chapter_url)
soup = get_soup_for_bible_ref(chapter_url, header)
if not soup: if not soup:
return None return None
self.application.process_events() self.application.process_events()
content = soup.find('div', 'content') content = soup.find_all('span', 'chapter-wrapper__verse')
if not content: if not content:
log.error('No verses found in the Bibleserver response.') log.error('No verses found in the Bibleserver response.')
send_error_message('parse') send_error_message('parse')
return None return None
content = content.find('div').find_all('div')
verses = {} verses = {}
for verse in content: for verse in content:
self.application.process_events() self.application.process_events()
versenumber = int(VERSE_NUMBER_REGEX.sub(r'\3', ' '.join(verse['class']))) versenumber = int(verse.find('span', 'chapter-wrapper__verse__number').get_text())
verses[versenumber] = verse.contents[1].rstrip('\n') verses[versenumber] = verse.find('span', 'chapter-wrapper__verse__content').get_text()
return SearchResults(book_name, chapter, verses) return SearchResults(book_name, chapter, verses)
def get_books_from_http(self, version): def get_books_from_http(self, version):
""" """
Load a list of all books a Bible contains from Bibleserver mobile website. Load a list of all books a Bible contains from BiblesResourcesDB.
:param version: The version of the Bible like NIV for New International Version :param version: The version of the Bible like NIV for New International Version
""" """
log.debug('BSExtract.get_books_from_http("{version}")'.format(version=version)) log.debug('BSExtract.get_books_from_http("{version}")'.format(version=version))
url_version = urllib.parse.quote(version.encode("utf-8")) # Parsing the book list from the website is near impossible, so we use the list from BiblesResourcesDB
chapter_url = 'http://m.bibleserver.com/overlay/selectBook?translation={version}'.format(version=url_version) bible = BIBLESERVER_TRANSLATIONS[version]
soup = get_soup_for_bible_ref(chapter_url) all_books = BiblesResourcesDB.get_books()
if not soup: books = []
return None for book in all_books:
content = soup.find('ul') if book['testament_id'] == 1 and 'OT' in bible['sections']:
if not content: books.append(book['name'])
log.error('No books found in the Bibleserver response.') elif book['testament_id'] == 2 and 'NT' in bible['sections']:
send_error_message('parse') books.append(book['name'])
return None elif book['testament_id'] == 3 and 'Apocrypha' in bible['sections']:
content = content.find_all('li') books.append(book['name'])
return [book.contents[0].contents[0] for book in content if len(book.contents[0].contents)] return books
def get_bibles_from_http(self): def get_bibles_from_http(self):
""" """
@ -422,34 +474,12 @@ class BSExtract(RegistryProperties):
returns a list in the form [(biblename, biblekey, language_code)] returns a list in the form [(biblename, biblekey, language_code)]
""" """
log.debug('BSExtract.get_bibles_from_http') log.debug('BSExtract.get_bibles_from_http')
bible_url = 'http://www.bibleserver.com/index.php?language=2' # we need to cheat a bit and load it from a hardcoded list since the website is not parsable anymore...
soup = get_soup_for_bible_ref(bible_url)
if not soup:
return None
bible_links = soup.find_all('a', {'class': 'trlCell'})
if not bible_links:
log.debug('No a tags found - did site change?')
return None
bibles = [] bibles = []
for link in bible_links: for bible in BIBLESERVER_TRANSLATIONS.keys():
bible_name = link.get_text() bible_item = BIBLESERVER_TRANSLATIONS[bible]
# Skip any audio bible_tuple = (bible_item['name'], bible, BIBLESERVER_LANGUAGE_CODE[bible_item['lang']])
if 'audio' in bible_name.lower(): bibles.append(bible_tuple)
continue
try:
bible_link = link['href']
bible_key = bible_link[bible_link.rfind('/') + 1:]
css_classes = link['class']
except KeyError:
log.debug('No href/class attribute found - did site change?')
language_code = ''
for css_class in css_classes:
if css_class.startswith('fl_'):
try:
language_code = BIBLESERVER_LANGUAGE_CODE[css_class]
except KeyError:
language_code = ''
bibles.append((bible_name, bible_key, language_code))
return bibles return bibles

View File

@ -21,7 +21,7 @@
""" """
This module contains tests for the http module of the Bibles plugin. This module contains tests for the http module of the Bibles plugin.
""" """
from unittest import TestCase from unittest import TestCase, skip
from unittest.mock import MagicMock, patch from unittest.mock import MagicMock, patch
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
@ -54,6 +54,7 @@ class TestBSExtract(TestCase):
self.socket_patcher.stop() self.socket_patcher.stop()
self.urllib_patcher.stop() self.urllib_patcher.stop()
@skip('BSExtract does not currently use http for books')
def test_get_books_from_http_no_soup(self): def test_get_books_from_http_no_soup(self):
""" """
Test the get_books_from_http method when get_soup_for_bible_ref returns a falsey value Test the get_books_from_http method when get_soup_for_bible_ref returns a falsey value
@ -77,6 +78,7 @@ class TestBSExtract(TestCase):
assert result is None, \ assert result is None, \
'BSExtract.get_books_from_http should return None when get_soup_for_bible_ref returns a false value' 'BSExtract.get_books_from_http should return None when get_soup_for_bible_ref returns a false value'
@skip('BSExtract does not currently use http for books')
def test_get_books_from_http_no_content(self): def test_get_books_from_http_no_content(self):
""" """
Test the get_books_from_http method when the specified element cannot be found in the tag object returned from Test the get_books_from_http method when the specified element cannot be found in the tag object returned from
@ -107,6 +109,7 @@ class TestBSExtract(TestCase):
assert result is None, \ assert result is None, \
'BSExtract.get_books_from_http should return None when get_soup_for_bible_ref returns a false value' 'BSExtract.get_books_from_http should return None when get_soup_for_bible_ref returns a false value'
@skip('BSExtract does not currently use http for books')
def test_get_books_from_http_content(self): def test_get_books_from_http_content(self):
""" """
Test the get_books_from_http method with sample HTML Test the get_books_from_http method with sample HTML

View File

@ -22,7 +22,7 @@
Package to test the openlp.plugin.bible.lib.https package. Package to test the openlp.plugin.bible.lib.https package.
""" """
import os import os
from unittest import TestCase, skipIf, skip from unittest import TestCase, skipIf
from unittest.mock import MagicMock from unittest.mock import MagicMock
from openlp.core.common.registry import Registry from openlp.core.common.registry import Registry
@ -122,7 +122,6 @@ class TestBibleHTTP(TestCase):
# THEN: We should get back a valid service item # THEN: We should get back a valid service item
assert len(results.verse_list) == 36, 'The book of John should not have had any verses added or removed' assert len(results.verse_list) == 36, 'The book of John should not have had any verses added or removed'
@skip("We can't currently parse BibelServer")
def test_bibleserver_get_bibles(self): def test_bibleserver_get_bibles(self):
""" """
Test getting list of bibles from BibleServer.com Test getting list of bibles from BibleServer.com
@ -138,6 +137,21 @@ class TestBibleHTTP(TestCase):
assert ('New Int. Readers Version', 'NIRV', 'en') in bibles assert ('New Int. Readers Version', 'NIRV', 'en') in bibles
assert ('Священное Писание, Восточный перевод', 'CARS', 'ru') in bibles assert ('Священное Писание, Восточный перевод', 'CARS', 'ru') in bibles
def test_bibleserver_get_verse_text(self):
"""
Test verse text from bibleserver.com
"""
# GIVEN: A new BibleServer extraction class
handler = BSExtract()
# WHEN: downloading NIV Genesis from BibleServer
niv_genesis_chapter_one = handler.get_bible_chapter('NIV', 'Genesis', 1)
# THEN: The verse list should contain the verses
assert niv_genesis_chapter_one.has_verse_list() is True
assert 'In the beginning God created the heavens and the earth.' == niv_genesis_chapter_one.verse_list[1], \
'The first chapter of genesis should have been fetched.'
def test_biblegateway_get_bibles(self): def test_biblegateway_get_bibles(self):
""" """
Test getting list of bibles from BibleGateway.com Test getting list of bibles from BibleGateway.com