Get the list of web-bibles from the websites, instead of using a static db.

bzr-revno: 2514
This commit is contained in:
Tomas Groth 2015-02-28 06:34:36 +00:00 committed by Tim Bentley
commit 79b66f9803
3 changed files with 269 additions and 44 deletions

View File

@ -24,6 +24,7 @@ The bible import functions for OpenLP
"""
import logging
import os
import urllib.error
from PyQt4 import QtGui
@ -34,6 +35,7 @@ from openlp.core.ui.wizard import OpenLPWizard, WizardStrings
from openlp.core.utils import get_locale_key
from openlp.plugins.bibles.lib.manager import BibleFormat
from openlp.plugins.bibles.lib.db import BiblesResourcesDB, clean_filename
from openlp.plugins.bibles.lib.http import CWExtract, BGExtract, BSExtract
log = logging.getLogger(__name__)
@ -90,7 +92,6 @@ class BibleImportForm(OpenLPWizard):
Perform any custom initialisation for bible importing.
"""
self.manager.set_process_dialog(self)
self.load_Web_Bibles()
self.restart()
self.select_stack.setCurrentIndex(0)
@ -104,6 +105,7 @@ class BibleImportForm(OpenLPWizard):
self.csv_verses_button.clicked.connect(self.on_csv_verses_browse_button_clicked)
self.open_song_browse_button.clicked.connect(self.on_open_song_browse_button_clicked)
self.zefania_browse_button.clicked.connect(self.on_zefania_browse_button_clicked)
self.web_update_button.clicked.connect(self.on_web_update_button_clicked)
def add_custom_pages(self):
"""
@ -202,20 +204,33 @@ class BibleImportForm(OpenLPWizard):
self.web_bible_tab.setObjectName('WebBibleTab')
self.web_bible_layout = QtGui.QFormLayout(self.web_bible_tab)
self.web_bible_layout.setObjectName('WebBibleLayout')
self.web_update_label = QtGui.QLabel(self.web_bible_tab)
self.web_update_label.setObjectName('WebUpdateLabel')
self.web_bible_layout.setWidget(0, QtGui.QFormLayout.LabelRole, self.web_update_label)
self.web_update_button = QtGui.QPushButton(self.web_bible_tab)
self.web_update_button.setObjectName('WebUpdateButton')
self.web_bible_layout.setWidget(0, QtGui.QFormLayout.FieldRole, self.web_update_button)
self.web_source_label = QtGui.QLabel(self.web_bible_tab)
self.web_source_label.setObjectName('WebSourceLabel')
self.web_bible_layout.setWidget(0, QtGui.QFormLayout.LabelRole, self.web_source_label)
self.web_bible_layout.setWidget(1, QtGui.QFormLayout.LabelRole, self.web_source_label)
self.web_source_combo_box = QtGui.QComboBox(self.web_bible_tab)
self.web_source_combo_box.setObjectName('WebSourceComboBox')
self.web_source_combo_box.addItems(['', '', ''])
self.web_bible_layout.setWidget(0, QtGui.QFormLayout.FieldRole, self.web_source_combo_box)
self.web_source_combo_box.setEnabled(False)
self.web_bible_layout.setWidget(1, QtGui.QFormLayout.FieldRole, self.web_source_combo_box)
self.web_translation_label = QtGui.QLabel(self.web_bible_tab)
self.web_translation_label.setObjectName('web_translation_label')
self.web_bible_layout.setWidget(1, QtGui.QFormLayout.LabelRole, self.web_translation_label)
self.web_bible_layout.setWidget(2, QtGui.QFormLayout.LabelRole, self.web_translation_label)
self.web_translation_combo_box = QtGui.QComboBox(self.web_bible_tab)
self.web_translation_combo_box.setSizeAdjustPolicy(QtGui.QComboBox.AdjustToContents)
self.web_translation_combo_box.setObjectName('WebTranslationComboBox')
self.web_bible_layout.setWidget(1, QtGui.QFormLayout.FieldRole, self.web_translation_combo_box)
self.web_translation_combo_box.setEnabled(False)
self.web_bible_layout.setWidget(2, QtGui.QFormLayout.FieldRole, self.web_translation_combo_box)
self.web_progress_bar = QtGui.QProgressBar(self)
self.web_progress_bar.setRange(0, 3)
self.web_progress_bar.setObjectName('WebTranslationProgressBar')
self.web_progress_bar.setVisible(False)
self.web_bible_layout.setWidget(3, QtGui.QFormLayout.SpanningRole, self.web_progress_bar)
self.web_tab_widget.addTab(self.web_bible_tab, '')
self.web_proxy_tab = QtGui.QWidget()
self.web_proxy_tab.setObjectName('WebProxyTab')
@ -314,6 +329,8 @@ class BibleImportForm(OpenLPWizard):
self.open_song_file_label.setText(translate('BiblesPlugin.ImportWizardForm', 'Bible file:'))
self.web_source_label.setText(translate('BiblesPlugin.ImportWizardForm', 'Location:'))
self.zefania_file_label.setText(translate('BiblesPlugin.ImportWizardForm', 'Bible file:'))
self.web_update_label.setText(translate('BiblesPlugin.ImportWizardForm', 'Click to download bible list'))
self.web_update_button.setText(translate('BiblesPlugin.ImportWizardForm', 'Download bible list'))
self.web_source_combo_box.setItemText(WebDownload.Crosswalk, translate('BiblesPlugin.ImportWizardForm',
'Crosswalk'))
self.web_source_combo_box.setItemText(WebDownload.BibleGateway, translate('BiblesPlugin.ImportWizardForm',
@ -388,8 +405,11 @@ class BibleImportForm(OpenLPWizard):
self.zefania_file_edit.setFocus()
return False
elif self.field('source_format') == BibleFormat.WebDownload:
self.version_name_edit.setText(self.web_translation_combo_box.currentText())
return True
# If count is 0 the bible list has not yet been downloaded
if self.web_translation_combo_box.count() == 0:
return False
else:
self.version_name_edit.setText(self.web_translation_combo_box.currentText())
return True
elif self.currentPage() == self.license_details_page:
license_version = self.field('license_version')
@ -434,9 +454,10 @@ class BibleImportForm(OpenLPWizard):
:param index: The index of the combo box.
"""
self.web_translation_combo_box.clear()
bibles = list(self.web_bible_list[index].keys())
bibles.sort(key=get_locale_key)
self.web_translation_combo_box.addItems(bibles)
if self.web_bible_list:
bibles = list(self.web_bible_list[index].keys())
bibles.sort(key=get_locale_key)
self.web_translation_combo_box.addItems(bibles)
def on_osis_browse_button_clicked(self):
"""
@ -475,6 +496,39 @@ class BibleImportForm(OpenLPWizard):
self.get_file_name(WizardStrings.OpenTypeFile % WizardStrings.ZEF, self.zefania_file_edit,
'last directory import')
def on_web_update_button_clicked(self):
"""
Download list of bibles from Crosswalk, BibleServer and BibleGateway.
"""
# Download from Crosswalk, BiblesGateway, BibleServer
self.web_bible_list = {}
self.web_source_combo_box.setEnabled(False)
self.web_translation_combo_box.setEnabled(False)
self.web_update_button.setEnabled(False)
self.web_progress_bar.setVisible(True)
self.web_progress_bar.setValue(0)
proxy_server = self.field('proxy_server')
for (download_type, extractor) in ((WebDownload.Crosswalk, CWExtract(proxy_server)),
(WebDownload.BibleGateway, BGExtract(proxy_server)),
(WebDownload.Bibleserver, BSExtract(proxy_server))):
try:
bibles = extractor.get_bibles_from_http()
except (urllib.error.URLError, ConnectionError) as err:
critical_error_message_box(translate('BiblesPlugin.ImportWizardForm', 'Error during download'),
translate('BiblesPlugin.ImportWizardForm',
'An error occurred while downloading the list of bibles from %s.'))
self.web_bible_list[download_type] = {}
for (bible_name, bible_key, language_code) in bibles:
self.web_bible_list[download_type][bible_name] = (bible_key, language_code)
self.web_progress_bar.setValue(download_type + 1)
# Update combo box if something got into the list
if self.web_bible_list:
self.on_web_source_combo_box_index_changed(0)
self.web_source_combo_box.setEnabled(True)
self.web_translation_combo_box.setEnabled(True)
self.web_update_button.setEnabled(True)
self.web_progress_bar.setVisible(False)
def register_fields(self):
"""
Register the bible import wizard fields.
@ -520,30 +574,6 @@ class BibleImportForm(OpenLPWizard):
self.on_web_source_combo_box_index_changed(WebDownload.Crosswalk)
settings.endGroup()
def load_Web_Bibles(self):
"""
Load the lists of Crosswalk, BibleGateway and Bibleserver bibles.
"""
# Load Crosswalk Bibles.
self.load_Bible_Resource(WebDownload.Crosswalk)
# Load BibleGateway Bibles.
self.load_Bible_Resource(WebDownload.BibleGateway)
# Load and Bibleserver Bibles.
self.load_Bible_Resource(WebDownload.Bibleserver)
def load_Bible_Resource(self, download_type):
"""
Loads a web bible from bible_resources.sqlite.
:param download_type: The WebDownload type e.g. bibleserver.
"""
self.web_bible_list[download_type] = {}
bibles = BiblesResourcesDB.get_webbibles(WebDownload.Names[download_type])
for bible in bibles:
version = bible['name']
name = bible['abbreviation']
self.web_bible_list[download_type][version] = name.strip()
def pre_wizard(self):
"""
Prepare the UI for the import.
@ -583,14 +613,15 @@ class BibleImportForm(OpenLPWizard):
self.progress_bar.setMaximum(1)
download_location = self.field('web_location')
bible_version = self.web_translation_combo_box.currentText()
bible = self.web_bible_list[download_location][bible_version]
(bible, language_id) = self.web_bible_list[download_location][bible_version]
importer = self.manager.import_bible(
BibleFormat.WebDownload, name=license_version,
download_source=WebDownload.Names[download_location],
download_name=bible,
proxy_server=self.field('proxy_server'),
proxy_username=self.field('proxy_username'),
proxy_password=self.field('proxy_password')
proxy_password=self.field('proxy_password'),
language_id=language_id
)
elif bible_type == BibleFormat.Zefania:
# Import an Zefania bible.

View File

@ -50,6 +50,38 @@ UGLY_CHARS = {
}
VERSE_NUMBER_REGEX = re.compile(r'v(\d{1,2})(\d{3})(\d{3}) verse.*')
BIBLESERVER_LANGUAGE_CODE = {
'fl_1': 'de',
'fl_2': 'en',
'fl_3': 'fr',
'fl_4': 'it',
'fl_5': 'es',
'fl_6': 'pt',
'fl_7': 'ru',
'fl_8': 'sv',
'fl_9': 'no',
'fl_10': 'nl',
'fl_11': 'cs',
'fl_12': 'sk',
'fl_13': 'ro',
'fl_14': 'hr',
'fl_15': 'hu',
'fl_16': 'bg',
'fl_17': 'ar',
'fl_18': 'tr',
'fl_19': 'pl',
'fl_20': 'da',
'fl_21': 'zh'
}
CROSSWALK_LANGUAGES = {
'Portuguese': 'pt',
'German': 'de',
'Italian': 'it',
'Español': 'es',
'French': 'fr',
'Dutch': 'nl'
}
log = logging.getLogger(__name__)
@ -222,6 +254,8 @@ class BGExtract(RegistryProperties):
if not soup:
return None
div = soup.find('div', 'result-text-style-normal')
if not div:
return None
self._clean_soup(div)
span_list = div.find_all('span', 'text')
log.debug('Span list: %s', span_list)
@ -278,6 +312,42 @@ class BGExtract(RegistryProperties):
books.append(book.contents[0])
return books
def get_bibles_from_http(self):
"""
Load a list of bibles from BibleGateway website.
returns a list in the form [(biblename, biblekey, language_code)]
"""
log.debug('BGExtract.get_bibles_from_http')
bible_url = 'https://legacy.biblegateway.com/versions/'
soup = get_soup_for_bible_ref(bible_url)
if not soup:
return None
bible_select = soup.find('select', {'class': 'translation-dropdown'})
if not bible_select:
log.debug('No select tags found - did site change?')
return None
option_tags = bible_select.find_all('option')
if not option_tags:
log.debug('No option tags found - did site change?')
return None
current_lang = ''
bibles = []
for ot in option_tags:
tag_class = ''
try:
tag_class = ot['class'][0]
except KeyError:
tag_class = ''
tag_text = ot.get_text()
if tag_class == 'lang':
current_lang = tag_text[tag_text.find('(') + 1:tag_text.find(')')].lower()
elif tag_class == 'spacer':
continue
else:
bibles.append((tag_text, ot['value'], current_lang))
return bibles
class BSExtract(RegistryProperties):
"""
@ -338,6 +408,43 @@ class BSExtract(RegistryProperties):
content = content.find_all('li')
return [book.contents[0].contents[0] for book in content if len(book.contents[0].contents)]
def get_bibles_from_http(self):
"""
Load a list of bibles from Bibleserver website.
returns a list in the form [(biblename, biblekey, language_code)]
"""
log.debug('BSExtract.get_bibles_from_http')
bible_url = 'http://www.bibleserver.com/index.php?language=2'
soup = get_soup_for_bible_ref(bible_url)
if not soup:
return None
bible_links = soup.find_all('a', {'class': 'trlCell'})
if not bible_links:
log.debug('No a tags found - did site change?')
return None
bibles = []
for link in bible_links:
bible_name = link.get_text()
# Skip any audio
if 'audio' in bible_name.lower():
continue
try:
bible_link = link['href']
bible_key = bible_link[bible_link.rfind('/') + 1:]
css_classes = link['class']
except KeyError:
log.debug('No href/class attribute found - did site change?')
language_code = ''
for css_class in css_classes:
if css_class.startswith('fl_'):
try:
language_code = BIBLESERVER_LANGUAGE_CODE[css_class]
except KeyError:
language_code = ''
bibles.append((bible_name, bible_key, language_code))
return bibles
class CWExtract(RegistryProperties):
"""
@ -408,6 +515,49 @@ class CWExtract(RegistryProperties):
books.append(book.contents[0])
return books
def get_bibles_from_http(self):
"""
Load a list of bibles from Crosswalk website.
returns a list in the form [(biblename, biblekey, language_code)]
"""
log.debug('CWExtract.get_bibles_from_http')
bible_url = 'http://www.biblestudytools.com/search/bible-search.part/'
soup = get_soup_for_bible_ref(bible_url)
if not soup:
return None
bible_select = soup.find('select')
if not bible_select:
log.debug('No select tags found - did site change?')
return None
option_tags = bible_select.find_all('option')
if not option_tags:
log.debug('No option tags found - did site change?')
return None
bibles = []
for ot in option_tags:
tag_text = ot.get_text().strip()
try:
tag_value = ot['value']
except KeyError:
log.exception('No value attribute found - did site change?')
return None
if not tag_value:
continue
# The names of non-english bibles has their language in parentheses at the end
if tag_text.endswith(')'):
language = tag_text[tag_text.rfind('(') + 1:-1]
if language in CROSSWALK_LANGUAGES:
language_code = CROSSWALK_LANGUAGES[language]
else:
language_code = ''
# ... except for the latin vulgate
elif 'latin' in tag_text.lower():
language_code = 'la'
else:
language_code = 'en'
bibles.append((tag_text, tag_value, language_code))
return bibles
class HTTPBible(BibleDB, RegistryProperties):
log.info('%s HTTPBible loaded', __name__)
@ -428,6 +578,7 @@ class HTTPBible(BibleDB, RegistryProperties):
self.proxy_server = None
self.proxy_username = None
self.proxy_password = None
self.language_id = None
if 'path' in kwargs:
self.path = kwargs['path']
if 'proxy_server' in kwargs:
@ -436,6 +587,8 @@ class HTTPBible(BibleDB, RegistryProperties):
self.proxy_username = kwargs['proxy_username']
if 'proxy_password' in kwargs:
self.proxy_password = kwargs['proxy_password']
if 'language_id' in kwargs:
self.language_id = kwargs['language_id']
def do_import(self, bible_name=None):
"""
@ -468,13 +621,11 @@ class HTTPBible(BibleDB, RegistryProperties):
return False
self.wizard.progress_bar.setMaximum(len(books) + 2)
self.wizard.increment_progress_bar(translate('BiblesPlugin.HTTPBible', 'Registering Language...'))
bible = BiblesResourcesDB.get_webbible(self.download_name, self.download_source.lower())
if bible['language_id']:
language_id = bible['language_id']
self.save_meta('language_id', language_id)
if self.language_id:
self.save_meta('language_id', self.language_id)
else:
language_id = self.get_language(bible_name)
if not language_id:
self.language_id = self.get_language(bible_name)
if not self.language_id:
log.error('Importing books from %s failed' % self.filename)
return False
for book in books:
@ -482,7 +633,7 @@ class HTTPBible(BibleDB, RegistryProperties):
break
self.wizard.increment_progress_bar(translate(
'BiblesPlugin.HTTPBible', 'Importing %s...', 'Importing <book name>...') % book)
book_ref_id = self.get_book_ref_id_by_name(book, len(books), language_id)
book_ref_id = self.get_book_ref_id_by_name(book, len(books), self.language_id)
if not book_ref_id:
log.error('Importing books from %s - download name: "%s" failed' %
(self.download_source, self.download_name))

View File

@ -25,7 +25,7 @@
from unittest import TestCase
from openlp.core.common import Registry
from openlp.plugins.bibles.lib.http import BGExtract, CWExtract
from openlp.plugins.bibles.lib.http import BGExtract, CWExtract, BSExtract
from tests.interfaces import MagicMock
@ -116,3 +116,46 @@ class TestBibleHTTP(TestCase):
# THEN: We should get back a valid service item
assert len(results.verse_list) == 36, 'The book of John should not have had any verses added or removed'
def bibleserver_get_bibles_test(self):
"""
Test getting list of bibles from BibelServer.com
"""
# GIVEN: A new Bible Server extraction class
handler = BSExtract()
# WHEN: downloading bible list from bibleserver
bibles = handler.get_bibles_from_http()
# THEN: The list should not be None, and some known bibles should be there
self.assertIsNotNone(bibles)
self.assertIn(('New Int. Readers Version', 'NIRV', 'en'), bibles)
self.assertIn(('Българската Библия', 'BLG', 'bg'), bibles)
def biblegateway_get_bibles_test(self):
"""
Test getting list of bibles from BibelGateway.com
"""
# GIVEN: A new Bible Gateway extraction class
handler = BGExtract()
# WHEN: downloading bible list from Crosswalk
bibles = handler.get_bibles_from_http()
# THEN: The list should not be None, and some known bibles should be there
self.assertIsNotNone(bibles)
self.assertIn(('Holman Christian Standard Bible', 'HCSB', 'en'), bibles)
def crosswalk_get_bibles_test(self):
"""
Test getting list of bibles from Crosswalk.com
"""
# GIVEN: A new Crosswalk extraction class
handler = CWExtract()
# WHEN: downloading bible list from Crosswalk
bibles = handler.get_bibles_from_http()
# THEN: The list should not be None, and some known bibles should be there
self.assertIsNotNone(bibles)
self.assertIn(('Giovanni Diodati 1649 (Italian)', 'gdb', 'it'), bibles)