Initial support for fetching list of web bibles. Not used yet.

This commit is contained in:
Tomas Groth 2015-02-17 22:58:29 +00:00
parent 7c0386b7c0
commit 52f3695334
2 changed files with 133 additions and 1 deletions

View File

@ -50,6 +50,29 @@ UGLY_CHARS = {
} }
VERSE_NUMBER_REGEX = re.compile(r'v(\d{1,2})(\d{3})(\d{3}) verse.*') VERSE_NUMBER_REGEX = re.compile(r'v(\d{1,2})(\d{3})(\d{3}) verse.*')
# Maps the ``fl_<n>`` CSS class found on a Bibleserver translation link to a
# two-letter language code. The tuple is ordered: its first entry belongs to
# ``fl_1``, its last entry to ``fl_21``.
BIBLESERVER_LANGUAGE_CODE = {
    'fl_%d' % number: code
    for number, code in enumerate(
        (
            'de', 'en', 'fr', 'it', 'es', 'pt', 'ru',
            'sv', 'no', 'nl', 'cs', 'sk', 'ro', 'hr',
            'hu', 'bg', 'ar', 'tr', 'pl', 'da', 'zh',
        ),
        start=1
    )
}
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@ -278,6 +301,32 @@ class BGExtract(RegistryProperties):
books.append(book.contents[0]) books.append(book.contents[0])
return books return books
def get_bibles_from_http(self):
    """
    Load a list of bibles from BibleGateway website.

    Walks the ``<option>`` tags of the ``translation-dropdown`` select box. Options with the class ``lang`` set
    the language for the entries that follow them, options with the class ``spacer`` are skipped and every other
    option is treated as a bible. Each bible is currently only printed (name, key, language code); nothing is
    collected for the caller.

    :return: Always ``None``, also when the page or its translation dropdown could not be read.
    """
    log.debug('BGExtract.get_bibles_from_http')
    bible_url = 'https://legacy.biblegateway.com/versions/'
    soup = get_soup_for_bible_ref(bible_url)
    if not soup:
        return None
    bible_select = soup.find('select', {'class': 'translation-dropdown'})
    if bible_select is None:
        # Without the dropdown there is nothing to list; bail out the same way as for a missing page.
        log.debug('No translation dropdown found at %s', bible_url)
        return None
    current_lang = ''
    for ot in bible_select.find_all('option'):
        try:
            tag_class = ot['class'][0]
        except (KeyError, IndexError):
            # No class attribute (or an empty one): handled as a bible entry below.
            tag_class = ''
        tag_text = ot.get_text()
        if tag_class == 'lang':
            # Keep only the text between the first pair of parentheses, excluding the parentheses themselves.
            # NOTE(review): presumably the header reads "Language name (CODE)" - confirm against the live page.
            start = tag_text.find('(')
            end = tag_text.find(')', start + 1)
            if start != -1 and end != -1:
                current_lang = tag_text[start + 1:end].lower()
            else:
                current_lang = ''
        elif tag_class == 'spacer':
            continue
        else:
            print('biblename: %s, bible_key: %s, class: %s' % (tag_text, ot['value'], current_lang))
class BSExtract(RegistryProperties): class BSExtract(RegistryProperties):
""" """
@ -338,6 +387,33 @@ class BSExtract(RegistryProperties):
content = content.find_all('li') content = content.find_all('li')
return [book.contents[0].contents[0] for book in content if len(book.contents[0].contents)] return [book.contents[0].contents[0] for book in content if len(book.contents[0].contents)]
def get_bibles_from_http(self):
    """
    Load a list of bibles from Bibleserver website.

    Every ``<a class="trlCell">`` link whose text does not contain "audio" is treated as a bible. The bible key
    is whatever follows the last ``/`` of the link target, and the language code is looked up from the link's
    ``fl_*`` CSS class. Each bible is only printed (name, key, language code).

    :return: Always ``None``.
    """
    log.debug('BSExtract.get_bibles_from_http')
    page = get_soup_for_bible_ref('http://www.bibleserver.com/index.php?language=2')
    if not page:
        return None
    for anchor in page.find_all('a', {'class': 'trlCell'}):
        name = anchor.get_text()
        # Audio entries are not wanted in the list
        if 'audio' in name.lower():
            continue
        href = anchor['href']
        key = href[href.rfind('/') + 1:]
        language = ''
        for css_name in anchor['class']:
            if css_name.startswith('fl_'):
                # An unknown fl_* class resets the code to an empty string
                language = BIBLESERVER_LANGUAGE_CODE.get(css_name, '')
        print('biblename: %s, bible_key: %s, class: %s' % (name, key, language))
class CWExtract(RegistryProperties): class CWExtract(RegistryProperties):
""" """
@ -408,6 +484,32 @@ class CWExtract(RegistryProperties):
books.append(book.contents[0]) books.append(book.contents[0])
return books return books
def get_bibles_from_http(self):
    """
    Load a list of bibles from Crosswalk website.

    Walks the ``<option>`` tags of the first ``<select>`` on the page and skips options with an empty value.
    The language code is ``''`` when the name ends with a closing parenthesis, ``'la'`` when the name contains
    "latin" and ``'en'`` otherwise. Each bible is currently only printed (name, key, language code); nothing is
    collected for the caller.

    :return: Always ``None``, also when the page or its select box could not be read.
    """
    log.debug('CWExtract.get_bibles_from_http')
    bible_url = 'http://www.biblestudytools.com/search/bible-search.part/'
    soup = get_soup_for_bible_ref(bible_url)
    if not soup:
        return None
    bible_select = soup.find('select')
    if bible_select is None:
        # Without the select box there is nothing to list; bail out the same way as for a missing page.
        log.debug('No bible select box found at %s', bible_url)
        return None
    for ot in bible_select.find_all('option'):
        tag_text = ot.get_text()
        tag_value = ot['value']
        if not tag_value:
            continue
        # The names of non-English bibles have their language in parentheses at the end; no code is derived
        # from that text, so the language is left empty.
        if tag_text.endswith(')'):
            language_code = ''
        # ... except for the Latin Vulgate
        elif 'latin' in tag_text.lower():
            language_code = 'la'
        else:
            language_code = 'en'
        print('biblename: %s, bible_key: %s, class: %s' % (tag_text, tag_value, language_code))
class HTTPBible(BibleDB, RegistryProperties): class HTTPBible(BibleDB, RegistryProperties):
log.info('%s HTTPBible loaded', __name__) log.info('%s HTTPBible loaded', __name__)

View File

@ -25,7 +25,7 @@
from unittest import TestCase from unittest import TestCase
from openlp.core.common import Registry from openlp.core.common import Registry
from openlp.plugins.bibles.lib.http import BGExtract, CWExtract from openlp.plugins.bibles.lib.http import BGExtract, CWExtract, BSExtract
from tests.interfaces import MagicMock from tests.interfaces import MagicMock
@ -116,3 +116,33 @@ class TestBibleHTTP(TestCase):
# THEN: We should get back a valid service item # THEN: We should get back a valid service item
assert len(results.verse_list) == 36, 'The book of John should not have had any verses added or removed' assert len(results.verse_list) == 36, 'The book of John should not have had any verses added or removed'
def bibleserver_get_bibles_test(self):
    """
    Test getting the list of bibles from BibleServer.com
    """
    # GIVEN: A new Bible Server extraction class
    # WHEN: The list of bibles is requested
    BSExtract().get_bibles_from_http()

    # THEN: Nothing is verified yet.
    # NOTE(review): this assertion fails unconditionally - presumably a placeholder until the method returns
    # something that can be checked; confirm before enabling the test.
    self.assertTrue(False)
def biblegateway_get_bibles_test(self):
    """
    Test getting the list of bibles from BibleGateway.com
    """
    # GIVEN: A new Bible Gateway extraction class
    # WHEN: The list of bibles is requested
    BGExtract().get_bibles_from_http()

    # THEN: Nothing is verified yet.
    # NOTE(review): this assertion fails unconditionally - presumably a placeholder until the method returns
    # something that can be checked; confirm before enabling the test.
    self.assertTrue(False)
def crosswalk_get_bibles_test(self):
    """
    Test getting the list of bibles from Crosswalk.com
    """
    # GIVEN: A new Crosswalk extraction class
    # WHEN: The list of bibles is requested
    CWExtract().get_bibles_from_http()

    # THEN: Nothing is verified yet.
    # NOTE(review): this assertion fails unconditionally - presumably a placeholder until the method returns
    # something that can be checked; confirm before enabling the test.
    self.assertTrue(False)