forked from openlp/openlp
Use BibleGateway standard site instead of the legacy site. Fixes bug 1562384.
Update Crosswalk webpage parser to match new layout. Fixes bug 1599999. Make EasySlides importer try to recover when reading non-standard XML. Fixes bug 1588822. Fix handling of control chars and escaped chars in VideoPsalm import. Fixes bug 1594945. bzr-revno: 2683 Fixes: https://launchpad.net/bugs/1562384, https://launchpad.net/bugs/1588822, https://launchpad.net/bugs/1594945, https://launchpad.net/bugs/1599999
This commit is contained in:
commit
291c9c8902
@ -252,7 +252,7 @@ class BGExtract(RegistryProperties):
|
||||
chapter=chapter,
|
||||
version=version)
|
||||
soup = get_soup_for_bible_ref(
|
||||
'http://legacy.biblegateway.com/passage/?{url}'.format(url=url_params),
|
||||
'http://biblegateway.com/passage/?{url}'.format(url=url_params),
|
||||
pre_parse_regex=r'<meta name.*?/>', pre_parse_substitute='')
|
||||
if not soup:
|
||||
return None
|
||||
@ -281,7 +281,7 @@ class BGExtract(RegistryProperties):
|
||||
"""
|
||||
log.debug('BGExtract.get_books_from_http("{version}")'.format(version=version))
|
||||
url_params = urllib.parse.urlencode({'action': 'getVersionInfo', 'vid': '{version}'.format(version=version)})
|
||||
reference_url = 'http://legacy.biblegateway.com/versions/?{url}#books'.format(url=url_params)
|
||||
reference_url = 'http://biblegateway.com/versions/?{url}#books'.format(url=url_params)
|
||||
page = get_web_page(reference_url)
|
||||
if not page:
|
||||
send_error_message('download')
|
||||
@ -312,7 +312,7 @@ class BGExtract(RegistryProperties):
|
||||
for book in content:
|
||||
book = book.find('td')
|
||||
if book:
|
||||
books.append(book.contents[0])
|
||||
books.append(book.contents[1])
|
||||
return books
|
||||
|
||||
def get_bibles_from_http(self):
|
||||
@ -322,11 +322,11 @@ class BGExtract(RegistryProperties):
|
||||
returns a list in the form [(biblename, biblekey, language_code)]
|
||||
"""
|
||||
log.debug('BGExtract.get_bibles_from_http')
|
||||
bible_url = 'https://legacy.biblegateway.com/versions/'
|
||||
bible_url = 'https://biblegateway.com/versions/'
|
||||
soup = get_soup_for_bible_ref(bible_url)
|
||||
if not soup:
|
||||
return None
|
||||
bible_select = soup.find('select', {'class': 'translation-dropdown'})
|
||||
bible_select = soup.find('select', {'class': 'search-translation-select'})
|
||||
if not bible_select:
|
||||
log.debug('No select tags found - did site change?')
|
||||
return None
|
||||
@ -532,28 +532,26 @@ class CWExtract(RegistryProperties):
|
||||
returns a list in the form [(biblename, biblekey, language_code)]
|
||||
"""
|
||||
log.debug('CWExtract.get_bibles_from_http')
|
||||
bible_url = 'http://www.biblestudytools.com/'
|
||||
bible_url = 'http://www.biblestudytools.com/bible-versions/'
|
||||
soup = get_soup_for_bible_ref(bible_url)
|
||||
if not soup:
|
||||
return None
|
||||
bible_select = soup.find('select')
|
||||
if not bible_select:
|
||||
log.debug('No select tags found - did site change?')
|
||||
return None
|
||||
option_tags = bible_select.find_all('option', {'class': 'log-translation'})
|
||||
if not option_tags:
|
||||
log.debug('No option tags found - did site change?')
|
||||
h4_tags = soup.find_all('h4', {'class': 'small-header'})
|
||||
if not h4_tags:
|
||||
log.debug('No h4 tags found - did site change?')
|
||||
return None
|
||||
bibles = []
|
||||
for ot in option_tags:
|
||||
tag_text = ot.get_text().strip()
|
||||
try:
|
||||
tag_value = ot['value']
|
||||
except KeyError:
|
||||
log.exception('No value attribute found - did site change?')
|
||||
for h4t in h4_tags:
|
||||
short_name = None
|
||||
if h4t.span:
|
||||
short_name = h4t.span.get_text().strip().lower()
|
||||
else:
|
||||
log.error('No span tag found - did site change?')
|
||||
return None
|
||||
if not tag_value:
|
||||
if not short_name:
|
||||
continue
|
||||
h4t.span.extract()
|
||||
tag_text = h4t.get_text().strip()
|
||||
# The names of non-english bibles has their language in parentheses at the end
|
||||
if tag_text.endswith(')'):
|
||||
language = tag_text[tag_text.rfind('(') + 1:-1]
|
||||
@ -561,12 +559,20 @@ class CWExtract(RegistryProperties):
|
||||
language_code = CROSSWALK_LANGUAGES[language]
|
||||
else:
|
||||
language_code = ''
|
||||
# ... except for the latin vulgate
|
||||
# ... except for those that don't...
|
||||
elif 'latin' in tag_text.lower():
|
||||
language_code = 'la'
|
||||
elif 'la biblia' in tag_text.lower() or 'nueva' in tag_text.lower():
|
||||
language_code = 'es'
|
||||
elif 'chinese' in tag_text.lower():
|
||||
language_code = 'zh'
|
||||
elif 'greek' in tag_text.lower():
|
||||
language_code = 'el'
|
||||
elif 'nova' in tag_text.lower():
|
||||
language_code = 'pt'
|
||||
else:
|
||||
language_code = 'en'
|
||||
bibles.append((tag_text, tag_value, language_code))
|
||||
bibles.append((tag_text, short_name, language_code))
|
||||
return bibles
|
||||
|
||||
|
||||
|
@ -46,7 +46,7 @@ class EasySlidesImport(SongImport):
|
||||
|
||||
def do_import(self):
|
||||
log.info('Importing EasySlides XML file {source}'.format(source=self.import_source))
|
||||
parser = etree.XMLParser(remove_blank_text=True)
|
||||
parser = etree.XMLParser(remove_blank_text=True, recover=True)
|
||||
parsed_file = etree.parse(self.import_source, parser)
|
||||
xml = etree.tostring(parsed_file).decode()
|
||||
song_xml = objectify.fromstring(xml)
|
||||
|
@ -73,6 +73,14 @@ class VideoPsalmImport(SongImport):
|
||||
processed_content += c
|
||||
c = next(file_content_it)
|
||||
processed_content += '"' + c
|
||||
# Remove control characters
|
||||
elif (c < chr(32)):
|
||||
processed_content += ' '
|
||||
# Handle escaped characters
|
||||
elif c == '\\':
|
||||
processed_content += c
|
||||
c = next(file_content_it)
|
||||
processed_content += c
|
||||
else:
|
||||
processed_content += c
|
||||
songbook = json.loads(processed_content.strip())
|
||||
|
@ -43,3 +43,5 @@ class TestVideoPsalmFileImport(SongImportTestHelper):
|
||||
"""
|
||||
self.file_import(os.path.join(TEST_PATH, 'videopsalm-as-safe-a-stronghold.json'),
|
||||
self.load_external_result_data(os.path.join(TEST_PATH, 'as-safe-a-stronghold.json')))
|
||||
self.file_import(os.path.join(TEST_PATH, 'videopsalm-as-safe-a-stronghold2.json'),
|
||||
self.load_external_result_data(os.path.join(TEST_PATH, 'as-safe-a-stronghold2.json')))
|
||||
|
@ -50,7 +50,8 @@ class TestBibleHTTP(TestCase):
|
||||
books = handler.get_books_from_http('NIV')
|
||||
|
||||
# THEN: We should get back a valid service item
|
||||
assert len(books) == 66, 'The bible should not have had any books added or removed'
|
||||
self.assertEqual(len(books), 66, 'The bible should not have had any books added or removed')
|
||||
self.assertEqual(books[0], 'Genesis', 'The first bible book should be Genesis')
|
||||
|
||||
def test_bible_gateway_extract_books_support_redirect(self):
|
||||
"""
|
||||
@ -63,7 +64,7 @@ class TestBibleHTTP(TestCase):
|
||||
books = handler.get_books_from_http('DN1933')
|
||||
|
||||
# THEN: We should get back a valid service item
|
||||
assert len(books) == 66, 'This bible should have 66 books'
|
||||
self.assertEqual(len(books), 66, 'This bible should have 66 books')
|
||||
|
||||
def test_bible_gateway_extract_verse(self):
|
||||
"""
|
||||
@ -76,7 +77,8 @@ class TestBibleHTTP(TestCase):
|
||||
results = handler.get_bible_chapter('NIV', 'John', 3)
|
||||
|
||||
# THEN: We should get back a valid service item
|
||||
assert len(results.verse_list) == 36, 'The book of John should not have had any verses added or removed'
|
||||
self.assertEqual(len(results.verse_list), 36,
|
||||
'The book of John should not have had any verses added or removed')
|
||||
|
||||
def test_bible_gateway_extract_verse_nkjv(self):
|
||||
"""
|
||||
@ -89,7 +91,8 @@ class TestBibleHTTP(TestCase):
|
||||
results = handler.get_bible_chapter('NKJV', 'John', 3)
|
||||
|
||||
# THEN: We should get back a valid service item
|
||||
assert len(results.verse_list) == 36, 'The book of John should not have had any verses added or removed'
|
||||
self.assertEqual(len(results.verse_list), 36,
|
||||
'The book of John should not have had any verses added or removed')
|
||||
|
||||
def test_crosswalk_extract_books(self):
|
||||
"""
|
||||
@ -102,7 +105,7 @@ class TestBibleHTTP(TestCase):
|
||||
books = handler.get_books_from_http('niv')
|
||||
|
||||
# THEN: We should get back a valid service item
|
||||
assert len(books) == 66, 'The bible should not have had any books added or removed'
|
||||
self.assertEqual(len(books), 66, 'The bible should not have had any books added or removed')
|
||||
|
||||
def test_crosswalk_extract_verse(self):
|
||||
"""
|
||||
@ -115,7 +118,8 @@ class TestBibleHTTP(TestCase):
|
||||
results = handler.get_bible_chapter('niv', 'john', 3)
|
||||
|
||||
# THEN: We should get back a valid service item
|
||||
assert len(results.verse_list) == 36, 'The book of John should not have had any verses added or removed'
|
||||
self.assertEqual(len(results.verse_list), 36,
|
||||
'The book of John should not have had any verses added or removed')
|
||||
|
||||
def test_bibleserver_get_bibles(self):
|
||||
"""
|
||||
@ -144,9 +148,8 @@ class TestBibleHTTP(TestCase):
|
||||
|
||||
# THEN: The list should not be None, and some known bibles should be there
|
||||
self.assertIsNotNone(bibles)
|
||||
self.assertIn(('Holman Christian Standard Bible', 'HCSB', 'en'), bibles)
|
||||
self.assertIn(('Holman Christian Standard Bible (HCSB)', 'HCSB', 'en'), bibles)
|
||||
|
||||
@skip("Waiting for Crosswalk to fix their server")
|
||||
def test_crosswalk_get_bibles(self):
|
||||
"""
|
||||
Test getting list of bibles from Crosswalk.com
|
||||
|
35
tests/resources/videopsalmsongs/as-safe-a-stronghold2.json
Normal file
35
tests/resources/videopsalmsongs/as-safe-a-stronghold2.json
Normal file
@ -0,0 +1,35 @@
|
||||
{
|
||||
"authors": [
|
||||
["Martin Luther", "words"],
|
||||
["Unknown", "music"]
|
||||
],
|
||||
"ccli_number": "12345",
|
||||
"comments": "This is\nthe first comment\nThis is\nthe second comment\nThis is\nthe third comment\n",
|
||||
"copyright": "Public Domain",
|
||||
"song_book_name": "SongBook1",
|
||||
"song_number": 0,
|
||||
"title": "A Safe Stronghold Our God is Still",
|
||||
"topics": [
|
||||
"tema1",
|
||||
"tema2"
|
||||
],
|
||||
"verse_order_list": [],
|
||||
"verses": [
|
||||
[
|
||||
"As safe a stronghold our God is still,\nA trusty shield and weapon;\nHe’ll help us clear from all the ill\nThat hath us now o’ertaken.\nThe ancient prince of hell\nHath risen with purpose fell;\nStrong mail of craft and power\nHe weareth in this hour;\nOn earth is not His fellow.",
|
||||
"v"
|
||||
],
|
||||
[
|
||||
"With \"force\" of arms we nothing can,\nFull soon were we down-ridden;\nBut for us fights \\ the proper Man,\nWhom God Himself hath bidden.\nAsk ye: Who is this same?\nChrist Jesus is His name,\nThe Lord Sabaoth’s Son;\nHe, and no other one,\nShall conquer in the battle.",
|
||||
"v"
|
||||
],
|
||||
[
|
||||
"And were this world all devils o’er,\nAnd watching to devour us,\nWe lay it not to heart so sore;\nNot they can overpower us.\nAnd let the prince of ill\nLook grim as e’er he will,\nHe harms us not a whit;\nFor why? his doom is writ;\nA word shall quickly slay him.",
|
||||
"v"
|
||||
],
|
||||
[
|
||||
"God’s word, for all their craft and force,\nOne moment will not linger,\nBut, spite of hell, shall have its course;\n’Tis written by His finger.\nAnd though they take our life,\nGoods, honour, children, wife,\nYet is their profit small:\nThese things shall vanish all;\nThe city of God remaineth.",
|
||||
"v"
|
||||
]
|
||||
]
|
||||
}
|
@ -0,0 +1,47 @@
|
||||
{Abbreviation:"SB1",Copyright:"Public domain",Songs:[{ID:3,Composer:"Unknown",Author:"Martin Luther",Copyright:"Public
|
||||
Domain",Theme:"tema1
|
||||
tema2",CCLI:"12345",Alias:"A safe stronghold",Memo1:"This is
|
||||
the first comment
|
||||
",Memo2:"This is
|
||||
the second comment
|
||||
",Memo3:"This is
|
||||
the third comment
|
||||
",Reference:"reference",Guid:"jtCkrJdPIUOmECjaQylg/g",Verses:[{
|
||||
Text:"As safe a stronghold our God is still,
|
||||
A trusty shield and weapon;
|
||||
He’ll help us clear from all the ill
|
||||
That hath us now o’ertaken.
|
||||
The ancient prince of hell
|
||||
Hath risen with purpose fell;
|
||||
Strong mail of craft and power
|
||||
He weareth in this hour;
|
||||
On earth is not His fellow."},{ID:2,
|
||||
Text:"With \"force\" of arms we nothing can,
|
||||
Full soon were we down-ridden;
|
||||
But for us fights \\ the proper Man,
|
||||
Whom God Himself hath bidden.
|
||||
Ask ye: Who is this same?
|
||||
Christ Jesus is His name,
|
||||
The Lord Sabaoth’s Son;
|
||||
He, and no other one,
|
||||
Shall conquer in the battle."},{ID:3,
|
||||
Text:"And were this world all devils o’er,
|
||||
And watching to devour us,
|
||||
We lay it not to heart so sore;
|
||||
Not they can overpower us.
|
||||
And let the prince of ill
|
||||
Look grim as e’er he will,
|
||||
He harms us not a whit;
|
||||
For why? his doom is writ;
|
||||
A word shall quickly slay him."},{ID:4,
|
||||
Text:"God’s word, for all their craft and force,
|
||||
One moment will not linger,
|
||||
But, spite of hell, shall have its course;
|
||||
’Tis written by His finger.
|
||||
And though they take our life,
|
||||
Goods, honour, children, wife,
|
||||
Yet is their profit small:
|
||||
These things shall vanish all;
|
||||
The city of God remaineth."}],AudioFile:"282.mp3",IsAudioFileEnabled:1,
|
||||
Text:"A Safe Stronghold Our God is Still"}],Guid:"khiHU2blX0Kb41dGdbDLhA",VersionDate:"20121012000000",
|
||||
Text:"SongBook1"}
|
Loading…
Reference in New Issue
Block a user