forked from openlp/openlp
Fix the CrossWalk Biblestudytools.com importer
This commit is contained in:
parent
844399b54f
commit
7734ce89a0
@ -531,7 +531,7 @@ class CWExtract(RegistryProperties):
|
|||||||
verses = {}
|
verses = {}
|
||||||
for verse in verses_div:
|
for verse in verses_div:
|
||||||
self.application.process_events()
|
self.application.process_events()
|
||||||
verse_number = int(verse.find('strong').contents[0])
|
verse_number = int(verse.find('span', 'verse-number').strong.contents[0])
|
||||||
verse_span = verse.find('span', class_='verse-%d' % verse_number)
|
verse_span = verse.find('span', class_='verse-%d' % verse_number)
|
||||||
tags_to_remove = verse_span.find_all(['a', 'sup'])
|
tags_to_remove = verse_span.find_all(['a', 'sup'])
|
||||||
for tag in tags_to_remove:
|
for tag in tags_to_remove:
|
||||||
@ -576,22 +576,25 @@ class CWExtract(RegistryProperties):
|
|||||||
soup = get_soup_for_bible_ref(bible_url)
|
soup = get_soup_for_bible_ref(bible_url)
|
||||||
if not soup:
|
if not soup:
|
||||||
return None
|
return None
|
||||||
h4_tags = soup.find_all('h4', {'class': 'small-header'})
|
# Get all <div class="col-md-12"> inside <div id="content-column">
|
||||||
if not h4_tags:
|
content_column = soup.find('div', id='content-column')
|
||||||
log.debug('No h4 tags found - did site change?')
|
if not content_column:
|
||||||
|
log.error('No div[id=content-column] -- the site must have changed')
|
||||||
|
return None
|
||||||
|
col_md_12_divs = content_column.find_all('div', 'col-md-12')
|
||||||
|
if not col_md_12_divs:
|
||||||
|
log.error('No div[class=col-md-12] -- the site must have changed')
|
||||||
return None
|
return None
|
||||||
bibles = []
|
bibles = []
|
||||||
for h4t in h4_tags:
|
for col_md_12 in col_md_12_divs:
|
||||||
short_name = None
|
# Skip this div unless it contains a nested <a><strong><span class="text-muted"> (first match at any depth)
|
||||||
if h4t.span:
|
if not col_md_12.a or not col_md_12.a.strong or not col_md_12.a.strong.span or \
|
||||||
short_name = h4t.span.get_text().strip().lower()
|
'text-muted' not in col_md_12.a.strong.span['class']:
|
||||||
else:
|
continue
|
||||||
log.error('No span tag found - did site change?')
|
short_name = str(col_md_12.a.strong.span.string).strip().lower()
|
||||||
return None
|
|
||||||
if not short_name:
|
if not short_name:
|
||||||
continue
|
continue
|
||||||
h4t.span.extract()
|
tag_text = str(col_md_12.a.strong.contents[0]).strip()
|
||||||
tag_text = h4t.get_text().strip()
|
|
||||||
# The names of non-English bibles have their language in parentheses at the end
|
# The names of non-English bibles have their language in parentheses at the end
|
||||||
if tag_text.endswith(')'):
|
if tag_text.endswith(')'):
|
||||||
language = tag_text[tag_text.rfind('(') + 1:-1]
|
language = tag_text[tag_text.rfind('(') + 1:-1]
|
||||||
|
@ -124,6 +124,35 @@ class TestBibleHTTP(TestCase):
|
|||||||
# THEN: We should get back a valid service item
|
# THEN: We should get back a valid service item
|
||||||
assert len(results.verse_list) == 36, 'The book of John should not have had any verses added or removed'
|
assert len(results.verse_list) == 36, 'The book of John should not have had any verses added or removed'
|
||||||
|
|
||||||
|
def test_crosswalk_get_bibles(self):
|
||||||
|
"""
|
||||||
|
Test getting list of bibles from Crosswalk.com
|
||||||
|
"""
|
||||||
|
# GIVEN: A new Crosswalk extraction class
|
||||||
|
handler = CWExtract()
|
||||||
|
|
||||||
|
# WHEN: downloading bible list from Crosswalk
|
||||||
|
bibles = handler.get_bibles_from_http()
|
||||||
|
|
||||||
|
# THEN: The list should not be None, and some known bibles should be there
|
||||||
|
assert bibles is not None
|
||||||
|
assert ('Giovanni Diodati 1649 (Italian)', 'gdb', 'it') in bibles
|
||||||
|
|
||||||
|
def test_crosswalk_get_verse_text(self):
|
||||||
|
"""
|
||||||
|
Test verse text from Crosswalk.com
|
||||||
|
"""
|
||||||
|
# GIVEN: A new Crosswalk extraction class
|
||||||
|
handler = CWExtract()
|
||||||
|
|
||||||
|
# WHEN: downloading NIV Genesis from Crosswalk
|
||||||
|
niv_genesis_chapter_one = handler.get_bible_chapter('niv', 'Genesis', 1)
|
||||||
|
|
||||||
|
# THEN: The verse list should contain the verses
|
||||||
|
assert niv_genesis_chapter_one.has_verse_list() is True
|
||||||
|
assert 'In the beginning God created the heavens and the earth.' == niv_genesis_chapter_one.verse_list[1], \
|
||||||
|
'The first chapter of genesis should have been fetched.'
|
||||||
|
|
||||||
def test_bibleserver_get_bibles(self):
|
def test_bibleserver_get_bibles(self):
|
||||||
"""
|
"""
|
||||||
Test getting list of bibles from BibleServer.com
|
Test getting list of bibles from BibleServer.com
|
||||||
@ -167,32 +196,3 @@ class TestBibleHTTP(TestCase):
|
|||||||
# THEN: The list should not be None, and some known bibles should be there
|
# THEN: The list should not be None, and some known bibles should be there
|
||||||
assert bibles is not None
|
assert bibles is not None
|
||||||
assert ('Holman Christian Standard Bible (HCSB)', 'HCSB', 'en') in bibles
|
assert ('Holman Christian Standard Bible (HCSB)', 'HCSB', 'en') in bibles
|
||||||
|
|
||||||
def test_crosswalk_get_bibles(self):
|
|
||||||
"""
|
|
||||||
Test getting list of bibles from Crosswalk.com
|
|
||||||
"""
|
|
||||||
# GIVEN: A new Crosswalk extraction class
|
|
||||||
handler = CWExtract()
|
|
||||||
|
|
||||||
# WHEN: downloading bible list from Crosswalk
|
|
||||||
bibles = handler.get_bibles_from_http()
|
|
||||||
|
|
||||||
# THEN: The list should not be None, and some known bibles should be there
|
|
||||||
assert bibles is not None
|
|
||||||
assert ('Giovanni Diodati 1649 (Italian)', 'gdb', 'it') in bibles
|
|
||||||
|
|
||||||
def test_crosswalk_get_verse_text(self):
|
|
||||||
"""
|
|
||||||
Test verse text from Crosswalk.com
|
|
||||||
"""
|
|
||||||
# GIVEN: A new Crosswalk extraction class
|
|
||||||
handler = CWExtract()
|
|
||||||
|
|
||||||
# WHEN: downloading NIV Genesis from Crosswalk
|
|
||||||
niv_genesis_chapter_one = handler.get_bible_chapter('niv', 'Genesis', 1)
|
|
||||||
|
|
||||||
# THEN: The verse list should contain the verses
|
|
||||||
assert niv_genesis_chapter_one.has_verse_list() is True
|
|
||||||
assert 'In the beginning God created the heavens and the earth.' == niv_genesis_chapter_one.verse_list[1], \
|
|
||||||
'The first chapter of genesis should have been fetched.'
|
|
||||||
|
Loading…
Reference in New Issue
Block a user