forked from openlp/openlp
Fix parsing of biblestudytools.com. Fixes bug 1418212.
Fixes: https://launchpad.net/bugs/1418212
This commit is contained in:
parent
7287759ba5
commit
a5b92f1e87
@ -365,31 +365,20 @@ class CWExtract(RegistryProperties):
|
|||||||
if not soup:
|
if not soup:
|
||||||
return None
|
return None
|
||||||
self.application.process_events()
|
self.application.process_events()
|
||||||
html_verses = soup.find_all('span', 'versetext')
|
verses_div = soup.find_all('div', 'verse')
|
||||||
if not html_verses:
|
if not verses_div:
|
||||||
log.error('No verses found in the CrossWalk response.')
|
log.error('No verses found in the CrossWalk response.')
|
||||||
send_error_message('parse')
|
send_error_message('parse')
|
||||||
return None
|
return None
|
||||||
verses = {}
|
verses = {}
|
||||||
for verse in html_verses:
|
for verse in verses_div:
|
||||||
self.application.process_events()
|
self.application.process_events()
|
||||||
verse_number = int(verse.contents[0].contents[0])
|
verse_number = int(verse.find('strong').contents[0])
|
||||||
verse_text = ''
|
verse_span = verse.find('span')
|
||||||
for part in verse.contents:
|
tags_to_remove = verse_span.find_all(['a', 'sup'])
|
||||||
self.application.process_events()
|
for tag in tags_to_remove:
|
||||||
if isinstance(part, NavigableString):
|
tag.decompose()
|
||||||
verse_text += part
|
verse_text = verse_span.get_text()
|
||||||
elif part and part.attrMap and \
|
|
||||||
(part.attrMap['class'] == 'WordsOfChrist' or part.attrMap['class'] == 'strongs'):
|
|
||||||
for subpart in part.contents:
|
|
||||||
self.application.process_events()
|
|
||||||
if isinstance(subpart, NavigableString):
|
|
||||||
verse_text += subpart
|
|
||||||
elif subpart and subpart.attrMap and subpart.attrMap['class'] == 'strongs':
|
|
||||||
for subsub in subpart.contents:
|
|
||||||
self.application.process_events()
|
|
||||||
if isinstance(subsub, NavigableString):
|
|
||||||
verse_text += subsub
|
|
||||||
self.application.process_events()
|
self.application.process_events()
|
||||||
# Fix up leading and trailing spaces, multiple spaces, and spaces between text and , and .
|
# Fix up leading and trailing spaces, multiple spaces, and spaces between text and , and .
|
||||||
verse_text = verse_text.strip('\n\r\t ')
|
verse_text = verse_text.strip('\n\r\t ')
|
||||||
@ -409,16 +398,13 @@ class CWExtract(RegistryProperties):
|
|||||||
soup = get_soup_for_bible_ref(chapter_url)
|
soup = get_soup_for_bible_ref(chapter_url)
|
||||||
if not soup:
|
if not soup:
|
||||||
return None
|
return None
|
||||||
content = soup.find('div', {'class': 'Body'})
|
content = soup.find_all(('h4', {'class': 'small-header'}))
|
||||||
content = content.find('ul', {'class': 'parent'})
|
|
||||||
if not content:
|
if not content:
|
||||||
log.error('No books found in the Crosswalk response.')
|
log.error('No books found in the Crosswalk response.')
|
||||||
send_error_message('parse')
|
send_error_message('parse')
|
||||||
return None
|
return None
|
||||||
content = content.find_all('li')
|
|
||||||
books = []
|
books = []
|
||||||
for book in content:
|
for book in content:
|
||||||
book = book.find('a')
|
|
||||||
books.append(book.contents[0])
|
books.append(book.contents[0])
|
||||||
return books
|
return books
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user