diff --git a/openlp/plugins/bibles/lib/importers/http.py b/openlp/plugins/bibles/lib/importers/http.py index 1266204e1..d35bf02d7 100644 --- a/openlp/plugins/bibles/lib/importers/http.py +++ b/openlp/plugins/bibles/lib/importers/http.py @@ -21,6 +21,7 @@ """ The :mod:`http` module enables OpenLP to retrieve scripture from bible websites. """ +import json import logging import re import socket @@ -516,14 +517,14 @@ class CWExtract(RegistryProperties): url_book_name = book_name.replace(' ', '-') url_book_name = url_book_name.lower() url_book_name = urllib.parse.quote(url_book_name.encode("utf-8")) - chapter_url = 'http://www.biblestudytools.com/{version}/{book}/{chapter}.html'.format(version=version, - book=url_book_name, - chapter=chapter) + chapter_url = 'https://www.biblestudytools.com/{version}/{book}/{chapter}.html'.format(version=version, + book=url_book_name, + chapter=chapter) soup = get_soup_for_bible_ref(chapter_url) if not soup: return None self.application.process_events() - verses_div = soup.find_all('div', 'verse') + verses_div = soup.find_all('div', {'data-verse-id': True}) if not verses_div: log.error('No verses found in the CrossWalk response.') send_error_message('parse') @@ -531,12 +532,11 @@ class CWExtract(RegistryProperties): verses = {} for verse in verses_div: self.application.process_events() - verse_number = int(verse.find('span', 'verse-number').strong.contents[0]) - verse_span = verse.find('span', class_='verse-%d' % verse_number) - tags_to_remove = verse_span.find_all(['a', 'sup']) + verse_number = int(verse['data-verse-id']) + tags_to_remove = verse.find_all(['a', 'sup', 'h3']) for tag in tags_to_remove: tag.decompose() - verse_text = verse_span.get_text() + verse_text = verse.get_text() self.application.process_events() # Fix up leading and trailing spaces, multiple spaces, and spaces between text and , and . verse_text = verse_text.strip('\n\r\t ') @@ -552,18 +552,25 @@ class CWExtract(RegistryProperties): :param version: The version of the bible like NIV for New International Version """ log.debug('CWExtract.get_books_from_http("{version}")'.format(version=version)) - chapter_url = 'http://www.biblestudytools.com/{version}/'.format(version=version) - soup = get_soup_for_bible_ref(chapter_url) - if not soup: - return None - content = soup.find_all('h4', {'class': 'small-header'}) - if not content: - log.error('No books found in the Crosswalk response.') - send_error_message('parse') - return None + books_url = 'https://www.biblestudytools.com/api/bible/books-selection/?translationCode={version}' + books_url = books_url.format(version=version) books = [] - for book in content: - books.append(book.contents[0]) + books_page = get_web_page(books_url) + if not books_page: + log.error('No books found in the CrossWalk response.') + send_error_message('parse') + return books + books_json = json.loads(books_page) + for book in books_json: + # the link looks like this: https://www.biblestudytools.com/bla/2-corintios/ + link = book['link'] + # remove trailing forward slash + link = link.strip('/') + # remove everything before the book name/code + book_name = link[link.rfind('/') + 1:] + # replace dash with space + book_name = book_name.replace('-', ' ') + books.append(book_name) return books def get_bibles_from_http(self): @@ -572,29 +579,16 @@ class CWExtract(RegistryProperties): returns a list in the form [(biblename, biblekey, language_code)] """ log.debug('CWExtract.get_bibles_from_http') - bible_url = 'http://www.biblestudytools.com/bible-versions/' + bible_url = 'https://www.biblestudytools.com/' soup = get_soup_for_bible_ref(bible_url) if not soup: return None - # Get all
on the page - content_column = soup.find('div', id='content-column') - if not content_column: - log.error('No div[id=content-column] -- the site must have changed') - return None - col_md_12_divs = content_column.find_all('div', 'col-md-12') - if not col_md_12_divs: - log.error('No div[class=col-md-12] -- the site must have changed') - return None + # Get all