Merge branch 'fix-crosswalk-bibles' into 'master'

Fix the CrossWalk Biblestudytools.com importer. See merge request openlp/openlp!241
2020-09-26 20:05:42 +00:00 · 2020-09-26 20:05:42 +00:00 · bd32b892f2
commit bd32b892f2
parent fef70e4af4 7734ce89a0
2 changed files with 45 additions and 42 deletions
--- a/openlp/plugins/bibles/lib/importers/http.py
+++ b/openlp/plugins/bibles/lib/importers/http.py
@ -531,7 +531,7 @@ class CWExtract(RegistryProperties):
        verses = {}
        for verse in verses_div:
            self.application.process_events()
-            verse_number = int(verse.find('strong').contents[0])
+            verse_number = int(verse.find('span', 'verse-number').strong.contents[0])
            verse_span = verse.find('span', class_='verse-%d' % verse_number)
            tags_to_remove = verse_span.find_all(['a', 'sup'])
            for tag in tags_to_remove:
@ -576,22 +576,25 @@ class CWExtract(RegistryProperties):
        soup = get_soup_for_bible_ref(bible_url)
        if not soup:
            return None
-        h4_tags = soup.find_all('h4', {'class': 'small-header'})
+        # Get all <div class="col-md-12"> on the page
-        if not h4_tags:
+        content_column = soup.find('div', id='content-column')
-            log.debug('No h4 tags found - did site change?')
+        if not content_column:
            log.error('No div[id=content-column] -- the site must have changed')
            return None
        col_md_12_divs = content_column.find_all('div', 'col-md-12')
        if not col_md_12_divs:
            log.error('No div[class=col-md-12] -- the site must have changed')
            return None
        bibles = []
-        for h4t in h4_tags:
+        for col_md_12 in col_md_12_divs:
-            short_name = None
+            # Check if <a><strong><span class="text-muted"> is a direct descendant
-            if h4t.span:
+            if not col_md_12.a or not col_md_12.a.strong or not col_md_12.a.strong.span or \
-                short_name = h4t.span.get_text().strip().lower()
+                    'text-muted' not in col_md_12.a.strong.span['class']:
-            else:
+                continue
-                log.error('No span tag found - did site change?')
+            short_name = str(col_md_12.a.strong.span.string).strip().lower()
                return None
            if not short_name:
                continue
-            h4t.span.extract()
+            tag_text = str(col_md_12.a.strong.contents[0]).strip()
            tag_text = h4t.get_text().strip()
            # The names of non-English bibles have their language in parentheses at the end
            if tag_text.endswith(')'):
                language = tag_text[tag_text.rfind('(') + 1:-1]
--- a/tests/interfaces/openlp_plugins/bibles/test_lib_http.py
+++ b/tests/interfaces/openlp_plugins/bibles/test_lib_http.py
@ -124,6 +124,35 @@ class TestBibleHTTP(TestCase):
        # THEN: We should get back a valid service item
        assert len(results.verse_list) == 36, 'The book of John should not have had any verses added or removed'
    def test_crosswalk_get_bibles(self):
        """
        Check that the list of bibles can be downloaded from Crosswalk.com
        """
        # GIVEN: A Crosswalk extractor and one entry we expect the site to offer
        extractor = CWExtract()
        known_bible = ('Giovanni Diodati 1649 (Italian)', 'gdb', 'it')
        # WHEN: The list of available bibles is requested
        available_bibles = extractor.get_bibles_from_http()
        # THEN: A result is returned and it includes the known entry
        assert available_bibles is not None
        assert known_bible in available_bibles
    def test_crosswalk_get_verse_text(self):
        """
        Check that verse text can be downloaded from Crosswalk.com
        """
        # GIVEN: A Crosswalk extractor
        extractor = CWExtract()
        # WHEN: Chapter 1 of Genesis is requested for the 'niv' bible
        chapter = extractor.get_bible_chapter('niv', 'Genesis', 1)
        # THEN: The result reports a verse list, and entry 1 holds the expected text
        assert chapter.has_verse_list() is True
        first_verse = chapter.verse_list[1]
        assert 'In the beginning God created the heavens and the earth.' == first_verse, \
            'The first chapter of genesis should have been fetched.'
    def test_bibleserver_get_bibles(self):
        """
        Test getting list of bibles from BibleServer.com
@ -167,32 +196,3 @@ class TestBibleHTTP(TestCase):
        # THEN: The list should not be None, and some known bibles should be there
        assert bibles is not None
        assert ('Holman Christian Standard Bible (HCSB)', 'HCSB', 'en') in bibles
    def test_crosswalk_get_bibles(self):
        """
        Check that the list of bibles can be downloaded from Crosswalk.com
        """
        # GIVEN: A Crosswalk extractor and one entry we expect the site to offer
        extractor = CWExtract()
        known_bible = ('Giovanni Diodati 1649 (Italian)', 'gdb', 'it')
        # WHEN: The list of available bibles is requested
        available_bibles = extractor.get_bibles_from_http()
        # THEN: A result is returned and it includes the known entry
        assert available_bibles is not None
        assert known_bible in available_bibles
    def test_crosswalk_get_verse_text(self):
        """
        Check that verse text can be downloaded from Crosswalk.com
        """
        # GIVEN: A Crosswalk extractor
        extractor = CWExtract()
        # WHEN: Chapter 1 of Genesis is requested for the 'niv' bible
        chapter = extractor.get_bible_chapter('niv', 'Genesis', 1)
        # THEN: The result reports a verse list, and entry 1 holds the expected text
        assert chapter.has_verse_list() is True
        first_verse = chapter.verse_list[1]
        assert 'In the beginning God created the heavens and the earth.' == first_verse, \
            'The first chapter of genesis should have been fetched.'