From 93fc6e014537b0959fd09e6dee1ce3fe24961958 Mon Sep 17 00:00:00 2001 From: Tomas Groth Date: Thu, 7 Jul 2016 22:56:50 +0200 Subject: [PATCH] Update Crosswalk webpage parser to match new layout. Fixes bug 1599999. Fixes: https://launchpad.net/bugs/1599999 --- openlp/plugins/bibles/lib/http.py | 40 +++++++++++-------- .../openlp_plugins/bibles/test_lib_http.py | 1 - 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/openlp/plugins/bibles/lib/http.py b/openlp/plugins/bibles/lib/http.py index c50745c2f..fce5d3285 100644 --- a/openlp/plugins/bibles/lib/http.py +++ b/openlp/plugins/bibles/lib/http.py @@ -532,28 +532,26 @@ class CWExtract(RegistryProperties): returns a list in the form [(biblename, biblekey, language_code)] """ log.debug('CWExtract.get_bibles_from_http') - bible_url = 'http://www.biblestudytools.com/' + bible_url = 'http://www.biblestudytools.com/bible-versions/' soup = get_soup_for_bible_ref(bible_url) if not soup: return None - bible_select = soup.find('select') - if not bible_select: - log.debug('No select tags found - did site change?') - return None - option_tags = bible_select.find_all('option', {'class': 'log-translation'}) - if not option_tags: - log.debug('No option tags found - did site change?') + h4_tags = soup.find_all('h4', {'class': 'small-header'}) + if not h4_tags: + log.debug('No h4 tags found - did site change?') return None bibles = [] - for ot in option_tags: - tag_text = ot.get_text().strip() - try: - tag_value = ot['value'] - except KeyError: - log.exception('No value attribute found - did site change?') + for h4t in h4_tags: + short_name = None + if h4t.span: + short_name = h4t.span.get_text().strip().lower() + else: + log.error('No span tag found - did site change?') return None - if not tag_value: + if not short_name: continue + h4t.span.extract() + tag_text = h4t.get_text().strip() # The names of non-english bibles has their language in parentheses at the end if tag_text.endswith(')'): language = tag_text[tag_text.rfind('(') + 1:-1] @@ -561,12 +559,20 @@ class CWExtract(RegistryProperties): language_code = CROSSWALK_LANGUAGES[language] else: language_code = '' - # ... except for the latin vulgate + # ... except for those that don't... elif 'latin' in tag_text.lower(): language_code = 'la' + elif 'la biblia' in tag_text.lower() or 'nueva' in tag_text.lower(): + language_code = 'es' + elif 'chinese' in tag_text.lower(): + language_code = 'zh' + elif 'greek' in tag_text.lower(): + language_code = 'el' + elif 'nova' in tag_text.lower(): + language_code = 'pt' else: language_code = 'en' - bibles.append((tag_text, tag_value, language_code)) + bibles.append((tag_text, short_name, language_code)) return bibles diff --git a/tests/interfaces/openlp_plugins/bibles/test_lib_http.py b/tests/interfaces/openlp_plugins/bibles/test_lib_http.py index 4a7fb4af3..4ca4a8b0f 100644 --- a/tests/interfaces/openlp_plugins/bibles/test_lib_http.py +++ b/tests/interfaces/openlp_plugins/bibles/test_lib_http.py @@ -146,7 +146,6 @@ class TestBibleHTTP(TestCase): self.assertIsNotNone(bibles) self.assertIn(('Holman Christian Standard Bible', 'HCSB', 'en'), bibles) - @skip("Waiting for Crosswalk to fix their server") def test_crosswalk_get_bibles(self): """ Test getting list of bibles from Crosswalk.com