Fix fetching of Bible texts from Crosswalk

Fix handling of form feed and vertical tab characters.

bzr-revno: 2697
This commit is contained in:
second@tgc.dk 2016-10-04 17:41:43 +01:00 committed by Tim Bentley
commit 5d24b2c473
5 changed files with 30 additions and 6 deletions

View File

@ -493,7 +493,7 @@ class CWExtract(RegistryProperties):
for verse in verses_div:
self.application.process_events()
verse_number = int(verse.find('strong').contents[0])
verse_span = verse.find('span')
verse_span = verse.find('span', class_='verse-%d' % verse_number)
tags_to_remove = verse_span.find_all(['a', 'sup'])
for tag in tags_to_remove:
tag.decompose()

View File

@ -101,7 +101,7 @@ class MediaShoutImport(SongImport):
self.song_book_name = song.SongID
for verse in verses:
tag = VERSE_TAGS[verse.Type] + str(verse.Number) if verse.Type < len(VERSE_TAGS) else 'O'
self.add_verse(verse.Text, tag)
self.add_verse(self.tidy_text(verse.Text), tag)
for order in verse_order:
if order.Type < len(VERSE_TAGS):
self.verse_order_list.append(VERSE_TAGS[order.Type] + str(order.Number))

View File

@ -140,10 +140,13 @@ class SongImport(QtCore.QObject):
text = text.replace('\u2026', '...')
text = text.replace('\u2013', '-')
text = text.replace('\u2014', '-')
# Replace vertical tab with 2 linebreaks
text = text.replace('\v', '\n\n')
# Replace form feed (page break) with 2 linebreaks
text = text.replace('\f', '\n\n')
# Remove surplus blank lines, spaces, trailing/leading spaces
text = re.sub(r'[ \t\v]+', ' ', text)
text = re.sub(r'[ \t]+', ' ', text)
text = re.sub(r' ?(\r\n?|\n) ?', '\n', text)
text = re.sub(r' ?(\n{5}|\f)+ ?', '\f', text)
return text
def process_song_text(self, text):

View File

@ -22,15 +22,20 @@
"""
Test the MediaShout importer
"""
from unittest import TestCase
from unittest import TestCase, skipUnless
from collections import namedtuple
from openlp.core.common import Registry
try:
from openlp.plugins.songs.lib.importers.mediashout import MediaShoutImport
CAN_RUN_TESTS = True
except ImportError:
CAN_RUN_TESTS = False
from tests.functional import MagicMock, patch, call
@skipUnless(CAN_RUN_TESTS, 'Not Windows, skipping test')
class TestMediaShoutImport(TestCase):
"""
Test the MediaShout importer

View File

@ -163,3 +163,19 @@ class TestBibleHTTP(TestCase):
# THEN: The list should not be None, and some known bibles should be there
self.assertIsNotNone(bibles)
self.assertIn(('Giovanni Diodati 1649 (Italian)', 'gdb', 'it'), bibles)
def test_crosswalk_get_verse_text(self):
    """
    Test verse text from Crosswalk.com

    Fetches NIV Genesis chapter 1 through ``CWExtract.get_bible_chapter`` and
    checks that a verse list was returned and that verse 1 has the expected
    wording.
    """
    # GIVEN: A new Crosswalk extraction class
    handler = CWExtract()

    # WHEN: downloading NIV Genesis from Crosswalk
    niv_genesis_chapter_one = handler.get_bible_chapter('niv', 'Genesis', 1)

    # THEN: The verse list should contain the verses
    self.assertTrue(niv_genesis_chapter_one.has_verse_list())
    # assertEqual is used rather than the deprecated assertEquals alias, which
    # was removed from unittest in Python 3.12.
    self.assertEqual('In the beginning God created the heavens and the earth.',
                     niv_genesis_chapter_one.verse_list[1],
                     'The first chapter of genesis should have been fetched.')