Add solution to remove extra whitespace from OpenLyrics <lines> tags

Related to #550
This commit is contained in:
Gyuris Gellért 2020-08-13 19:40:14 +00:00 committed by Tomas Groth
parent a944791a70
commit 7df43c7dfb
4 changed files with 85 additions and 1 deletions

View File

@ -61,7 +61,15 @@ class OpenLyricsImport(SongImport):
# Pass a file object, because lxml does not cope with some
# special characters in the path (see lp:757673 and lp:744337).
parsed_file = etree.parse(file_path.open('rb'), parser)
xml = etree.tostring(parsed_file).decode()
# Remove leading and trailing whitespace from <lines> tags and their descendants
root = parsed_file.getroot()
for elem in root.iter('{*}lines'):
self._strip_whitespace(elem)
for subelem in elem.iter('*'):
self._strip_whitespace(subelem)
xml = etree.tostring(root).decode()
self.open_lyrics.xml_to_song(xml)
except etree.XMLSyntaxError:
log.exception('XML syntax error in file {path}'.format(path=file_path))
@ -71,3 +79,12 @@ class OpenLyricsImport(SongImport):
name=file_path,
text=exception.log_message))
self.log_error(file_path, exception.display_message)
def _strip_whitespace(self, elem):
    """
    Trim surrounding whitespace from the 'text' and 'tail' attributes of an etree._Element object, in place.

    :param elem: The element to clean up. An attribute that is ``None`` is left as ``None``.
    """
    # 'text' is handled before 'tail', one attribute at a time.
    for attr_name in ('text', 'tail'):
        current = getattr(elem, attr_name)
        if current is not None:
            setattr(elem, attr_name, current.strip())

View File

@ -234,3 +234,31 @@ class TestOpenLyricsImport(TestCase, TestMixin):
# THEN: add_songbook_entry should have been called twice
assert mocked_song.method_calls[0][1][1] == '48'
assert mocked_song.method_calls[1][1][1] == '445 A'
def test_leading_and_trailing_whitespaces_inside_lines_tags_are_removed(self):
    """
    Test that leading and trailing whitespace inside <lines> tags and their descendants is removed
    """
    # GIVEN: One OpenLyrics XML with extra whitespace in its <lines> tag (Amazing_Grace_1.xml)
    #        and a copy whose only difference is that it lacks that whitespace (Amazing_Grace_2.xml)
    mocked_manager = MagicMock()
    mocked_import_wizard = MagicMock()
    importer = OpenLyricsImport(mocked_manager, file_paths=[])
    importer.import_wizard = mocked_import_wizard
    importer.open_lyrics = MagicMock()
    importer.open_lyrics.xml_to_song = MagicMock()

    # WHEN: Importing the file that does not have the extra whitespace...
    importer.import_source = [TEST_PATH / 'Amazing_Grace_2.xml']
    importer.do_import()
    # ... keep the parsed XML, which is assumed to be the first positional argument of xml_to_song()
    importer.open_lyrics.xml_to_song.assert_called()
    no_whitespaces_xml = importer.open_lyrics.xml_to_song.call_args[0][0]
    # Forget the recorded call. assert_called_with() only inspects the most recent call, so without
    # this reset the final assertion would still pass if the second import never reached xml_to_song().
    importer.open_lyrics.xml_to_song.reset_mock()
    # ... and importing the file that has the extra whitespace
    importer.import_source = [TEST_PATH / 'Amazing_Grace_1.xml']
    importer.do_import()

    # THEN: The second import should have passed xml_to_song() the same XML content as the first one
    importer.open_lyrics.xml_to_song.assert_called_with(no_whitespaces_xml)

View File

@ -0,0 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<song xmlns="http://openlyrics.info/namespace/2009/song"
version="0.8"
createdIn="OpenLP 1.9.0"
modifiedIn="MyApp 0.0.1"
modifiedDate="2012-04-10T22:00:00+10:00">
<properties>
<titles>
<title>Amazing Grace</title>
</titles>
</properties>
<lyrics>
<verse name="v1">
<lines>
Amazing grace, how sweet the sound<br/>
That saved a wretch like me!<br/>
I once was lost, but now am found,<br/>
Was blind but now I see.
</lines>
</verse>
</lyrics>
</song>

View File

@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<song xmlns="http://openlyrics.info/namespace/2009/song"
version="0.8"
createdIn="OpenLP 1.9.0"
modifiedIn="MyApp 0.0.1"
modifiedDate="2012-04-10T22:00:00+10:00">
<properties>
<titles>
<title>Amazing Grace</title>
</titles>
</properties>
<lyrics>
<verse name="v1">
<lines>Amazing grace, how sweet the sound<br/>That saved a wretch like me!<br/>I once was lost, but now am found,<br/>Was blind but now I see.</lines>
</verse>
</lyrics>
</song>