From 961d70b836d135ffb1837f86da941875dc4b319e Mon Sep 17 00:00:00 2001 From: Philip Ridout Date: Sat, 3 Aug 2013 20:15:59 +0100 Subject: [PATCH 1/5] Fixed bug #1194610 By detecting the encoding ranther than assuming. --- .../plugins/songs/lib/songshowplusimport.py | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/openlp/plugins/songs/lib/songshowplusimport.py b/openlp/plugins/songs/lib/songshowplusimport.py index a72f83c4f..ca08e9002 100644 --- a/openlp/plugins/songs/lib/songshowplusimport.py +++ b/openlp/plugins/songs/lib/songshowplusimport.py @@ -30,6 +30,7 @@ The :mod:`songshowplusimport` module provides the functionality for importing SongShow Plus songs into the OpenLP database. """ +import chardet import os import logging import re @@ -134,41 +135,41 @@ class SongShowPlusImport(SongImport): log.debug(length_descriptor_size) data = song_data.read(length_descriptor) if block_key == TITLE: - self.title = unicode(data, u'cp1252') + self.title = unicode(data, chardet.detect(data)['encoding']) elif block_key == AUTHOR: authors = data.split(" / ") for author in authors: if author.find(",") !=-1: authorParts = author.split(", ") author = authorParts[1] + " " + authorParts[0] - self.parse_author(unicode(author, u'cp1252')) + self.parse_author(unicode(author, chardet.detect(data)['encoding'])) elif block_key == COPYRIGHT: - self.addCopyright(unicode(data, u'cp1252')) + self.addCopyright(unicode(data, chardet.detect(data)['encoding'])) elif block_key == CCLI_NO: self.ccliNumber = int(data) elif block_key == VERSE: - self.addVerse(unicode(data, u'cp1252'), "%s%s" % (VerseType.tags[VerseType.Verse], verse_no)) + self.addVerse(unicode(data, chardet.detect(data)['encoding']), "%s%s" % (VerseType.tags[VerseType.Verse], verse_no)) elif block_key == CHORUS: - self.addVerse(unicode(data, u'cp1252'), "%s%s" % (VerseType.tags[VerseType.Chorus], verse_no)) + self.addVerse(unicode(data, chardet.detect(data)['encoding']), "%s%s" % (VerseType.tags[VerseType.Chorus], verse_no)) elif block_key == BRIDGE: - self.addVerse(unicode(data, u'cp1252'), "%s%s" % (VerseType.tags[VerseType.Bridge], verse_no)) + self.addVerse(unicode(data, chardet.detect(data)['encoding']), "%s%s" % (VerseType.tags[VerseType.Bridge], verse_no)) elif block_key == TOPIC: - self.topics.append(unicode(data, u'cp1252')) + self.topics.append(unicode(data, chardet.detect(data)['encoding'])) elif block_key == COMMENTS: - self.comments = unicode(data, u'cp1252') + self.comments = unicode(data, chardet.detect(data)['encoding']) elif block_key == VERSE_ORDER: verse_tag = self.to_openlp_verse_tag(data, True) if verse_tag: if not isinstance(verse_tag, unicode): - verse_tag = unicode(verse_tag, u'cp1252') + verse_tag = unicode(verse_tag, chardet.detect(data)['encoding']) self.ssp_verse_order_list.append(verse_tag) elif block_key == SONG_BOOK: - self.songBookName = unicode(data, u'cp1252') + self.songBookName = unicode(data, chardet.detect(data)['encoding']) elif block_key == SONG_NUMBER: self.songNumber = ord(data) elif block_key == CUSTOM_VERSE: verse_tag = self.to_openlp_verse_tag(verse_name) - self.addVerse(unicode(data, u'cp1252'), verse_tag) + self.addVerse(unicode(data, chardet.detect(data)['encoding']), verse_tag) else: log.debug("Unrecognised blockKey: %s, data: %s" % (block_key, data)) song_data.seek(next_block_starts) From 1fb0048def598c580b85fd6a695eba84e0f82719 Mon Sep 17 00:00:00 2001 From: Philip Ridout Date: Tue, 13 Aug 2013 21:51:52 +0100 Subject: [PATCH 2/5] added fallback to retieve_windows encoding --- .../plugins/songs/lib/songshowplusimport.py | 32 ++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/openlp/plugins/songs/lib/songshowplusimport.py b/openlp/plugins/songs/lib/songshowplusimport.py index ca08e9002..c9ca2a345 100644 --- a/openlp/plugins/songs/lib/songshowplusimport.py +++ b/openlp/plugins/songs/lib/songshowplusimport.py @@ -135,41 +135,41 @@ class SongShowPlusImport(SongImport): log.debug(length_descriptor_size) data = song_data.read(length_descriptor) if block_key == TITLE: - self.title = unicode(data, chardet.detect(data)['encoding']) + self.title = self.decode(data) elif block_key == AUTHOR: authors = data.split(" / ") for author in authors: if author.find(",") !=-1: authorParts = author.split(", ") author = authorParts[1] + " " + authorParts[0] - self.parse_author(unicode(author, chardet.detect(data)['encoding'])) + self.parse_author(self.decode(author)) elif block_key == COPYRIGHT: - self.addCopyright(unicode(data, chardet.detect(data)['encoding'])) + self.addCopyright(self.decode(data)) elif block_key == CCLI_NO: self.ccliNumber = int(data) elif block_key == VERSE: - self.addVerse(unicode(data, chardet.detect(data)['encoding']), "%s%s" % (VerseType.tags[VerseType.Verse], verse_no)) + self.addVerse(self.decode(data), "%s%s" % (VerseType.tags[VerseType.Verse], verse_no)) elif block_key == CHORUS: - self.addVerse(unicode(data, chardet.detect(data)['encoding']), "%s%s" % (VerseType.tags[VerseType.Chorus], verse_no)) + self.addVerse(self.decode(data), "%s%s" % (VerseType.tags[VerseType.Chorus], verse_no)) elif block_key == BRIDGE: - self.addVerse(unicode(data, chardet.detect(data)['encoding']), "%s%s" % (VerseType.tags[VerseType.Bridge], verse_no)) + self.addVerse(self.decode(data), "%s%s" % (VerseType.tags[VerseType.Bridge], verse_no)) elif block_key == TOPIC: - self.topics.append(unicode(data, chardet.detect(data)['encoding'])) + self.topics.append(self.decode(data)) elif block_key == COMMENTS: - self.comments = unicode(data, chardet.detect(data)['encoding']) + self.comments = self.decode(data) elif block_key == VERSE_ORDER: verse_tag = self.to_openlp_verse_tag(data, True) if verse_tag: if not isinstance(verse_tag, unicode): - verse_tag = unicode(verse_tag, chardet.detect(data)['encoding']) + verse_tag = self.decode(verse_tag) self.ssp_verse_order_list.append(verse_tag) elif block_key == SONG_BOOK: - self.songBookName = unicode(data, chardet.detect(data)['encoding']) + self.songBookName = self.decode(data) elif block_key == SONG_NUMBER: self.songNumber = ord(data) elif block_key == CUSTOM_VERSE: verse_tag = self.to_openlp_verse_tag(verse_name) - self.addVerse(unicode(data, chardet.detect(data)['encoding']), verse_tag) + self.addVerse(self.decode(data), verse_tag) else: log.debug("Unrecognised blockKey: %s, data: %s" % (block_key, data)) song_data.seek(next_block_starts) @@ -207,3 +207,13 @@ class SongShowPlusImport(SongImport): verse_tag = VerseType.tags[VerseType.Other] verse_number = self.other_list[verse_name] return verse_tag + verse_number + + def decode(self, data): + try: + return unicode(data, chardet.detect(data)['encoding']) + except: + while True: + try: + return unicode(data, self.encoding) + except: + self.encoding = retrieve_windows_encoding() \ No newline at end of file From 87e7fcdbda62aedbe0855a97cff6a7fafa6af666 Mon Sep 17 00:00:00 2001 From: Philip Ridout Date: Sat, 7 Sep 2013 17:18:31 +0000 Subject: [PATCH 3/5] added fallback to retieve_windows encoding --- openlp/plugins/songs/lib/songshowplusimport.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/openlp/plugins/songs/lib/songshowplusimport.py b/openlp/plugins/songs/lib/songshowplusimport.py index c9ca2a345..a10ac7668 100644 --- a/openlp/plugins/songs/lib/songshowplusimport.py +++ b/openlp/plugins/songs/lib/songshowplusimport.py @@ -212,8 +212,4 @@ class SongShowPlusImport(SongImport): try: return unicode(data, chardet.detect(data)['encoding']) except: - while True: - try: - return unicode(data, self.encoding) - except: - self.encoding = retrieve_windows_encoding() \ No newline at end of file + return unicode(data, u'cp1252') \ No newline at end of file From 3499f1ee65d78561ab768325098db9176fa4f396 Mon Sep 17 00:00:00 2001 From: Philip Ridout Date: Sat, 7 Sep 2013 17:37:40 +0000 Subject: [PATCH 4/5] missed some instances of unicode --- openlp/plugins/songs/lib/songshowplusimport.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/openlp/plugins/songs/lib/songshowplusimport.py b/openlp/plugins/songs/lib/songshowplusimport.py index 315d3d18d..d1fa9dec3 100644 --- a/openlp/plugins/songs/lib/songshowplusimport.py +++ b/openlp/plugins/songs/lib/songshowplusimport.py @@ -160,7 +160,7 @@ class SongShowPlusImport(SongImport): elif block_key == VERSE_ORDER: verse_tag = self.to_openlp_verse_tag(data, True) if verse_tag: - if not isinstance(verse_tag, unicode): + if not isinstance(verse_tag, str): verse_tag = self.decode(verse_tag) self.ssp_verse_order_list.append(verse_tag) elif block_key == SONG_BOOK: @@ -210,6 +210,6 @@ class SongShowPlusImport(SongImport): def decode(self, data): try: - return unicode(data, chardet.detect(data)['encoding']) + return str(data, chardet.detect(data)['encoding']) except: - return unicode(data, 'cp1252') \ No newline at end of file + return str(data, 'cp1252') \ No newline at end of file From f725f8a92b667df609b5c8d741c0cca7134c6f1b Mon Sep 17 00:00:00 2001 From: Philip Ridout Date: Sat, 14 Sep 2013 23:22:05 +0100 Subject: [PATCH 5/5] filter returns an itterator in Py3 (as apposed to a list in Py2) --- openlp/plugins/songs/lib/__init__.py | 2 +- openlp/plugins/songs/lib/songshowplusimport.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/openlp/plugins/songs/lib/__init__.py b/openlp/plugins/songs/lib/__init__.py index 271a94710..e98e54427 100644 --- a/openlp/plugins/songs/lib/__init__.py +++ b/openlp/plugins/songs/lib/__init__.py @@ -365,7 +365,7 @@ def retrieve_windows_encoding(recommendation=None): [pair[1] for pair in encodings], 0, False) if not choice[1]: return None - return filter(lambda item: item[1] == choice[0], encodings)[0][0] + return next(filter(lambda item: item[1] == choice[0], encodings))[0] def clean_string(string): diff --git a/openlp/plugins/songs/lib/songshowplusimport.py b/openlp/plugins/songs/lib/songshowplusimport.py index d1fa9dec3..35cd44b8a 100644 --- a/openlp/plugins/songs/lib/songshowplusimport.py +++ b/openlp/plugins/songs/lib/songshowplusimport.py @@ -37,7 +37,7 @@ import re import struct from openlp.core.ui.wizard import WizardStrings -from openlp.plugins.songs.lib import VerseType +from openlp.plugins.songs.lib import VerseType, retrieve_windows_encoding from openlp.plugins.songs.lib.songimport import SongImport TITLE = 1 @@ -133,16 +133,16 @@ class SongShowPlusImport(SongImport): else: length_descriptor, = struct.unpack("B", song_data.read(1)) log.debug(length_descriptor_size) - data = song_data.read(length_descriptor).decode() + data = song_data.read(length_descriptor) if block_key == TITLE: self.title = self.decode(data) elif block_key == AUTHOR: - authors = data.split(" / ") + authors = self.decode(data).split(" / ") for author in authors: if author.find(",") !=-1: authorParts = author.split(", ") author = authorParts[1] + " " + authorParts[0] - self.parse_author(self.decode(author)) + self.parse_author(author) elif block_key == COPYRIGHT: self.addCopyright(self.decode(data)) elif block_key == CCLI_NO: @@ -158,7 +158,7 @@ class SongShowPlusImport(SongImport): elif block_key == COMMENTS: self.comments = self.decode(data) elif block_key == VERSE_ORDER: - verse_tag = self.to_openlp_verse_tag(data, True) + verse_tag = self.to_openlp_verse_tag(self.decode(data), True) if verse_tag: if not isinstance(verse_tag, str): verse_tag = self.decode(verse_tag) @@ -212,4 +212,4 @@ class SongShowPlusImport(SongImport): try: return str(data, chardet.detect(data)['encoding']) except: - return str(data, 'cp1252') \ No newline at end of file + return str(data, retrieve_windows_encoding()) \ No newline at end of file