From c3b703ec8a7d04cf7601fa120a1c9e0c67c3ed9c Mon Sep 17 00:00:00 2001 From: Philip Ridout Date: Fri, 2 Aug 2013 20:44:54 +0100 Subject: [PATCH 1/5] Fixes # 1194610 by detecting the encoding rather than assuming that its cp1252 --- .../plugins/songs/lib/songshowplusimport.py | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/openlp/plugins/songs/lib/songshowplusimport.py b/openlp/plugins/songs/lib/songshowplusimport.py index 04fe10370..6edf089ff 100644 --- a/openlp/plugins/songs/lib/songshowplusimport.py +++ b/openlp/plugins/songs/lib/songshowplusimport.py @@ -30,6 +30,7 @@ The :mod:`songshowplusimport` module provides the functionality for importing SongShow Plus songs into the OpenLP database. """ +import chardet import os import logging import re @@ -142,44 +143,44 @@ class SongShowPlusImport(SongImport): log.debug(length_descriptor_size) data = song_data.read(length_descriptor) if block_key == TITLE: - self.title = unicode(data, u'cp1252') + self.title = unicode(data, chardet.detect(data)['encoding']) elif block_key == AUTHOR: authors = data.split(" / ") for author in authors: if author.find(",") !=-1: authorParts = author.split(", ") author = authorParts[1] + " " + authorParts[0] - self.parseAuthor(unicode(author, u'cp1252')) + self.parseAuthor(unicode(author, chardet.detect(data)['encoding'])) elif block_key == COPYRIGHT: - self.addCopyright(unicode(data, u'cp1252')) + self.addCopyright(unicode(data, chardet.detect(data)['encoding'])) elif block_key == CCLI_NO: self.ccliNumber = int(data) elif block_key == VERSE: - self.addVerse(unicode(data, u'cp1252'), + self.addVerse(unicode(data, chardet.detect(data)['encoding']), "%s%s" % (VerseType.Tags[VerseType.Verse], verse_no)) elif block_key == CHORUS: - self.addVerse(unicode(data, u'cp1252'), + self.addVerse(unicode(data, chardet.detect(data)['encoding']), "%s%s" % (VerseType.Tags[VerseType.Chorus], verse_no)) elif block_key == BRIDGE: - self.addVerse(unicode(data, u'cp1252'), + self.addVerse(unicode(data, chardet.detect(data)['encoding']), "%s%s" % (VerseType.Tags[VerseType.Bridge], verse_no)) elif block_key == TOPIC: - self.topics.append(unicode(data, u'cp1252')) + self.topics.append(unicode(data, chardet.detect(data)['encoding'])) elif block_key == COMMENTS: - self.comments = unicode(data, u'cp1252') + self.comments = unicode(data, chardet.detect(data)['encoding']) elif block_key == VERSE_ORDER: verse_tag = self.toOpenLPVerseTag(data, True) if verse_tag: if not isinstance(verse_tag, unicode): - verse_tag = unicode(verse_tag, u'cp1252') + verse_tag = unicode(verse_tag, chardet.detect(data)['encoding']) self.sspVerseOrderList.append(verse_tag) elif block_key == SONG_BOOK: - self.songBookName = unicode(data, u'cp1252') + self.songBookName = unicode(data, chardet.detect(data)['encoding']) elif block_key == SONG_NUMBER: self.songNumber = ord(data) elif block_key == CUSTOM_VERSE: verse_tag = self.toOpenLPVerseTag(verse_name) - self.addVerse(unicode(data, u'cp1252'), verse_tag) + self.addVerse(unicode(data, chardet.detect(data)['encoding']), verse_tag) else: log.debug("Unrecognised blockKey: %s, data: %s" % (block_key, data)) From 711fa05dc157e229b39c34ad9d2c60ede31435df Mon Sep 17 00:00:00 2001 From: Philip Ridout Date: Tue, 13 Aug 2013 21:33:26 +0100 Subject: [PATCH 2/5] added option to select windows encoding --- .../plugins/songs/lib/songshowplusimport.py | 34 ++++++++++++------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/openlp/plugins/songs/lib/songshowplusimport.py b/openlp/plugins/songs/lib/songshowplusimport.py index 6edf089ff..abfa0a84e 100644 --- a/openlp/plugins/songs/lib/songshowplusimport.py +++ b/openlp/plugins/songs/lib/songshowplusimport.py @@ -37,7 +37,7 @@ import re import struct from openlp.core.ui.wizard import WizardStrings -from openlp.plugins.songs.lib import VerseType +from openlp.plugins.songs.lib import VerseType, retrieve_windows_encoding from openlp.plugins.songs.lib.songimport import SongImport TITLE = 1 @@ -143,44 +143,44 @@ class SongShowPlusImport(SongImport): log.debug(length_descriptor_size) data = song_data.read(length_descriptor) if block_key == TITLE: - self.title = unicode(data, chardet.detect(data)['encoding']) + self.title = self.decode(data) elif block_key == AUTHOR: authors = data.split(" / ") for author in authors: if author.find(",") !=-1: authorParts = author.split(", ") author = authorParts[1] + " " + authorParts[0] - self.parseAuthor(unicode(author, chardet.detect(data)['encoding'])) + self.parseAuthor(self.decode(author)) elif block_key == COPYRIGHT: - self.addCopyright(unicode(data, chardet.detect(data)['encoding'])) + self.addCopyright(self.decode(data)) elif block_key == CCLI_NO: self.ccliNumber = int(data) elif block_key == VERSE: - self.addVerse(unicode(data, chardet.detect(data)['encoding']), + self.addVerse(self.decode(data), "%s%s" % (VerseType.Tags[VerseType.Verse], verse_no)) elif block_key == CHORUS: - self.addVerse(unicode(data, chardet.detect(data)['encoding']), + self.addVerse(self.decode(data), "%s%s" % (VerseType.Tags[VerseType.Chorus], verse_no)) elif block_key == BRIDGE: - self.addVerse(unicode(data, chardet.detect(data)['encoding']), + self.addVerse(self.decode(data), "%s%s" % (VerseType.Tags[VerseType.Bridge], verse_no)) elif block_key == TOPIC: - self.topics.append(unicode(data, chardet.detect(data)['encoding'])) + self.topics.append(self.decode(data)) elif block_key == COMMENTS: - self.comments = unicode(data, chardet.detect(data)['encoding']) + self.comments = self.decode(data) elif block_key == VERSE_ORDER: verse_tag = self.toOpenLPVerseTag(data, True) if verse_tag: if not isinstance(verse_tag, unicode): - verse_tag = unicode(verse_tag, chardet.detect(data)['encoding']) + verse_tag = self.decode(verse_tag) self.sspVerseOrderList.append(verse_tag) elif block_key == SONG_BOOK: - self.songBookName = unicode(data, chardet.detect(data)['encoding']) + self.songBookName = self.decode(data) elif block_key == SONG_NUMBER: self.songNumber = ord(data) elif block_key == CUSTOM_VERSE: verse_tag = self.toOpenLPVerseTag(verse_name) - self.addVerse(unicode(data, chardet.detect(data)['encoding']), verse_tag) + self.addVerse(self.decode(data), verse_tag) else: log.debug("Unrecognised blockKey: %s, data: %s" % (block_key, data)) @@ -222,3 +222,13 @@ class SongShowPlusImport(SongImport): verse_tag = VerseType.Tags[VerseType.Other] verse_number = self.otherList[verse_name] return verse_tag + verse_number + + def decode(self, data): + try: + return unicode(data, chardet.detect(data)['encoding']) + except: + while True: + try: + return unicode(data, self.encoding) + except: + self.encoding = retrieve_windows_encoding() \ No newline at end of file From d724a6c87293ad54c586e7263b18dc48e58bef7f Mon Sep 17 00:00:00 2001 From: Philip Ridout Date: Wed, 14 Aug 2013 06:07:23 +0100 Subject: [PATCH 3/5] changed fallback to cp1252 --- openlp/plugins/songs/lib/songshowplusimport.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/openlp/plugins/songs/lib/songshowplusimport.py b/openlp/plugins/songs/lib/songshowplusimport.py index abfa0a84e..aaa3b7ad9 100644 --- a/openlp/plugins/songs/lib/songshowplusimport.py +++ b/openlp/plugins/songs/lib/songshowplusimport.py @@ -227,8 +227,4 @@ class SongShowPlusImport(SongImport): try: return unicode(data, chardet.detect(data)['encoding']) except: - while True: - try: - return unicode(data, self.encoding) - except: - self.encoding = retrieve_windows_encoding() \ No newline at end of file + return unicode(data, u'cp1252') \ No newline at end of file From 7cc9f9ae7b89185fccee19bfc02591a5a6eee17a Mon Sep 17 00:00:00 2001 From: Philip Ridout Date: Tue, 20 Aug 2013 21:19:11 +0000 Subject: [PATCH 4/5] reinstate retrieve_windows_encoding dialog --- openlp/plugins/songs/lib/songshowplusimport.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/openlp/plugins/songs/lib/songshowplusimport.py b/openlp/plugins/songs/lib/songshowplusimport.py index aaa3b7ad9..8d3dcba25 100644 --- a/openlp/plugins/songs/lib/songshowplusimport.py +++ b/openlp/plugins/songs/lib/songshowplusimport.py @@ -227,4 +227,9 @@ class SongShowPlusImport(SongImport): try: return unicode(data, chardet.detect(data)['encoding']) except: - return unicode(data, u'cp1252') \ No newline at end of file + self.encoding = u'cp1252' + while self.encoding: + try: + return unicode(data, self.encoding) + except: + self.encoding = retrieve_windows_encoding() From e802d577fd07620fc9ea22bb4ba8ef7d06824b4d Mon Sep 17 00:00:00 2001 From: Philip Ridout Date: Wed, 21 Aug 2013 05:30:11 +0000 Subject: [PATCH 5/5] Removed loop --- openlp/plugins/songs/lib/songshowplusimport.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/openlp/plugins/songs/lib/songshowplusimport.py b/openlp/plugins/songs/lib/songshowplusimport.py index 8d3dcba25..45746bdd1 100644 --- a/openlp/plugins/songs/lib/songshowplusimport.py +++ b/openlp/plugins/songs/lib/songshowplusimport.py @@ -227,9 +227,4 @@ class SongShowPlusImport(SongImport): try: return unicode(data, chardet.detect(data)['encoding']) except: - self.encoding = u'cp1252' - while self.encoding: - try: - return unicode(data, self.encoding) - except: - self.encoding = retrieve_windows_encoding() + return unicode(data, retrieve_windows_encoding())