From a3aaf7dce88416c89a9156723d7d8e1a96a96c5e Mon Sep 17 00:00:00 2001 From: Raoul Snyman Date: Thu, 16 Sep 2010 20:51:29 +0200 Subject: [PATCH] Some fancy footwork around encodings. --- openlp/plugins/songs/lib/olp1import.py | 36 +++++++++++++++++++------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/openlp/plugins/songs/lib/olp1import.py b/openlp/plugins/songs/lib/olp1import.py index 692c6792e..de750cb24 100644 --- a/openlp/plugins/songs/lib/olp1import.py +++ b/openlp/plugins/songs/lib/olp1import.py @@ -41,6 +41,8 @@ class OpenLP1SongImport(SongImport): The :class:`OpenLP1SongImport` class provides OpenLP with the ability to import song databases from installations of openlp.org 1.x. """ + last_encoding = u'windows-1252' + def __init__(self, manager, **kwargs): """ Initialise the import. @@ -54,20 +56,33 @@ class OpenLP1SongImport(SongImport): SongImport.__init__(self, manager) self.import_source = kwargs[u'filename'] - def decode_string(self, raw): + def decode_string(self, raw, guess): """ Use chardet to detect the encoding of the raw string, and convert it to unicode. ``raw`` The raw bytestring to decode. + ``guess`` + What chardet guessed the encoding to be. """ - detection = chardet.detect(raw) - if detection[u'confidence'] < 0.8: + if guess[u'confidence'] < 0.8: codec = u'windows-1252' else: - codec = detection[u'encoding'] - return unicode(raw, codec) + codec = guess[u'encoding'] + try: + decoded = unicode(raw, codec) + self.last_encoding = codec + except UnicodeDecodeError: + log.exception(u'Error in detecting openlp.org 1.x database encoding.') + try: + decoded = unicode(raw, self.last_encoding) + except UnicodeDecodeError: + # possibly show an error form + #self.import_wizard.showError(u'There was a problem ' + # u'detecting the encoding of a string') + decoded = raw + return decoded def do_import(self): """ @@ -103,9 +118,10 @@ class OpenLP1SongImport(SongImport): success = False break song_id = song[0] - title = self.decode_string(song[1]) - lyrics = self.decode_string(song[2]).replace(u'\r', u'') - copyright = self.decode_string(song[3]) + guess = chardet.detect(song[2]) + title = self.decode_string(song[1], guess) + lyrics = self.decode_string(song[2], guess).replace(u'\r', u'') + copyright = self.decode_string(song[3], guess) self.import_wizard.incrementProgressBar( unicode(translate('SongsPlugin.ImportWizardForm', 'Importing "%s"...')) % title) @@ -121,7 +137,7 @@ class OpenLP1SongImport(SongImport): break for author in authors: if author[0] == author_id[0]: - self.parse_author(self.decode_string(author[1])) + self.parse_author(self.decode_string(author[1], guess)) break if self.stop_import_flag: success = False @@ -136,7 +152,7 @@ class OpenLP1SongImport(SongImport): break for track in tracks: if track[0] == track_id[0]: - self.add_media_file(self.decode_string(track[1])) + self.add_media_file(self.decode_string(track[1], guess)) break if self.stop_import_flag: success = False