Some fancy footwork around encodings.

This commit is contained in:
Raoul Snyman 2010-09-16 20:51:29 +02:00
parent fd5f503b29
commit a3aaf7dce8
1 changed files with 26 additions and 10 deletions

View File

@ -41,6 +41,8 @@ class OpenLP1SongImport(SongImport):
The :class:`OpenLP1SongImport` class provides OpenLP with the ability to
import song databases from installations of openlp.org 1.x.
"""
last_encoding = u'windows-1252'
def __init__(self, manager, **kwargs):
"""
Initialise the import.
@ -54,20 +56,33 @@ class OpenLP1SongImport(SongImport):
SongImport.__init__(self, manager)
self.import_source = kwargs[u'filename']
def decode_string(self, raw):
def decode_string(self, raw, guess):
"""
Use chardet to detect the encoding of the raw string, and convert it
to unicode.
``raw``
The raw bytestring to decode.
``guess``
What chardet guessed the encoding to be.
"""
detection = chardet.detect(raw)
if detection[u'confidence'] < 0.8:
if guess[u'confidence'] < 0.8:
codec = u'windows-1252'
else:
codec = detection[u'encoding']
return unicode(raw, codec)
codec = guess[u'encoding']
try:
decoded = unicode(raw, codec)
self.last_encoding = codec
except UnicodeDecodeError:
log.exception(u'Error in detecting openlp.org 1.x database encoding.')
try:
decoded = unicode(raw, self.last_encoding)
except UnicodeDecodeError:
# possibly show an error form
#self.import_wizard.showError(u'There was a problem '
# u'detecting the encoding of a string')
decoded = raw
return decoded
def do_import(self):
"""
@ -103,9 +118,10 @@ class OpenLP1SongImport(SongImport):
success = False
break
song_id = song[0]
title = self.decode_string(song[1])
lyrics = self.decode_string(song[2]).replace(u'\r', u'')
copyright = self.decode_string(song[3])
guess = chardet.detect(song[2])
title = self.decode_string(song[1], guess)
lyrics = self.decode_string(song[2], guess).replace(u'\r', u'')
copyright = self.decode_string(song[3], guess)
self.import_wizard.incrementProgressBar(
unicode(translate('SongsPlugin.ImportWizardForm',
'Importing "%s"...')) % title)
@ -121,7 +137,7 @@ class OpenLP1SongImport(SongImport):
break
for author in authors:
if author[0] == author_id[0]:
self.parse_author(self.decode_string(author[1]))
self.parse_author(self.decode_string(author[1], guess))
break
if self.stop_import_flag:
success = False
@ -136,7 +152,7 @@ class OpenLP1SongImport(SongImport):
break
for track in tracks:
if track[0] == track_id[0]:
self.add_media_file(self.decode_string(track[1]))
self.add_media_file(self.decode_string(track[1], guess))
break
if self.stop_import_flag:
success = False