Some fancy footwork around encodings.

This commit is contained in:
Raoul Snyman 2010-09-16 20:51:29 +02:00
parent fd5f503b29
commit a3aaf7dce8

View File

@ -41,6 +41,8 @@ class OpenLP1SongImport(SongImport):
The :class:`OpenLP1SongImport` class provides OpenLP with the ability to The :class:`OpenLP1SongImport` class provides OpenLP with the ability to
import song databases from installations of openlp.org 1.x. import song databases from installations of openlp.org 1.x.
""" """
last_encoding = u'windows-1252'
def __init__(self, manager, **kwargs): def __init__(self, manager, **kwargs):
""" """
Initialise the import. Initialise the import.
@ -54,20 +56,33 @@ class OpenLP1SongImport(SongImport):
SongImport.__init__(self, manager) SongImport.__init__(self, manager)
self.import_source = kwargs[u'filename'] self.import_source = kwargs[u'filename']
def decode_string(self, raw): def decode_string(self, raw, guess):
""" """
Use chardet to detect the encoding of the raw string, and convert it Use chardet to detect the encoding of the raw string, and convert it
to unicode. to unicode.
``raw`` ``raw``
The raw bytestring to decode. The raw bytestring to decode.
``guess``
What chardet guessed the encoding to be.
""" """
detection = chardet.detect(raw) if guess[u'confidence'] < 0.8:
if detection[u'confidence'] < 0.8:
codec = u'windows-1252' codec = u'windows-1252'
else: else:
codec = detection[u'encoding'] codec = guess[u'encoding']
return unicode(raw, codec) try:
decoded = unicode(raw, codec)
self.last_encoding = codec
except UnicodeDecodeError:
log.exception(u'Error in detecting openlp.org 1.x database encoding.')
try:
decoded = unicode(raw, self.last_encoding)
except UnicodeDecodeError:
# possibly show an error form
#self.import_wizard.showError(u'There was a problem '
# u'detecting the encoding of a string')
decoded = raw
return decoded
def do_import(self): def do_import(self):
""" """
@ -103,9 +118,10 @@ class OpenLP1SongImport(SongImport):
success = False success = False
break break
song_id = song[0] song_id = song[0]
title = self.decode_string(song[1]) guess = chardet.detect(song[2])
lyrics = self.decode_string(song[2]).replace(u'\r', u'') title = self.decode_string(song[1], guess)
copyright = self.decode_string(song[3]) lyrics = self.decode_string(song[2], guess).replace(u'\r', u'')
copyright = self.decode_string(song[3], guess)
self.import_wizard.incrementProgressBar( self.import_wizard.incrementProgressBar(
unicode(translate('SongsPlugin.ImportWizardForm', unicode(translate('SongsPlugin.ImportWizardForm',
'Importing "%s"...')) % title) 'Importing "%s"...')) % title)
@ -121,7 +137,7 @@ class OpenLP1SongImport(SongImport):
break break
for author in authors: for author in authors:
if author[0] == author_id[0]: if author[0] == author_id[0]:
self.parse_author(self.decode_string(author[1])) self.parse_author(self.decode_string(author[1], guess))
break break
if self.stop_import_flag: if self.stop_import_flag:
success = False success = False
@ -136,7 +152,7 @@ class OpenLP1SongImport(SongImport):
break break
for track in tracks: for track in tracks:
if track[0] == track_id[0]: if track[0] == track_id[0]:
self.add_media_file(self.decode_string(track[1])) self.add_media_file(self.decode_string(track[1], guess))
break break
if self.stop_import_flag: if self.stop_import_flag:
success = False success = False