Strip ASCII control characters from imported strings (a user's database contained \x07 (BEL) characters)

This commit is contained in:
Samuel Findlay 2012-06-09 04:30:34 +10:00
parent 398ac5afdf
commit f02e886976

View File

@@ -77,6 +77,9 @@ class ZionWorxImport(SongImport):
""" """
Receive a CSV file (from a ZionWorx database dump) to import. Receive a CSV file (from a ZionWorx database dump) to import.
""" """
# Translation map used to strip ASCII control chars: every code point below
# 32 except LF (10) and CR (13), plus DEL (127), is deleted.
self.control_chars_map = dict.fromkeys(
range(10) + [11, 12] + range(14,32) + [127])
with open(self.importSource, 'rb') as songs_file: with open(self.importSource, 'rb') as songs_file:
fieldnames = [u'SongNum', u'Title1', u'Title2', u'Lyrics', fieldnames = [u'SongNum', u'Title1', u'Title2', u'Lyrics',
u'Writer', u'Copyright', u'Keywords', u'DefaultStyle'] u'Writer', u'Copyright', u'Keywords', u'DefaultStyle']
@@ -131,9 +134,9 @@ class ZionWorxImport(SongImport):
def _decode(self, str): def _decode(self, str):
""" """
Decodes CSV input to unicode. Decodes CSV input to unicode, stripping all control characters (except
new lines).
This encoding choice seems OK. ZionWorx has no option for setting the
encoding for its songs, so we assume encoding is always the same.
""" """
return unicode(str, u'cp1252') # This encoding choice seems OK. ZionWorx has no option for setting the
# encoding for its songs, so we assume encoding is always the same.
return unicode(str, u'cp1252').translate(self.control_chars_map)