Fixes # 1194610 by detecting the encoding rather than assuming that it's cp1252

This commit is contained in:
Philip Ridout 2013-08-02 20:44:54 +01:00
parent 8b21731f72
commit c3b703ec8a

View File

@ -30,6 +30,7 @@
The :mod:`songshowplusimport` module provides the functionality for importing The :mod:`songshowplusimport` module provides the functionality for importing
SongShow Plus songs into the OpenLP database. SongShow Plus songs into the OpenLP database.
""" """
import chardet
import os import os
import logging import logging
import re import re
@ -142,44 +143,44 @@ class SongShowPlusImport(SongImport):
log.debug(length_descriptor_size) log.debug(length_descriptor_size)
data = song_data.read(length_descriptor) data = song_data.read(length_descriptor)
if block_key == TITLE: if block_key == TITLE:
self.title = unicode(data, u'cp1252') self.title = unicode(data, chardet.detect(data)['encoding'])
elif block_key == AUTHOR: elif block_key == AUTHOR:
authors = data.split(" / ") authors = data.split(" / ")
for author in authors: for author in authors:
if author.find(",") !=-1: if author.find(",") !=-1:
authorParts = author.split(", ") authorParts = author.split(", ")
author = authorParts[1] + " " + authorParts[0] author = authorParts[1] + " " + authorParts[0]
self.parseAuthor(unicode(author, u'cp1252')) self.parseAuthor(unicode(author, chardet.detect(data)['encoding']))
elif block_key == COPYRIGHT: elif block_key == COPYRIGHT:
self.addCopyright(unicode(data, u'cp1252')) self.addCopyright(unicode(data, chardet.detect(data)['encoding']))
elif block_key == CCLI_NO: elif block_key == CCLI_NO:
self.ccliNumber = int(data) self.ccliNumber = int(data)
elif block_key == VERSE: elif block_key == VERSE:
self.addVerse(unicode(data, u'cp1252'), self.addVerse(unicode(data, chardet.detect(data)['encoding']),
"%s%s" % (VerseType.Tags[VerseType.Verse], verse_no)) "%s%s" % (VerseType.Tags[VerseType.Verse], verse_no))
elif block_key == CHORUS: elif block_key == CHORUS:
self.addVerse(unicode(data, u'cp1252'), self.addVerse(unicode(data, chardet.detect(data)['encoding']),
"%s%s" % (VerseType.Tags[VerseType.Chorus], verse_no)) "%s%s" % (VerseType.Tags[VerseType.Chorus], verse_no))
elif block_key == BRIDGE: elif block_key == BRIDGE:
self.addVerse(unicode(data, u'cp1252'), self.addVerse(unicode(data, chardet.detect(data)['encoding']),
"%s%s" % (VerseType.Tags[VerseType.Bridge], verse_no)) "%s%s" % (VerseType.Tags[VerseType.Bridge], verse_no))
elif block_key == TOPIC: elif block_key == TOPIC:
self.topics.append(unicode(data, u'cp1252')) self.topics.append(unicode(data, chardet.detect(data)['encoding']))
elif block_key == COMMENTS: elif block_key == COMMENTS:
self.comments = unicode(data, u'cp1252') self.comments = unicode(data, chardet.detect(data)['encoding'])
elif block_key == VERSE_ORDER: elif block_key == VERSE_ORDER:
verse_tag = self.toOpenLPVerseTag(data, True) verse_tag = self.toOpenLPVerseTag(data, True)
if verse_tag: if verse_tag:
if not isinstance(verse_tag, unicode): if not isinstance(verse_tag, unicode):
verse_tag = unicode(verse_tag, u'cp1252') verse_tag = unicode(verse_tag, chardet.detect(data)['encoding'])
self.sspVerseOrderList.append(verse_tag) self.sspVerseOrderList.append(verse_tag)
elif block_key == SONG_BOOK: elif block_key == SONG_BOOK:
self.songBookName = unicode(data, u'cp1252') self.songBookName = unicode(data, chardet.detect(data)['encoding'])
elif block_key == SONG_NUMBER: elif block_key == SONG_NUMBER:
self.songNumber = ord(data) self.songNumber = ord(data)
elif block_key == CUSTOM_VERSE: elif block_key == CUSTOM_VERSE:
verse_tag = self.toOpenLPVerseTag(verse_name) verse_tag = self.toOpenLPVerseTag(verse_name)
self.addVerse(unicode(data, u'cp1252'), verse_tag) self.addVerse(unicode(data, chardet.detect(data)['encoding']), verse_tag)
else: else:
log.debug("Unrecognised blockKey: %s, data: %s" log.debug("Unrecognised blockKey: %s, data: %s"
% (block_key, data)) % (block_key, data))