diff --git a/openlp/plugins/songs/forms/editsongform.py b/openlp/plugins/songs/forms/editsongform.py index e421e63a0..247e5e837 100644 --- a/openlp/plugins/songs/forms/editsongform.py +++ b/openlp/plugins/songs/forms/editsongform.py @@ -536,7 +536,7 @@ class EditSongForm(QtGui.QDialog, Ui_EditSongDialog): return False if self.song.verse_order: order = [] - order_names = self.song.verse_order.split(u' ') + order_names = self.song.verse_order.split() for item in order_names: if len(item) == 1: order.append(item.lower() + u'1') diff --git a/openlp/plugins/songs/lib/cclifileimport.py b/openlp/plugins/songs/lib/cclifileimport.py old mode 100755 new mode 100644 index 207756e4b..b30ec20ce --- a/openlp/plugins/songs/lib/cclifileimport.py +++ b/openlp/plugins/songs/lib/cclifileimport.py @@ -76,7 +76,12 @@ class CCLIFileImport(SongImport): lines = [] if os.path.isfile(filename): detect_file = open(filename, u'r') - details = chardet.detect(detect_file.read(2048)) + detect_content = detect_file.read(2048) + try: + unicode(detect_content, u'utf-8') + details = {'confidence': 1, 'encoding': 'utf-8'} + except UnicodeDecodeError: + details = chardet.detect(detect_content) detect_file.close() infile = codecs.open(filename, u'r', details['encoding']) lines = infile.readlines() diff --git a/openlp/plugins/songs/lib/mediaitem.py b/openlp/plugins/songs/lib/mediaitem.py index 432eee744..6de559e28 100644 --- a/openlp/plugins/songs/lib/mediaitem.py +++ b/openlp/plugins/songs/lib/mediaitem.py @@ -366,7 +366,7 @@ class SongMediaItem(MediaManagerItem): verse[1][:30], unicode(verse[1]), verseTag) else: # Loop through the verse list and expand the song accordingly. - for order in song.verse_order.upper().split(u' '): + for order in song.verse_order.upper().split(): if len(order) == 0: break for verse in verseList: diff --git a/openlp/plugins/songs/lib/olp1import.py b/openlp/plugins/songs/lib/olp1import.py index de77fdfde..7c01a4c19 100644 --- a/openlp/plugins/songs/lib/olp1import.py +++ b/openlp/plugins/songs/lib/olp1import.py @@ -27,8 +27,11 @@ The :mod:`olp1import` module provides the functionality for importing openlp.org 1.x song databases into the current installation database. """ + +from PyQt4 import QtGui + import logging -import chardet +from chardet.universaldetector import UniversalDetector import sqlite from openlp.core.lib import translate @@ -56,60 +59,38 @@ class OpenLP1SongImport(SongImport): SongImport.__init__(self, manager) self.import_source = kwargs[u'filename'] - def decode_string(self, raw, guess): - """ - Use chardet to detect the encoding of the raw string, and convert it - to unicode. - - ``raw`` - The raw bytestring to decode. - ``guess`` - What chardet guessed the encoding to be. - """ - if guess[u'confidence'] < 0.8: - codec = u'windows-1252' - else: - codec = guess[u'encoding'] - try: - decoded = unicode(raw, codec) - self.last_encoding = codec - except UnicodeDecodeError: - log.exception( - u'Error in detecting openlp.org 1.x database encoding.') - try: - decoded = unicode(raw, self.last_encoding) - except UnicodeDecodeError: - # possibly show an error form - #self.import_wizard.showError(u'There was a problem ' - # u'detecting the encoding of a string') - decoded = raw - return decoded - def do_import(self): """ Run the import for an openlp.org 1.x song database. """ # Connect to the database - connection = sqlite.connect(self.import_source) + encoding = self.get_encoding() + if not encoding: + return False + connection = sqlite.connect(self.import_source, mode=0444, + encoding=(encoding, 'replace')) cursor = connection.cursor() # Determine if we're using a new or an old DB cursor.execute(u'SELECT name FROM sqlite_master ' u'WHERE type = \'table\' AND name = \'tracks\'') - table_list = cursor.fetchall() - new_db = len(table_list) > 0 + new_db = len(cursor.fetchall()) > 0 # Count the number of records we need to import, for the progress bar + cursor.execute(u'-- types int') cursor.execute(u'SELECT COUNT(songid) FROM songs') - count = int(cursor.fetchone()[0]) + count = cursor.fetchone()[0] success = True self.import_wizard.importProgressBar.setMaximum(count) # "cache" our list of authors + cursor.execute(u'-- types int, unicode') cursor.execute(u'SELECT authorid, authorname FROM authors') authors = cursor.fetchall() if new_db: # "cache" our list of tracks + cursor.execute(u'-- types int, unicode') cursor.execute(u'SELECT trackid, fulltrackname FROM tracks') tracks = cursor.fetchall() # Import the songs + cursor.execute(u'-- types int, unicode, unicode, unicode') cursor.execute(u'SELECT songid, songtitle, lyrics || \'\' AS lyrics, ' u'copyrightinfo FROM songs') songs = cursor.fetchall() @@ -119,16 +100,19 @@ class OpenLP1SongImport(SongImport): success = False break song_id = song[0] - guess = chardet.detect(song[2]) - title = self.decode_string(song[1], guess) - lyrics = self.decode_string(song[2], guess).replace(u'\r', u'') - copyright = self.decode_string(song[3], guess) + title = song[1] + lyrics = song[2].replace(u'\r\n', u'\n') + copyright = song[3] self.import_wizard.incrementProgressBar( unicode(translate('SongsPlugin.ImportWizardForm', 'Importing "%s"...')) % title) self.title = title - self.process_song_text(lyrics) + verses = lyrics.split(u'\n\n') + for verse in verses: + if verse.strip() != u'': + self.add_verse(verse.strip()) self.add_copyright(copyright) + cursor.execute(u'-- types int') cursor.execute(u'SELECT authorid FROM songauthors ' u'WHERE songid = %s' % song_id) author_ids = cursor.fetchall() @@ -138,12 +122,13 @@ class OpenLP1SongImport(SongImport): break for author in authors: if author[0] == author_id[0]: - self.parse_author(self.decode_string(author[1], guess)) + self.parse_author(author[1]) break if self.stop_import_flag: success = False break if new_db: + cursor.execute(u'-- types int') cursor.execute(u'SELECT trackid FROM songtracks ' u'WHERE songid = %s ORDER BY listindex' % song_id) track_ids = cursor.fetchall() @@ -153,8 +138,7 @@ class OpenLP1SongImport(SongImport): break for track in tracks: if track[0] == track_id[0]: - self.add_media_file(self.decode_string(track[1], - guess)) + self.add_media_file(track[1]) break if self.stop_import_flag: success = False @@ -162,3 +146,103 @@ class OpenLP1SongImport(SongImport): self.finish() return success + def get_encoding(self): + """ + Detect character encoding of an openlp.org 1.x song database. + """ + # Connect to the database + connection = sqlite.connect(self.import_source, mode=0444) + cursor = connection.cursor() + + detector = UniversalDetector() + # detect charset by authors + cursor.execute(u'SELECT authorname FROM authors') + authors = cursor.fetchall() + for author in authors: + detector.feed(author[0]) + if detector.done: + detector.close() + return detector.result[u'encoding'] + # detect charset by songs + cursor.execute(u'SELECT songtitle, copyrightinfo, ' + u'lyrics || \'\' AS lyrics FROM songs') + songs = cursor.fetchall() + for index in [0, 1, 2]: + for song in songs: + detector.feed(song[index]) + if detector.done: + detector.close() + return detector.result[u'encoding'] + # detect charset by songs + cursor.execute(u'SELECT name FROM sqlite_master ' + u'WHERE type = \'table\' AND name = \'tracks\'') + if len(cursor.fetchall()) > 0: + cursor.execute(u'SELECT fulltrackname FROM tracks') + tracks = cursor.fetchall() + for track in tracks: + detector.feed(track[0]) + if detector.done: + detector.close() + return detector.result[u'encoding'] + detector.close() + guess = detector.result[u'encoding'] + + # map chardet result to compatible windows standard code page + codepage_mapping = {'IBM866': u'cp866', 'TIS-620': u'cp874', + 'SHIFT_JIS': u'cp932', 'GB2312': u'cp936', 'HZ-GB-2312': u'cp936', + 'EUC-KR': u'cp949', 'Big5': u'cp950', 'ISO-8859-2': u'cp1250', + 'windows-1250': u'cp1250', 'windows-1251': u'cp1251', + 'windows-1252': u'cp1252', 'ISO-8859-7': u'cp1253', + 'windows-1253': u'cp1253', 'ISO-8859-8': u'cp1255', + 'windows-1255': u'cp1255'} + if guess in codepage_mapping: + guess = codepage_mapping[guess] + else: + guess = u'cp1252' + + # Show dialog for encoding selection + encodings = [(u'cp1256', translate('SongsPlugin.OpenLP1SongImport', + 'Arabic (CP-1256)')), + (u'cp1257', translate('SongsPlugin.OpenLP1SongImport', + 'Baltic (CP-1257)')), + (u'cp1250', translate('SongsPlugin.OpenLP1SongImport', + 'Central European (CP-1250)')), + (u'cp1251', translate('SongsPlugin.OpenLP1SongImport', + 'Cyrillic (CP-1251)')), + (u'cp1253', translate('SongsPlugin.OpenLP1SongImport', + 'Greek (CP-1253)')), + (u'cp1255', translate('SongsPlugin.OpenLP1SongImport', + 'Hebrew (CP-1255)')), + (u'cp932', translate('SongsPlugin.OpenLP1SongImport', + 'Japanese (CP-932)')), + (u'cp949', translate('SongsPlugin.OpenLP1SongImport', + 'Korean (CP-949)')), + (u'cp936', translate('SongsPlugin.OpenLP1SongImport', + 'Simplified Chinese (CP-936)')), + (u'cp874', translate('SongsPlugin.OpenLP1SongImport', + 'Thai (CP-874)')), + (u'cp950', translate('SongsPlugin.OpenLP1SongImport', + 'Traditional Chinese (CP-950)')), + (u'cp1254', translate('SongsPlugin.OpenLP1SongImport', + 'Turkish (CP-1254)')), + (u'cp1258', translate('SongsPlugin.OpenLP1SongImport', + 'Vietnam (CP-1258)')), + (u'cp1252', translate('SongsPlugin.OpenLP1SongImport', + 'Western European (CP-1252)'))] + encoding_index = 0 + for index in range(len(encodings)): + if guess == encodings[index][0]: + encoding_index = index + break + chosen_encoding = QtGui.QInputDialog.getItem(None, + translate('SongsPlugin.OpenLP1SongImport', + 'Database Character Encoding'), + translate('SongsPlugin.OpenLP1SongImport', + 'The codepage setting is responsible\n' + 'for the correct character representation.\n' + 'Usually you are fine with the preselected choise.'), + [pair[1] for pair in encodings], encoding_index, False) + if not chosen_encoding[1]: + return None + return filter(lambda item: item[1] == chosen_encoding[0], + encodings)[0][0]