- openlp.org 1.x importer detects the character encoding for the database and shows a inputDialog for correcting the guess.

- prefer UTF-8 encoding for CCLI import
- split song order at consecutive white spaces (it rejected song orders with two consecutive spaces)

bzr-revno: 1134
This commit is contained in:
Meinert Jordan 2010-12-08 19:15:12 +00:00 committed by Tim Bentley
commit 28de4fbdae
4 changed files with 134 additions and 45 deletions

View File

@ -536,7 +536,7 @@ class EditSongForm(QtGui.QDialog, Ui_EditSongDialog):
return False
if self.song.verse_order:
order = []
order_names = self.song.verse_order.split(u' ')
order_names = self.song.verse_order.split()
for item in order_names:
if len(item) == 1:
order.append(item.lower() + u'1')

7
openlp/plugins/songs/lib/cclifileimport.py Executable file → Normal file
View File

@ -76,7 +76,12 @@ class CCLIFileImport(SongImport):
lines = []
if os.path.isfile(filename):
detect_file = open(filename, u'r')
details = chardet.detect(detect_file.read(2048))
detect_content = detect_file.read(2048)
try:
unicode(detect_content, u'utf-8')
details = {'confidence': 1, 'encoding': 'utf-8'}
except UnicodeDecodeError:
details = chardet.detect(detect_content)
detect_file.close()
infile = codecs.open(filename, u'r', details['encoding'])
lines = infile.readlines()

View File

@ -366,7 +366,7 @@ class SongMediaItem(MediaManagerItem):
verse[1][:30], unicode(verse[1]), verseTag)
else:
# Loop through the verse list and expand the song accordingly.
for order in song.verse_order.upper().split(u' '):
for order in song.verse_order.upper().split():
if len(order) == 0:
break
for verse in verseList:

View File

@ -27,8 +27,11 @@
The :mod:`olp1import` module provides the functionality for importing
openlp.org 1.x song databases into the current installation database.
"""
from PyQt4 import QtGui
import logging
import chardet
from chardet.universaldetector import UniversalDetector
import sqlite
from openlp.core.lib import translate
@ -56,60 +59,38 @@ class OpenLP1SongImport(SongImport):
SongImport.__init__(self, manager)
self.import_source = kwargs[u'filename']
def decode_string(self, raw, guess):
"""
Use chardet to detect the encoding of the raw string, and convert it
to unicode.
``raw``
The raw bytestring to decode.
``guess``
What chardet guessed the encoding to be.
"""
if guess[u'confidence'] < 0.8:
codec = u'windows-1252'
else:
codec = guess[u'encoding']
try:
decoded = unicode(raw, codec)
self.last_encoding = codec
except UnicodeDecodeError:
log.exception(
u'Error in detecting openlp.org 1.x database encoding.')
try:
decoded = unicode(raw, self.last_encoding)
except UnicodeDecodeError:
# possibly show an error form
#self.import_wizard.showError(u'There was a problem '
# u'detecting the encoding of a string')
decoded = raw
return decoded
def do_import(self):
"""
Run the import for an openlp.org 1.x song database.
"""
# Connect to the database
connection = sqlite.connect(self.import_source)
encoding = self.get_encoding()
if not encoding:
return False
connection = sqlite.connect(self.import_source, mode=0444,
encoding=(encoding, 'replace'))
cursor = connection.cursor()
# Determine if we're using a new or an old DB
cursor.execute(u'SELECT name FROM sqlite_master '
u'WHERE type = \'table\' AND name = \'tracks\'')
table_list = cursor.fetchall()
new_db = len(table_list) > 0
new_db = len(cursor.fetchall()) > 0
# Count the number of records we need to import, for the progress bar
cursor.execute(u'-- types int')
cursor.execute(u'SELECT COUNT(songid) FROM songs')
count = int(cursor.fetchone()[0])
count = cursor.fetchone()[0]
success = True
self.import_wizard.importProgressBar.setMaximum(count)
# "cache" our list of authors
cursor.execute(u'-- types int, unicode')
cursor.execute(u'SELECT authorid, authorname FROM authors')
authors = cursor.fetchall()
if new_db:
# "cache" our list of tracks
cursor.execute(u'-- types int, unicode')
cursor.execute(u'SELECT trackid, fulltrackname FROM tracks')
tracks = cursor.fetchall()
# Import the songs
cursor.execute(u'-- types int, unicode, unicode, unicode')
cursor.execute(u'SELECT songid, songtitle, lyrics || \'\' AS lyrics, '
u'copyrightinfo FROM songs')
songs = cursor.fetchall()
@ -119,16 +100,19 @@ class OpenLP1SongImport(SongImport):
success = False
break
song_id = song[0]
guess = chardet.detect(song[2])
title = self.decode_string(song[1], guess)
lyrics = self.decode_string(song[2], guess).replace(u'\r', u'')
copyright = self.decode_string(song[3], guess)
title = song[1]
lyrics = song[2].replace(u'\r\n', u'\n')
copyright = song[3]
self.import_wizard.incrementProgressBar(
unicode(translate('SongsPlugin.ImportWizardForm',
'Importing "%s"...')) % title)
self.title = title
self.process_song_text(lyrics)
verses = lyrics.split(u'\n\n')
for verse in verses:
if verse.strip() != u'':
self.add_verse(verse.strip())
self.add_copyright(copyright)
cursor.execute(u'-- types int')
cursor.execute(u'SELECT authorid FROM songauthors '
u'WHERE songid = %s' % song_id)
author_ids = cursor.fetchall()
@ -138,12 +122,13 @@ class OpenLP1SongImport(SongImport):
break
for author in authors:
if author[0] == author_id[0]:
self.parse_author(self.decode_string(author[1], guess))
self.parse_author(author[1])
break
if self.stop_import_flag:
success = False
break
if new_db:
cursor.execute(u'-- types int')
cursor.execute(u'SELECT trackid FROM songtracks '
u'WHERE songid = %s ORDER BY listindex' % song_id)
track_ids = cursor.fetchall()
@ -153,8 +138,7 @@ class OpenLP1SongImport(SongImport):
break
for track in tracks:
if track[0] == track_id[0]:
self.add_media_file(self.decode_string(track[1],
guess))
self.add_media_file(track[1])
break
if self.stop_import_flag:
success = False
@ -162,3 +146,103 @@ class OpenLP1SongImport(SongImport):
self.finish()
return success
def get_encoding(self):
"""
Detect character encoding of an openlp.org 1.x song database.
"""
# Connect to the database
connection = sqlite.connect(self.import_source, mode=0444)
cursor = connection.cursor()
detector = UniversalDetector()
# detect charset by authors
cursor.execute(u'SELECT authorname FROM authors')
authors = cursor.fetchall()
for author in authors:
detector.feed(author[0])
if detector.done:
detector.close()
return detector.result[u'encoding']
# detect charset by songs
cursor.execute(u'SELECT songtitle, copyrightinfo, '
u'lyrics || \'\' AS lyrics FROM songs')
songs = cursor.fetchall()
for index in [0, 1, 2]:
for song in songs:
detector.feed(song[index])
if detector.done:
detector.close()
return detector.result[u'encoding']
# detect charset by songs
cursor.execute(u'SELECT name FROM sqlite_master '
u'WHERE type = \'table\' AND name = \'tracks\'')
if len(cursor.fetchall()) > 0:
cursor.execute(u'SELECT fulltrackname FROM tracks')
tracks = cursor.fetchall()
for track in tracks:
detector.feed(track[0])
if detector.done:
detector.close()
return detector.result[u'encoding']
detector.close()
guess = detector.result[u'encoding']
# map chardet result to compatible windows standard code page
codepage_mapping = {'IBM866': u'cp866', 'TIS-620': u'cp874',
'SHIFT_JIS': u'cp932', 'GB2312': u'cp936', 'HZ-GB-2312': u'cp936',
'EUC-KR': u'cp949', 'Big5': u'cp950', 'ISO-8859-2': u'cp1250',
'windows-1250': u'cp1250', 'windows-1251': u'cp1251',
'windows-1252': u'cp1252', 'ISO-8859-7': u'cp1253',
'windows-1253': u'cp1253', 'ISO-8859-8': u'cp1255',
'windows-1255': u'cp1255'}
if guess in codepage_mapping:
guess = codepage_mapping[guess]
else:
guess = u'cp1252'
# Show dialog for encoding selection
encodings = [(u'cp1256', translate('SongsPlugin.OpenLP1SongImport',
'Arabic (CP-1256)')),
(u'cp1257', translate('SongsPlugin.OpenLP1SongImport',
'Baltic (CP-1257)')),
(u'cp1250', translate('SongsPlugin.OpenLP1SongImport',
'Central European (CP-1250)')),
(u'cp1251', translate('SongsPlugin.OpenLP1SongImport',
'Cyrillic (CP-1251)')),
(u'cp1253', translate('SongsPlugin.OpenLP1SongImport',
'Greek (CP-1253)')),
(u'cp1255', translate('SongsPlugin.OpenLP1SongImport',
'Hebrew (CP-1255)')),
(u'cp932', translate('SongsPlugin.OpenLP1SongImport',
'Japanese (CP-932)')),
(u'cp949', translate('SongsPlugin.OpenLP1SongImport',
'Korean (CP-949)')),
(u'cp936', translate('SongsPlugin.OpenLP1SongImport',
'Simplified Chinese (CP-936)')),
(u'cp874', translate('SongsPlugin.OpenLP1SongImport',
'Thai (CP-874)')),
(u'cp950', translate('SongsPlugin.OpenLP1SongImport',
'Traditional Chinese (CP-950)')),
(u'cp1254', translate('SongsPlugin.OpenLP1SongImport',
'Turkish (CP-1254)')),
(u'cp1258', translate('SongsPlugin.OpenLP1SongImport',
'Vietnam (CP-1258)')),
(u'cp1252', translate('SongsPlugin.OpenLP1SongImport',
'Western European (CP-1252)'))]
encoding_index = 0
for index in range(len(encodings)):
if guess == encodings[index][0]:
encoding_index = index
break
chosen_encoding = QtGui.QInputDialog.getItem(None,
translate('SongsPlugin.OpenLP1SongImport',
'Database Character Encoding'),
translate('SongsPlugin.OpenLP1SongImport',
'The codepage setting is responsible\n'
'for the correct character representation.\n'
'Usually you are fine with the preselected choise.'),
[pair[1] for pair in encodings], encoding_index, False)
if not chosen_encoding[1]:
return None
return filter(lambda item: item[1] == chosen_encoding[0],
encodings)[0][0]