forked from openlp/openlp
- The openlp.org 1.x importer detects the character encoding of the database and shows an input dialog for correcting the guess.
- Prefer UTF-8 encoding for CCLI import - split the song order at consecutive whitespace (previously, song orders containing two consecutive spaces were rejected) bzr-revno: 1134
This commit is contained in:
commit
28de4fbdae
@ -536,7 +536,7 @@ class EditSongForm(QtGui.QDialog, Ui_EditSongDialog):
|
||||
return False
|
||||
if self.song.verse_order:
|
||||
order = []
|
||||
order_names = self.song.verse_order.split(u' ')
|
||||
order_names = self.song.verse_order.split()
|
||||
for item in order_names:
|
||||
if len(item) == 1:
|
||||
order.append(item.lower() + u'1')
|
||||
|
7
openlp/plugins/songs/lib/cclifileimport.py
Executable file → Normal file
7
openlp/plugins/songs/lib/cclifileimport.py
Executable file → Normal file
@ -76,7 +76,12 @@ class CCLIFileImport(SongImport):
|
||||
lines = []
|
||||
if os.path.isfile(filename):
|
||||
detect_file = open(filename, u'r')
|
||||
details = chardet.detect(detect_file.read(2048))
|
||||
detect_content = detect_file.read(2048)
|
||||
try:
|
||||
unicode(detect_content, u'utf-8')
|
||||
details = {'confidence': 1, 'encoding': 'utf-8'}
|
||||
except UnicodeDecodeError:
|
||||
details = chardet.detect(detect_content)
|
||||
detect_file.close()
|
||||
infile = codecs.open(filename, u'r', details['encoding'])
|
||||
lines = infile.readlines()
|
||||
|
@ -366,7 +366,7 @@ class SongMediaItem(MediaManagerItem):
|
||||
verse[1][:30], unicode(verse[1]), verseTag)
|
||||
else:
|
||||
# Loop through the verse list and expand the song accordingly.
|
||||
for order in song.verse_order.upper().split(u' '):
|
||||
for order in song.verse_order.upper().split():
|
||||
if len(order) == 0:
|
||||
break
|
||||
for verse in verseList:
|
||||
|
@ -27,8 +27,11 @@
|
||||
The :mod:`olp1import` module provides the functionality for importing
|
||||
openlp.org 1.x song databases into the current installation database.
|
||||
"""
|
||||
|
||||
from PyQt4 import QtGui
|
||||
|
||||
import logging
|
||||
import chardet
|
||||
from chardet.universaldetector import UniversalDetector
|
||||
import sqlite
|
||||
|
||||
from openlp.core.lib import translate
|
||||
@ -56,60 +59,38 @@ class OpenLP1SongImport(SongImport):
|
||||
SongImport.__init__(self, manager)
|
||||
self.import_source = kwargs[u'filename']
|
||||
|
||||
def decode_string(self, raw, guess):
    """
    Turn a raw bytestring into unicode, based on the encoding chardet
    guessed for it.

    ``raw``
        The raw bytestring to decode.

    ``guess``
        The chardet result; its ``confidence`` and ``encoding`` entries
        are read.

    Returns the decoded unicode string. If neither the selected codec nor
    ``self.last_encoding`` can decode ``raw``, ``raw`` is returned as is.
    """
    # A guess with less than 80% confidence is not trusted; windows-1252
    # is used in its place.
    encoding = (u'windows-1252' if guess[u'confidence'] < 0.8
        else guess[u'encoding'])
    try:
        text = unicode(raw, encoding)
    except UnicodeDecodeError:
        log.exception(
            u'Error in detecting openlp.org 1.x database encoding.')
        # Second attempt: the codec remembered from the last successful
        # decode.
        try:
            return unicode(raw, self.last_encoding)
        except UnicodeDecodeError:
            # Both attempts failed (an error form could be shown here);
            # hand the bytes back undecoded.
            return raw
    # Remember the codec that worked so later failures can fall back to it.
    self.last_encoding = encoding
    return text
|
||||
|
||||
def do_import(self):
|
||||
"""
|
||||
Run the import for an openlp.org 1.x song database.
|
||||
"""
|
||||
# Connect to the database
|
||||
connection = sqlite.connect(self.import_source)
|
||||
encoding = self.get_encoding()
|
||||
if not encoding:
|
||||
return False
|
||||
connection = sqlite.connect(self.import_source, mode=0444,
|
||||
encoding=(encoding, 'replace'))
|
||||
cursor = connection.cursor()
|
||||
# Determine if we're using a new or an old DB
|
||||
cursor.execute(u'SELECT name FROM sqlite_master '
|
||||
u'WHERE type = \'table\' AND name = \'tracks\'')
|
||||
table_list = cursor.fetchall()
|
||||
new_db = len(table_list) > 0
|
||||
new_db = len(cursor.fetchall()) > 0
|
||||
# Count the number of records we need to import, for the progress bar
|
||||
cursor.execute(u'-- types int')
|
||||
cursor.execute(u'SELECT COUNT(songid) FROM songs')
|
||||
count = int(cursor.fetchone()[0])
|
||||
count = cursor.fetchone()[0]
|
||||
success = True
|
||||
self.import_wizard.importProgressBar.setMaximum(count)
|
||||
# "cache" our list of authors
|
||||
cursor.execute(u'-- types int, unicode')
|
||||
cursor.execute(u'SELECT authorid, authorname FROM authors')
|
||||
authors = cursor.fetchall()
|
||||
if new_db:
|
||||
# "cache" our list of tracks
|
||||
cursor.execute(u'-- types int, unicode')
|
||||
cursor.execute(u'SELECT trackid, fulltrackname FROM tracks')
|
||||
tracks = cursor.fetchall()
|
||||
# Import the songs
|
||||
cursor.execute(u'-- types int, unicode, unicode, unicode')
|
||||
cursor.execute(u'SELECT songid, songtitle, lyrics || \'\' AS lyrics, '
|
||||
u'copyrightinfo FROM songs')
|
||||
songs = cursor.fetchall()
|
||||
@ -119,16 +100,19 @@ class OpenLP1SongImport(SongImport):
|
||||
success = False
|
||||
break
|
||||
song_id = song[0]
|
||||
guess = chardet.detect(song[2])
|
||||
title = self.decode_string(song[1], guess)
|
||||
lyrics = self.decode_string(song[2], guess).replace(u'\r', u'')
|
||||
copyright = self.decode_string(song[3], guess)
|
||||
title = song[1]
|
||||
lyrics = song[2].replace(u'\r\n', u'\n')
|
||||
copyright = song[3]
|
||||
self.import_wizard.incrementProgressBar(
|
||||
unicode(translate('SongsPlugin.ImportWizardForm',
|
||||
'Importing "%s"...')) % title)
|
||||
self.title = title
|
||||
self.process_song_text(lyrics)
|
||||
verses = lyrics.split(u'\n\n')
|
||||
for verse in verses:
|
||||
if verse.strip() != u'':
|
||||
self.add_verse(verse.strip())
|
||||
self.add_copyright(copyright)
|
||||
cursor.execute(u'-- types int')
|
||||
cursor.execute(u'SELECT authorid FROM songauthors '
|
||||
u'WHERE songid = %s' % song_id)
|
||||
author_ids = cursor.fetchall()
|
||||
@ -138,12 +122,13 @@ class OpenLP1SongImport(SongImport):
|
||||
break
|
||||
for author in authors:
|
||||
if author[0] == author_id[0]:
|
||||
self.parse_author(self.decode_string(author[1], guess))
|
||||
self.parse_author(author[1])
|
||||
break
|
||||
if self.stop_import_flag:
|
||||
success = False
|
||||
break
|
||||
if new_db:
|
||||
cursor.execute(u'-- types int')
|
||||
cursor.execute(u'SELECT trackid FROM songtracks '
|
||||
u'WHERE songid = %s ORDER BY listindex' % song_id)
|
||||
track_ids = cursor.fetchall()
|
||||
@ -153,8 +138,7 @@ class OpenLP1SongImport(SongImport):
|
||||
break
|
||||
for track in tracks:
|
||||
if track[0] == track_id[0]:
|
||||
self.add_media_file(self.decode_string(track[1],
|
||||
guess))
|
||||
self.add_media_file(track[1])
|
||||
break
|
||||
if self.stop_import_flag:
|
||||
success = False
|
||||
@ -162,3 +146,103 @@ class OpenLP1SongImport(SongImport):
|
||||
self.finish()
|
||||
return success
|
||||
|
||||
def get_encoding(self):
|
||||
"""
|
||||
Detect character encoding of an openlp.org 1.x song database.
|
||||
"""
|
||||
# Connect to the database
|
||||
connection = sqlite.connect(self.import_source, mode=0444)
|
||||
cursor = connection.cursor()
|
||||
|
||||
detector = UniversalDetector()
|
||||
# detect charset by authors
|
||||
cursor.execute(u'SELECT authorname FROM authors')
|
||||
authors = cursor.fetchall()
|
||||
for author in authors:
|
||||
detector.feed(author[0])
|
||||
if detector.done:
|
||||
detector.close()
|
||||
return detector.result[u'encoding']
|
||||
# detect charset by songs
|
||||
cursor.execute(u'SELECT songtitle, copyrightinfo, '
|
||||
u'lyrics || \'\' AS lyrics FROM songs')
|
||||
songs = cursor.fetchall()
|
||||
for index in [0, 1, 2]:
|
||||
for song in songs:
|
||||
detector.feed(song[index])
|
||||
if detector.done:
|
||||
detector.close()
|
||||
return detector.result[u'encoding']
|
||||
# detect charset by songs
|
||||
cursor.execute(u'SELECT name FROM sqlite_master '
|
||||
u'WHERE type = \'table\' AND name = \'tracks\'')
|
||||
if len(cursor.fetchall()) > 0:
|
||||
cursor.execute(u'SELECT fulltrackname FROM tracks')
|
||||
tracks = cursor.fetchall()
|
||||
for track in tracks:
|
||||
detector.feed(track[0])
|
||||
if detector.done:
|
||||
detector.close()
|
||||
return detector.result[u'encoding']
|
||||
detector.close()
|
||||
guess = detector.result[u'encoding']
|
||||
|
||||
# map chardet result to compatible windows standard code page
|
||||
codepage_mapping = {'IBM866': u'cp866', 'TIS-620': u'cp874',
|
||||
'SHIFT_JIS': u'cp932', 'GB2312': u'cp936', 'HZ-GB-2312': u'cp936',
|
||||
'EUC-KR': u'cp949', 'Big5': u'cp950', 'ISO-8859-2': u'cp1250',
|
||||
'windows-1250': u'cp1250', 'windows-1251': u'cp1251',
|
||||
'windows-1252': u'cp1252', 'ISO-8859-7': u'cp1253',
|
||||
'windows-1253': u'cp1253', 'ISO-8859-8': u'cp1255',
|
||||
'windows-1255': u'cp1255'}
|
||||
if guess in codepage_mapping:
|
||||
guess = codepage_mapping[guess]
|
||||
else:
|
||||
guess = u'cp1252'
|
||||
|
||||
# Show dialog for encoding selection
|
||||
encodings = [(u'cp1256', translate('SongsPlugin.OpenLP1SongImport',
|
||||
'Arabic (CP-1256)')),
|
||||
(u'cp1257', translate('SongsPlugin.OpenLP1SongImport',
|
||||
'Baltic (CP-1257)')),
|
||||
(u'cp1250', translate('SongsPlugin.OpenLP1SongImport',
|
||||
'Central European (CP-1250)')),
|
||||
(u'cp1251', translate('SongsPlugin.OpenLP1SongImport',
|
||||
'Cyrillic (CP-1251)')),
|
||||
(u'cp1253', translate('SongsPlugin.OpenLP1SongImport',
|
||||
'Greek (CP-1253)')),
|
||||
(u'cp1255', translate('SongsPlugin.OpenLP1SongImport',
|
||||
'Hebrew (CP-1255)')),
|
||||
(u'cp932', translate('SongsPlugin.OpenLP1SongImport',
|
||||
'Japanese (CP-932)')),
|
||||
(u'cp949', translate('SongsPlugin.OpenLP1SongImport',
|
||||
'Korean (CP-949)')),
|
||||
(u'cp936', translate('SongsPlugin.OpenLP1SongImport',
|
||||
'Simplified Chinese (CP-936)')),
|
||||
(u'cp874', translate('SongsPlugin.OpenLP1SongImport',
|
||||
'Thai (CP-874)')),
|
||||
(u'cp950', translate('SongsPlugin.OpenLP1SongImport',
|
||||
'Traditional Chinese (CP-950)')),
|
||||
(u'cp1254', translate('SongsPlugin.OpenLP1SongImport',
|
||||
'Turkish (CP-1254)')),
|
||||
(u'cp1258', translate('SongsPlugin.OpenLP1SongImport',
|
||||
'Vietnam (CP-1258)')),
|
||||
(u'cp1252', translate('SongsPlugin.OpenLP1SongImport',
|
||||
'Western European (CP-1252)'))]
|
||||
encoding_index = 0
|
||||
for index in range(len(encodings)):
|
||||
if guess == encodings[index][0]:
|
||||
encoding_index = index
|
||||
break
|
||||
chosen_encoding = QtGui.QInputDialog.getItem(None,
|
||||
translate('SongsPlugin.OpenLP1SongImport',
|
||||
'Database Character Encoding'),
|
||||
translate('SongsPlugin.OpenLP1SongImport',
|
||||
'The codepage setting is responsible\n'
|
||||
'for the correct character representation.\n'
|
||||
'Usually you are fine with the preselected choise.'),
|
||||
[pair[1] for pair in encodings], encoding_index, False)
|
||||
if not chosen_encoding[1]:
|
||||
return None
|
||||
return filter(lambda item: item[1] == chosen_encoding[0],
|
||||
encodings)[0][0]
|
||||
|
Loading…
Reference in New Issue
Block a user