PowerSong importer working. Successful on test set of 1057 songs

This commit is contained in:
Samuel Findlay 2012-04-30 20:57:44 +10:00
parent ea9bfb160d
commit 370603c779
2 changed files with 35 additions and 17 deletions

View File

@ -173,10 +173,10 @@ class SongImportForm(OpenLPWizard):
self.onFoilPresenterRemoveButtonClicked)
QtCore.QObject.connect(self.powerSongAddButton,
QtCore.SIGNAL(u'clicked()'),
self.powerSongAddButtonClicked)
self.onPowerSongAddButtonClicked)
QtCore.QObject.connect(self.powerSongRemoveButton,
QtCore.SIGNAL(u'clicked()'),
self.powerSongRemoveButtonClicked)
self.onPowerSongRemoveButtonClicked)
def addCustomPages(self):
"""

View File

@ -29,6 +29,7 @@ The :mod:`powersongimport` module provides the functionality for importing
PowerSong songs into the OpenLP database.
"""
import logging
import re
from openlp.core.lib import translate
from openlp.plugins.songs.lib.songimport import SongImport
@ -52,7 +53,7 @@ class PowerSongImport(SongImport):
* ``ENQ`` (0x05) ``TITLE``
* ``ACK`` (0x06) ``AUTHOR``
* ``CR`` (0x0D) ``COPYRIGHTLINE``
* ``CR`` (0x0d) ``COPYRIGHTLINE``
* ``EOT`` (0x04) ``PART``
The field label is separated from the field contents by one random byte.
@ -63,14 +64,14 @@ class PowerSongImport(SongImport):
* This is followed by zero or more AUTHOR fields.
* The next field is always COPYRIGHTLINE, but it may be empty (in which
case the byte following the label is the null byte 0x00).
When the field contents are not empty, the first byte is 0xC2 and
When the field contents are not empty, the first byte is 0xc2 and
should be discarded.
This field may contain a CCLI number at the end: e.g. "CCLI 176263"
Lyrics fields:
* The COPYRIGHTLINE field is followed by zero or more PART fields, each
of which contains one verse.
* Lines have Windows line endings ``CRLF`` (0x0D, 0x0A).
* Lines have Windows line endings ``CRLF`` (0x0d, 0x0a).
* There is no concept of verse types.
Valid extensions for a PowerSong song file are:
@ -102,11 +103,11 @@ class PowerSongImport(SongImport):
('Invalid PowerSong song file. Missing '
'"\x05TITLE" header.'))))
continue
song_data = song_file.read()
song_data = unicode(song_file.read(), u'utf-8', u'replace')
# Extract title and author fields
first_part, sep, song_data = song_data.partition(
u'\x0DCOPYRIGHTLINE')
if sep == '':
if not sep:
self.logError(file, unicode(
translate('SongsPlugin.PowerSongSongImport',
('Invalid PowerSong song file. Missing '
@ -114,30 +115,47 @@ class PowerSongImport(SongImport):
continue
title_authors = first_part.split(u'\x06AUTHOR')
# Get the song title
self.title = title_authors[0][1:]
self.title = self.stripControlChars(title_authors[0][1:])
# Extract the author(s)
for author in title_authors[1:]:
self.parseAuthor(author[1:])
self.parseAuthor(self.stripControlChars(author[1:]))
# Get copyright and CCLI number
copyright, sep, song_data = song_data.partition(
u'\x04PART')
if sep == '':
if not sep:
self.logError(file, unicode(
translate('SongsPlugin.PowerSongSongImport',
('No verses found. Missing '
'"\x04PART" string(s).'))))
'"\x04PART" string.'))))
continue
copyright, sep, ccli_no = copyright[1:].rpartition(u'CCLI ')
if copyright[0] == u'\xC2':
copyright = copyright[1:]
self.addCopyright(copyright)
if ccli_no != '':
if not sep:
copyright = ccli_no
ccli_no = u''
if copyright:
if copyright[0] == u'\u00c2':
copyright = copyright[1:]
self.addCopyright(self.stripControlChars(
copyright.rstrip(u'\n')))
if ccli_no:
ccli_no = ccli_no.strip()
if ccli_no.isdigit():
self.ccliNumber = ccli_no
self.ccliNumber = self.stripControlChars(ccli_no)
# Get the verse(s)
verses = song_data.split(u'\x04PART')
for verse in verses:
self.addVerse(verse[1:])
self.addVerse(self.stripControlChars(verse[1:]))
if not self.finish():
self.logError(file)
def stripControlChars(self, text):
"""
Get rid of ASCII control characters.
Illegals chars are ASCII code points 0-31 and 127, except:
* ``HT`` (0x09) - Tab
* ``LF`` (0x0a) - Line feed
* ``CR`` (0x0d) - Carriage return
"""
ILLEGAL_CHARS = u'([\x00-\x08\x0b-\x0c\x0e-\x1f\x7f])'
return re.sub(ILLEGAL_CHARS, '', text)