Rewrote PowerSongImport class to read variable-length strings directly from file, rather than searching for them. Other minor fixes.

This commit is contained in:
Samuel Findlay 2012-05-01 23:51:46 +10:00
parent 1184e9219d
commit 63b71802ab
2 changed files with 77 additions and 80 deletions

View File

@ -29,7 +29,6 @@ The :mod:`powersongimport` module provides the functionality for importing
PowerSong songs into the OpenLP database. PowerSong songs into the OpenLP database.
""" """
import logging import logging
import re
from openlp.core.lib import translate from openlp.core.lib import translate
from openlp.plugins.songs.lib.songimport import SongImport from openlp.plugins.songs.lib.songimport import SongImport
@ -43,34 +42,27 @@ class PowerSongImport(SongImport):
**PowerSong Song File Format:** **PowerSong Song File Format:**
* Encoded as UTF-8. The file has a number of label-field pairs of variable length.
* The file has a number of fields, with the song metadata fields first,
followed by the lyrics fields.
Fields: Labels and Fields:
Each field begins with one of four labels, each of which begin with one * Every label and field is preceded by an integer which specifies its
non-printing byte: byte-length.
* If the length < 128 bytes, only one byte is used to encode
* ``ENQ`` (0x05) ``TITLE`` the length integer.
* ``ACK`` (0x06) ``AUTHOR`` * But if it's greater, as many bytes are used as necessary:
* ``CR`` (0x0d) ``COPYRIGHTLINE`` * the first byte = (length % 128) + 128
* ``EOT`` (0x04) ``PART`` * the next byte = length / 128
* another byte is only used if (length / 128) >= 128
The field label is separated from the field contents by one random byte. * and so on (3 bytes needed iff length > 16383)
Each field ends at the next field label, or at the end of the file.
Metadata fields: Metadata fields:
* Every PowerSong file begins with a TITLE field. * Every PowerSong file begins with a TITLE field.
* This is followed by zero or more AUTHOR fields. * This is followed by zero or more AUTHOR fields.
* The next field is always COPYRIGHTLINE, but it may be empty (in which * The next label is always COPYRIGHTLINE, but its field may be empty.
case the byte following the label is the null byte 0x00). This field may also contain a CCLI number: e.g. "CCLI 176263".
When the field contents are not empty, the first byte is 0xc2 and
should be discarded.
This field may contain a CCLI number at the end: e.g. "CCLI 176263"
Lyrics fields: Lyrics fields:
* The COPYRIGHTLINE field is followed by zero or more PART fields, each * Each verse is contained in a PART field.
of which contains one verse.
* Lines have Windows line endings ``CRLF`` (0x0d, 0x0a). * Lines have Windows line endings ``CRLF`` (0x0d, 0x0a).
* There is no concept of verse types. * There is no concept of verse types.
@ -78,12 +70,6 @@ class PowerSongImport(SongImport):
* .song * .song
""" """
def __init__(self, manager, **kwargs):
"""
Initialise the PowerSong importer.
"""
SongImport.__init__(self, manager, **kwargs)
def doImport(self): def doImport(self):
""" """
Receive a single file or a list of files to import. Receive a single file or a list of files to import.
@ -94,67 +80,78 @@ class PowerSongImport(SongImport):
if self.stopImportFlag: if self.stopImportFlag:
return return
self.setDefaults() self.setDefaults()
with open(file, 'rb') as song_file: with open(file, 'rb') as self.song_file:
# Check file is valid PowerSong song format # Get title and check file is valid PowerSong song format
if song_file.read(6) != u'\x05TITLE': label, field = self.readLabelField()
if label != u'TITLE':
self.logError(file, unicode( self.logError(file, unicode(
translate('SongsPlugin.PowerSongSongImport', translate('SongsPlugin.PowerSongSongImport',
('Invalid PowerSong song file. Missing ' ('Invalid PowerSong song file. Missing '
'"\x05TITLE" header.')))) '"TITLE" header.'))))
continue continue
song_data = unicode(song_file.read(), u'utf-8', u'replace') else:
# Extract title and author fields self.title = field.replace(u'\n', u' ')
first_part, sep, song_data = song_data.partition( while label:
u'\x0DCOPYRIGHTLINE') label, field = self.readLabelField()
if not sep: # Get the author(s)
self.logError(file, unicode( if label == u'AUTHOR':
translate('SongsPlugin.PowerSongSongImport', self.parseAuthor(field)
('Invalid PowerSong song file. Missing ' # Get copyright and look for CCLI number
'"\x0DCOPYRIGHTLINE" string.')))) elif label == u'COPYRIGHTLINE':
continue found_copyright = True
title_authors = first_part.split(u'\x06AUTHOR') copyright, sep, ccli_no = field.rpartition(u'CCLI')
# Get the song title
self.title = self.stripControlChars(title_authors[0][1:])
# Extract the author(s)
for author in title_authors[1:]:
self.parseAuthor(self.stripControlChars(author[1:]))
# Get copyright and CCLI number
copyright, sep, song_data = song_data.partition(
u'\x04PART')
if not sep:
self.logError(file, unicode(
translate('SongsPlugin.PowerSongSongImport',
('No verses found. Missing '
'"\x04PART" string.'))))
continue
copyright, sep, ccli_no = copyright[1:].rpartition(u'CCLI ')
if not sep: if not sep:
copyright = ccli_no copyright = ccli_no
ccli_no = u'' ccli_no = u''
if copyright: if copyright:
if copyright[0] == u'\u00c2': self.addCopyright(copyright.rstrip(
copyright = copyright[1:] u'\n').replace(u'\n', u' '))
self.addCopyright(self.stripControlChars(
copyright.rstrip(u'\n')))
if ccli_no: if ccli_no:
ccli_no = ccli_no.strip() ccli_no = ccli_no.strip(u' :')
if ccli_no.isdigit(): if ccli_no.isdigit():
self.ccliNumber = self.stripControlChars(ccli_no) self.ccliNumber = ccli_no
# Get the verse(s) # Get verse(s)
verses = song_data.split(u'\x04PART') elif label == u'PART':
for verse in verses: self.addVerse(field)
self.addVerse(self.stripControlChars(verse[1:])) # Check for copyright label
if not found_copyright:
self.logError(file, unicode(
translate('SongsPlugin.PowerSongSongImport',
('"%s" Invalid PowerSong song file. Missing '
'"COPYRIGHTLINE" string.' % self.title))))
continue
# Check for at least one verse
if not self.verses:
self.logError(file, unicode(
translate('SongsPlugin.PowerSongSongImport',
('"%s" No verses found. Missing "PART" string.'
% self.title))))
continue
if not self.finish(): if not self.finish():
self.logError(file) self.logError(file)
def stripControlChars(self, text): def readLabelField(self):
""" """
Get rid of ASCII control characters. Return as a 2-tuple the next two variable-length strings from song file
"""
label = unicode(self.song_file.read(
self.readLength()), u'utf-8', u'ignore')
if label:
field = unicode(self.song_file.read(
self.readLength()), u'utf-8', u'ignore')
else:
field = u''
return label, field
Illegal chars are ASCII code points 0-31 and 127, except: def readLength(self):
* ``HT`` (0x09) - Tab
* ``LF`` (0x0a) - Line feed
* ``CR`` (0x0d) - Carriage return
""" """
ILLEGAL_CHARS = u'([\x00-\x08\x0b-\x0c\x0e-\x1f\x7f])' Return the byte-length of the next variable-length string in song file
return re.sub(ILLEGAL_CHARS, '', text) """
this_byte_char = self.song_file.read(1)
if not this_byte_char:
return 0
this_byte = ord(this_byte_char)
if this_byte < 128:
return this_byte
else:
return (self.readLength() * 128) + (this_byte - 128)