forked from openlp/openlp
Rewrote PowerSongImport class to read variable-length strings directly from file, rather than searching for them. Other minor fixes.
This commit is contained in:
parent
1184e9219d
commit
63b71802ab
@ -29,7 +29,6 @@ The :mod:`powersongimport` module provides the functionality for importing
|
|||||||
PowerSong songs into the OpenLP database.
|
PowerSong songs into the OpenLP database.
|
||||||
"""
|
"""
|
||||||
import logging
|
import logging
|
||||||
import re
|
|
||||||
|
|
||||||
from openlp.core.lib import translate
|
from openlp.core.lib import translate
|
||||||
from openlp.plugins.songs.lib.songimport import SongImport
|
from openlp.plugins.songs.lib.songimport import SongImport
|
||||||
@ -43,34 +42,27 @@ class PowerSongImport(SongImport):
|
|||||||
|
|
||||||
**PowerSong Song File Format:**
|
**PowerSong Song File Format:**
|
||||||
|
|
||||||
* Encoded as UTF-8.
|
The file has a number of label-field pairs of variable length.
|
||||||
* The file has a number of fields, with the song metadata fields first,
|
|
||||||
followed by the lyrics fields.
|
|
||||||
|
|
||||||
Fields:
|
Labels and Fields:
|
||||||
Each field begins with one of four labels, each of which begins with one
|
* Every label and field is preceded by an integer which specifies its
|
||||||
non-printing byte:
|
byte-length.
|
||||||
|
* If the length < 128 bytes, only one byte is used to encode
|
||||||
* ``ENQ`` (0x05) ``TITLE``
|
the length integer.
|
||||||
* ``ACK`` (0x06) ``AUTHOR``
|
* But if it's greater, as many bytes are used as necessary:
|
||||||
* ``CR`` (0x0d) ``COPYRIGHTLINE``
|
* the first byte = (length % 128) + 128
|
||||||
* ``EOT`` (0x04) ``PART``
|
* the next byte = length / 128
|
||||||
|
* another byte is only used if (length / 128) >= 128
|
||||||
The field label is separated from the field contents by one random byte.
|
* and so on (3 bytes needed iff length > 16383)
|
||||||
Each field ends at the next field label, or at the end of the file.
|
|
||||||
|
|
||||||
Metadata fields:
|
Metadata fields:
|
||||||
* Every PowerSong file begins with a TITLE field.
|
* Every PowerSong file begins with a TITLE field.
|
||||||
* This is followed by zero or more AUTHOR fields.
|
* This is followed by zero or more AUTHOR fields.
|
||||||
* The next field is always COPYRIGHTLINE, but it may be empty (in which
|
* The next label is always COPYRIGHTLINE, but its field may be empty.
|
||||||
case the byte following the label is the null byte 0x00).
|
This field may also contain a CCLI number: e.g. "CCLI 176263".
|
||||||
When the field contents are not empty, the first byte is 0xc2 and
|
|
||||||
should be discarded.
|
|
||||||
This field may contain a CCLI number at the end: e.g. "CCLI 176263"
|
|
||||||
|
|
||||||
Lyrics fields:
|
Lyrics fields:
|
||||||
* The COPYRIGHTLINE field is followed by zero or more PART fields, each
|
* Each verse is contained in a PART field.
|
||||||
of which contains one verse.
|
|
||||||
* Lines have Windows line endings ``CRLF`` (0x0d, 0x0a).
|
* Lines have Windows line endings ``CRLF`` (0x0d, 0x0a).
|
||||||
* There is no concept of verse types.
|
* There is no concept of verse types.
|
||||||
|
|
||||||
@ -78,12 +70,6 @@ class PowerSongImport(SongImport):
|
|||||||
* .song
|
* .song
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, manager, **kwargs):
|
|
||||||
"""
|
|
||||||
Initialise the PowerSong importer.
|
|
||||||
"""
|
|
||||||
SongImport.__init__(self, manager, **kwargs)
|
|
||||||
|
|
||||||
def doImport(self):
|
def doImport(self):
|
||||||
"""
|
"""
|
||||||
Receive a single file or a list of files to import.
|
Receive a single file or a list of files to import.
|
||||||
@ -94,67 +80,78 @@ class PowerSongImport(SongImport):
|
|||||||
if self.stopImportFlag:
|
if self.stopImportFlag:
|
||||||
return
|
return
|
||||||
self.setDefaults()
|
self.setDefaults()
|
||||||
with open(file, 'rb') as song_file:
|
with open(file, 'rb') as self.song_file:
|
||||||
# Check file is valid PowerSong song format
|
# Get title and check file is valid PowerSong song format
|
||||||
if song_file.read(6) != u'\x05TITLE':
|
label, field = self.readLabelField()
|
||||||
|
if label != u'TITLE':
|
||||||
self.logError(file, unicode(
|
self.logError(file, unicode(
|
||||||
translate('SongsPlugin.PowerSongSongImport',
|
translate('SongsPlugin.PowerSongSongImport',
|
||||||
('Invalid PowerSong song file. Missing '
|
('Invalid PowerSong song file. Missing '
|
||||||
'"\x05TITLE" header.'))))
|
'"TITLE" header.'))))
|
||||||
continue
|
continue
|
||||||
song_data = unicode(song_file.read(), u'utf-8', u'replace')
|
else:
|
||||||
# Extract title and author fields
|
self.title = field.replace(u'\n', u' ')
|
||||||
first_part, sep, song_data = song_data.partition(
|
while label:
|
||||||
u'\x0DCOPYRIGHTLINE')
|
label, field = self.readLabelField()
|
||||||
if not sep:
|
# Get the author(s)
|
||||||
self.logError(file, unicode(
|
if label == u'AUTHOR':
|
||||||
translate('SongsPlugin.PowerSongSongImport',
|
self.parseAuthor(field)
|
||||||
('Invalid PowerSong song file. Missing '
|
# Get copyright and look for CCLI number
|
||||||
'"\x0DCOPYRIGHTLINE" string.'))))
|
elif label == u'COPYRIGHTLINE':
|
||||||
continue
|
found_copyright = True
|
||||||
title_authors = first_part.split(u'\x06AUTHOR')
|
copyright, sep, ccli_no = field.rpartition(u'CCLI')
|
||||||
# Get the song title
|
|
||||||
self.title = self.stripControlChars(title_authors[0][1:])
|
|
||||||
# Extract the author(s)
|
|
||||||
for author in title_authors[1:]:
|
|
||||||
self.parseAuthor(self.stripControlChars(author[1:]))
|
|
||||||
# Get copyright and CCLI number
|
|
||||||
copyright, sep, song_data = song_data.partition(
|
|
||||||
u'\x04PART')
|
|
||||||
if not sep:
|
|
||||||
self.logError(file, unicode(
|
|
||||||
translate('SongsPlugin.PowerSongSongImport',
|
|
||||||
('No verses found. Missing '
|
|
||||||
'"\x04PART" string.'))))
|
|
||||||
continue
|
|
||||||
copyright, sep, ccli_no = copyright[1:].rpartition(u'CCLI ')
|
|
||||||
if not sep:
|
if not sep:
|
||||||
copyright = ccli_no
|
copyright = ccli_no
|
||||||
ccli_no = u''
|
ccli_no = u''
|
||||||
if copyright:
|
if copyright:
|
||||||
if copyright[0] == u'\u00c2':
|
self.addCopyright(copyright.rstrip(
|
||||||
copyright = copyright[1:]
|
u'\n').replace(u'\n', u' '))
|
||||||
self.addCopyright(self.stripControlChars(
|
|
||||||
copyright.rstrip(u'\n')))
|
|
||||||
if ccli_no:
|
if ccli_no:
|
||||||
ccli_no = ccli_no.strip()
|
ccli_no = ccli_no.strip(u' :')
|
||||||
if ccli_no.isdigit():
|
if ccli_no.isdigit():
|
||||||
self.ccliNumber = self.stripControlChars(ccli_no)
|
self.ccliNumber = ccli_no
|
||||||
# Get the verse(s)
|
# Get verse(s)
|
||||||
verses = song_data.split(u'\x04PART')
|
elif label == u'PART':
|
||||||
for verse in verses:
|
self.addVerse(field)
|
||||||
self.addVerse(self.stripControlChars(verse[1:]))
|
# Check for copyright label
|
||||||
|
if not found_copyright:
|
||||||
|
self.logError(file, unicode(
|
||||||
|
translate('SongsPlugin.PowerSongSongImport',
|
||||||
|
('"%s" Invalid PowerSong song file. Missing '
|
||||||
|
'"COPYRIGHTLINE" string.' % self.title))))
|
||||||
|
continue
|
||||||
|
# Check for at least one verse
|
||||||
|
if not self.verses:
|
||||||
|
self.logError(file, unicode(
|
||||||
|
translate('SongsPlugin.PowerSongSongImport',
|
||||||
|
('"%s" No verses found. Missing "PART" string.'
|
||||||
|
% self.title))))
|
||||||
|
continue
|
||||||
if not self.finish():
|
if not self.finish():
|
||||||
self.logError(file)
|
self.logError(file)
|
||||||
|
|
||||||
def stripControlChars(self, text):
|
def readLabelField(self):
|
||||||
"""
|
"""
|
||||||
Get rid of ASCII control characters.
|
Return as a 2-tuple the next two variable-length strings from the song file
|
||||||
|
"""
|
||||||
|
label = unicode(self.song_file.read(
|
||||||
|
self.readLength()), u'utf-8', u'ignore')
|
||||||
|
if label:
|
||||||
|
field = unicode(self.song_file.read(
|
||||||
|
self.readLength()), u'utf-8', u'ignore')
|
||||||
|
else:
|
||||||
|
field = u''
|
||||||
|
return label, field
|
||||||
|
|
||||||
Illegal chars are ASCII code points 0-31 and 127, except:
|
def readLength(self):
|
||||||
* ``HT`` (0x09) - Tab
|
|
||||||
* ``LF`` (0x0a) - Line feed
|
|
||||||
* ``CR`` (0x0d) - Carriage return
|
|
||||||
"""
|
"""
|
||||||
ILLEGAL_CHARS = u'([\x00-\x08\x0b-\x0c\x0e-\x1f\x7f])'
|
Return the byte-length of the next variable-length string in the song file
|
||||||
return re.sub(ILLEGAL_CHARS, '', text)
|
"""
|
||||||
|
this_byte_char = self.song_file.read(1)
|
||||||
|
if not this_byte_char:
|
||||||
|
return 0
|
||||||
|
this_byte = ord(this_byte_char)
|
||||||
|
if this_byte < 128:
|
||||||
|
return this_byte
|
||||||
|
else:
|
||||||
|
return (self.readLength() * 128) + (this_byte - 128)
|
||||||
|
Loading…
Reference in New Issue
Block a user