Implemented BinaryReader.Read7BitEncodedInt from .NET. Tidy code.

This commit is contained in:
Samuel Findlay 2012-05-03 22:41:49 +10:00
parent 76de0f0c60
commit 45c180308a

View File

@ -42,31 +42,30 @@ class PowerSongImport(SongImport):
**PowerSong Song File Format:** **PowerSong Song File Format:**
The file has a number of label-field pairs of variable length. The file has a number of label-field pairs.
Labels and Fields: Label and Field strings:
* Every label and field is preceded by an integer which specifies its
byte-length. * Every label and field is a variable length string preceded by an
* If the length < 128 bytes, only one byte is used to encode integer specifying its byte length.
the length integer. * Integer is 32-bit but is encoded in 7-bit format to save space. Thus
* But if it's greater, as many bytes are used as necessary: if length will fit in 7 bits (i.e. <= 127) it takes up only one byte.
* the first byte = (length % 128) + 128
* the next byte = length / 128
* another byte is only used if (length / 128) >= 128
* and so on (3 bytes needed iff length > 16383)
Metadata fields: Metadata fields:
* Every PowerSong file begins with a TITLE field.
* This is followed by zero or more AUTHOR fields. * Every PowerSong file has a TITLE field.
* The next label is always COPYRIGHTLINE, but its field may be empty. * There are zero or more AUTHOR fields.
* There is always a COPYRIGHTLINE label, but its field may be empty.
This field may also contain a CCLI number: e.g. "CCLI 176263". This field may also contain a CCLI number: e.g. "CCLI 176263".
Lyrics fields: Lyrics fields:
* Each verse is contained in a PART field. * Each verse is contained in a PART field.
* Lines have Windows line endings ``CRLF`` (0x0d, 0x0a). * Lines have Windows line endings ``CRLF`` (0x0d, 0x0a).
* There is no concept of verse types. * There is no concept of verse types.
Valid extensions for a PowerSong song file are: Valid extensions for a PowerSong song file are:
* .song * .song
""" """
@ -75,6 +74,8 @@ class PowerSongImport(SongImport):
Receive a list of files to import. Receive a list of files to import.
""" """
if not isinstance(self.importSource, list): if not isinstance(self.importSource, list):
self.logError(unicode(translate('SongsPlugin.PowerSongImport',
'No files to import.')))
return return
self.importWizard.progressBar.setMaximum(len(self.importSource)) self.importWizard.progressBar.setMaximum(len(self.importSource))
for file in self.importSource: for file in self.importSource:
@ -82,83 +83,103 @@ class PowerSongImport(SongImport):
return return
self.setDefaults() self.setDefaults()
parse_error = False parse_error = False
with open(file, 'rb') as self.song_file: with open(file, 'rb') as song_data:
# Get title to check file is valid PowerSong song format
label, field = self.readLabelField()
if label == u'TITLE':
self.title = field.replace(u'\n', u' ')
else:
self.logError(file, unicode(
translate('SongsPlugin.PowerSongSongImport', \
'Invalid PowerSong file. Missing "TITLE" header.')))
continue
# Get rest of fields from file
while True: while True:
label, field = self.readLabelField() try:
if not label: label = self._readString(song_data)
break if not label:
if label == u'AUTHOR': break
self.parseAuthor(field) field = self._readString(song_data)
elif label == u'COPYRIGHTLINE': except ValueError:
found_copyright = True
self.parseCopyrightCCLI(field)
elif label == u'PART':
self.addVerse(field)
else:
parse_error = True parse_error = True
self.logError(file, unicode( self.logError(file, unicode(
translate('SongsPlugin.PowerSongSongImport', \ translate('SongsPlugin.PowerSongImport',
'"%s" Invalid PowerSong file. Unknown header: "%s".' 'Invalid PowerSong file. Unexpected byte value.')))
% (self.title, label))))
break break
if parse_error: else:
continue if label == u'TITLE':
# Check that file had COPYRIGHTLINE label self.title = field.replace(u'\n', u' ')
if not found_copyright: elif label == u'AUTHOR':
self.logError(file, unicode( self.parseAuthor(field)
translate('SongsPlugin.PowerSongSongImport', \ elif label == u'COPYRIGHTLINE':
'"%s" Invalid PowerSong file. Missing "COPYRIGHTLINE" \ found_copyright = True
header.' % self.title))) self._parseCopyrightCCLI(field)
continue elif label == u'PART':
# Check that file had at least one verse self.addVerse(field)
if not self.verses: if parse_error:
self.logError(file, unicode( continue
translate('SongsPlugin.PowerSongSongImport', \ # Check that file had TITLE field
'"%s" Verses not found. Missing "PART" header.' if not self.title:
% self.title))) self.logError(file, unicode(
continue translate('SongsPlugin.PowerSongImport',
'Invalid PowerSong file. Missing "TITLE" header.')))
continue
# Check that file had COPYRIGHTLINE label
if not found_copyright:
self.logError(file, unicode(
translate('SongsPlugin.PowerSongImport',
'"%s" Invalid PowerSong file. Missing "COPYRIGHTLINE" '
'header.' % self.title)))
continue
# Check that file had at least one verse
if not self.verses:
self.logError(file, unicode(
translate('SongsPlugin.PowerSongImport',
'"%s" Verses not found. Missing "PART" header.'
% self.title)))
continue
if not self.finish(): if not self.finish():
self.logError(file) self.logError(file)
def readLabelField(self): def _readString(self, file_object):
""" """
Read (as a 2-tuple) the next two variable-length strings Reads in next variable-length string.
""" """
label = unicode(self.song_file.read( string_len = self._read7BitEncodedInteger(file_object)
self.readLength()), u'utf-8', u'ignore') return unicode(file_object.read(string_len), u'utf-8', u'ignore')
if label:
field = unicode(self.song_file.read(
self.readLength()), u'utf-8', u'ignore')
else:
field = u''
return label, field
def readLength(self): def _read7BitEncodedInteger(self, file_object):
""" """
Read the byte-length of the next variable-length string Reads in a 32-bit integer in compressed 7-bit format.
If at the end of the file, returns 0. Accomplished by reading the integer 7 bits at a time. The high bit
of the byte when set means to continue reading more bytes.
If the integer will fit in 7 bits (i.e. <= 127), it only takes up one
byte. Otherwise, it may take up to 5 bytes.
Reference: .NET method System.IO.BinaryReader.Read7BitEncodedInt
""" """
this_byte = self.song_file.read(1) val = 0
if not this_byte: shift = 0
i = 0
while True:
# Check for corrupted stream (since max 5 bytes per 32-bit integer)
if i == 5:
raise ValueError
byte = self._readByte(file_object)
# Strip high bit and shift left
val += (byte & 0x7f) << shift
shift += 7
high_bit_set = byte & 0x80
if not high_bit_set:
break
i += 1
return val
def _readByte(self, file_object):
"""
Reads in next byte as an unsigned integer
Note: returns 0 at end of file.
"""
byte_str = file_object.read(1)
# If read result is empty, then reached end of file
if not byte_str:
return 0 return 0
this_byte_val = ord(this_byte)
if this_byte_val < 128:
return this_byte_val
else: else:
return (self.readLength() * 128) + (this_byte_val - 128) return ord(byte_str)
def parseCopyrightCCLI(self, field): def _parseCopyrightCCLI(self, field):
""" """
Look for CCLI song number, and get copyright Look for CCLI song number, and get copyright
""" """