From 21a88085e37b0f1eb5b94dea3c59529115241fdd Mon Sep 17 00:00:00 2001 From: Samuel Findlay Date: Mon, 30 Apr 2012 00:08:25 +1000 Subject: [PATCH 1/8] First attempt at class openlp.plugins.songs.lib.PowerSongImport --- openlp/core/ui/wizard.py | 1 + openlp/plugins/songs/lib/powersongimport.py | 142 ++++++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 openlp/plugins/songs/lib/powersongimport.py diff --git a/openlp/core/ui/wizard.py b/openlp/core/ui/wizard.py index 5369c9799..74dcfceaf 100644 --- a/openlp/core/ui/wizard.py +++ b/openlp/core/ui/wizard.py @@ -53,6 +53,7 @@ class WizardStrings(object): OL = u'OpenLyrics' OS = u'OpenSong' OSIS = u'OSIS' + PS = u'PowerSong' SB = u'SongBeamer' SoF = u'Songs of Fellowship' SSP = u'SongShow Plus' diff --git a/openlp/plugins/songs/lib/powersongimport.py b/openlp/plugins/songs/lib/powersongimport.py new file mode 100644 index 000000000..973f87795 --- /dev/null +++ b/openlp/plugins/songs/lib/powersongimport.py @@ -0,0 +1,142 @@ +# -*- coding: utf-8 -*- +# vim: autoindent shiftwidth=4 expandtab textwidth=80 tabstop=4 softtabstop=4 + +############################################################################### +# OpenLP - Open Source Lyrics Projection # +# --------------------------------------------------------------------------- # +# Copyright (c) 2008-2012 Raoul Snyman # +# Portions copyright (c) 2008-2012 Tim Bentley, Gerald Britton, Jonathan # +# Corwin, Michael Gorven, Scott Guerrieri, Matthias Hub, Meinert Jordan, # +# Armin Köhler, Joshua Miller, Stevan Pettit, Andreas Preikschat, Mattias # +# Põldaru, Christian Richter, Philip Ridout, Simon Scudder, Jeffrey Smith, # +# Maikel Stuivenberg, Martin Thompson, Jon Tibble, Frode Woldsund # +# --------------------------------------------------------------------------- # +# This program is free software; you can redistribute it and/or modify it # +# under the terms of the GNU General Public License as published by the Free # +# Software Foundation; version 2 of the License. # +# # +# This program is distributed in the hope that it will be useful, but WITHOUT # +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # +# more details. # +# # +# You should have received a copy of the GNU General Public License along # +# with this program; if not, write to the Free Software Foundation, Inc., 59 # +# Temple Place, Suite 330, Boston, MA 02111-1307 USA # +############################################################################### +""" +The :mod:`powersongimport` module provides the functionality for importing +PowerSong songs into the OpenLP database. +""" +import logging + +from openlp.core.lib import translate +from openlp.plugins.songs.lib.songimport import SongImport + +log = logging.getLogger(__name__) + +class PowerSongImport(SongImport): + """ + The :class:`PowerSongImport` class provides the ability to import song files + from PowerSong. + + **PowerSong Song File Format:** + + * Encoded as UTF-8. + * The file has a number of fields, with the song metadata fields first, + followed by the lyrics fields. + + Fields: + Each field begins with one of four labels, each of which begin with one + non-printing byte: + + * ``ENQ`` (0x05) ``TITLE`` + * ``ACK`` (0x06) ``AUTHOR`` + * ``CR`` (0x0D) ``COPYRIGHTLINE`` + * ``EOT`` (0x04) ``PART`` + + The field label is separated from the field contents by one random byte. + Each field ends at the next field label, or at the end of the file. + + Metadata fields: + * Every PowerSong file begins with a TITLE field. + * This is followed by zero or more AUTHOR fields. + * The next field is always COPYRIGHTLINE, but it may be empty (in which + case the byte following the label is the null byte 0x00). + When the field contents are not empty, the first byte is 0xC2 and + should be discarded. + This field may contain a CCLI number at the end: e.g. "CCLI 176263" + + Lyrics fields: + * The COPYRIGHTLINE field is followed by zero or more PART fields, each + of which contains one verse. + * Lines have Windows line endings ``CRLF`` (0x0D, 0x0A). + * There is no concept of verse types. + + Valid extensions for a PowerSong song file are: + + * .song + """ + + def __init__(self, manager, **kwargs): + """ + Initialise the PowerSong importer. + """ + SongImport.__init__(self, manager, **kwargs) + + def doImport(self): + """ + Receive a single file or a list of files to import. + """ + if isinstance(self.importSource, list): + self.importWizard.progressBar.setMaximum(len(self.importSource)) + for file in self.importSource: + if self.stopImportFlag: + return + self.setDefaults() + with open(file, 'rb') as song_file: + # Check file is valid PowerSong song format + if song_file.read(6) != u'\x05TITLE': + self.logError(file, unicode( + translate('SongsPlugin.PowerSongSongImport', + ('Invalid PowerSong song file. Missing ' + '"\x05TITLE" header.')))) + continue + song_data = song_file.read() + first_part, sep, song_data = song_data.partition( + u'\x0DCOPYRIGHTLINE') + if sep == '': + self.logError(file, unicode( + translate('SongsPlugin.PowerSongSongImport', + ('Invalid PowerSong song file. Missing ' + '"\x0DCOPYRIGHTLINE" string.')))) + continue + title_authors = first_part.split(u'\x06AUTHOR') + # Get the song title + self.title = title_authors[0][1:] + # Extract the author(s) + for author in title_authors[1:]: + self.parseAuthor(author[1:]) + # Get copyright and CCLI number + copyright, sep, song_data = song_data.partition( + u'\x04PART') + if sep == '': + self.logError(file, unicode( + translate('SongsPlugin.PowerSongSongImport', + ('No verses found. Missing ' + '"\x04PART" string(s).')))) + continue + copyright, sep, ccli_no = copyright[1:].rpartition(u'CCLI ') + if copyright[0] == u'\xC2': + copyright = copyright[1:] + self.addCopyright(copyright) + if ccli_no != '': + ccli_no = ccli_no.strip() + if ccli_no.isdigit(): + self.ccliNumber = ccli_no + # Get the verse(s) + verses = song_data.split(u'\x04PART') + for verse in verses: + self.addVerse(verse[1:]) + if not self.finish(): + self.logError(file) From ea9bfb160d4f6a3bb8f1900c25fbf6e433b5f4fc Mon Sep 17 00:00:00 2001 From: Samuel Findlay Date: Mon, 30 Apr 2012 16:24:04 +1000 Subject: [PATCH 2/8] Integrated module openlp.plugins.songs.lib.powersongimport --- openlp/plugins/songs/forms/songimportform.py | 43 ++++++++++++++++++++ openlp/plugins/songs/lib/importer.py | 15 ++++--- openlp/plugins/songs/lib/powersongimport.py | 1 + 3 files changed, 54 insertions(+), 5 deletions(-) diff --git a/openlp/plugins/songs/forms/songimportform.py b/openlp/plugins/songs/forms/songimportform.py index 4a44c30ef..686c3d69e 100644 --- a/openlp/plugins/songs/forms/songimportform.py +++ b/openlp/plugins/songs/forms/songimportform.py @@ -171,6 +171,12 @@ class SongImportForm(OpenLPWizard): QtCore.QObject.connect(self.foilPresenterRemoveButton, QtCore.SIGNAL(u'clicked()'), self.onFoilPresenterRemoveButtonClicked) + QtCore.QObject.connect(self.powerSongAddButton, + QtCore.SIGNAL(u'clicked()'), + self.powerSongAddButtonClicked) + QtCore.QObject.connect(self.powerSongRemoveButton, + QtCore.SIGNAL(u'clicked()'), + self.powerSongRemoveButtonClicked) def addCustomPages(self): """ @@ -217,6 +223,8 @@ class SongImportForm(OpenLPWizard): self.addFileSelectItem(u'foilPresenter') # Open Song self.addFileSelectItem(u'openSong', u'OpenSong') + # PowerSong + self.addFileSelectItem(u'powerSong') # SongBeamer self.addFileSelectItem(u'songBeamer') # Song Show Plus @@ -264,6 +272,8 @@ class SongImportForm(OpenLPWizard): self.formatComboBox.setItemText( SongFormat.FoilPresenter, WizardStrings.FP) self.formatComboBox.setItemText(SongFormat.OpenSong, WizardStrings.OS) + self.formatComboBox.setItemText( + SongFormat.PowerSong, WizardStrings.PS) self.formatComboBox.setItemText( SongFormat.SongBeamer, WizardStrings.SB) self.formatComboBox.setItemText( @@ -305,6 +315,10 @@ class SongImportForm(OpenLPWizard): translate('SongsPlugin.ImportWizardForm', 'Add Files...')) self.dreamBeamRemoveButton.setText( translate('SongsPlugin.ImportWizardForm', 'Remove File(s)')) + self.powerSongAddButton.setText( + translate('SongsPlugin.ImportWizardForm', 'Add Files...')) + self.powerSongRemoveButton.setText( + translate('SongsPlugin.ImportWizardForm', 'Remove File(s)')) self.songsOfFellowshipAddButton.setText( translate('SongsPlugin.ImportWizardForm', 'Add Files...')) self.songsOfFellowshipRemoveButton.setText( @@ -417,6 +431,12 @@ class SongImportForm(OpenLPWizard): WizardStrings.YouSpecifyFile % WizardStrings.DB) self.dreamBeamAddButton.setFocus() return False + elif source_format == SongFormat.PowerSong: + if self.powerSongFileListWidget.count() == 0: + critical_error_message_box(UiStrings().NFSp, + WizardStrings.YouSpecifyFile % WizardStrings.PS) + self.powerSongAddButton.setFocus() + return False elif source_format == SongFormat.SongsOfFellowship: if self.songsOfFellowshipFileListWidget.count() == 0: critical_error_message_box(UiStrings().NFSp, @@ -600,6 +620,22 @@ class SongImportForm(OpenLPWizard): """ self.removeSelectedItems(self.dreamBeamFileListWidget) + def onPowerSongAddButtonClicked(self): + """ + Get PowerSong song database files + """ + self.getFiles(WizardStrings.OpenTypeFile % WizardStrings.PS, + self.powerSongFileListWidget, u'%s (*.song)' + % translate('SongsPlugin.ImportWizardForm', + 'PowerSong Song Files') + ) + + def onPowerSongRemoveButtonClicked(self): + """ + Remove selected PowerSong files from the import list + """ + self.removeSelectedItems(self.powerSongFileListWidget) + def onSongsOfFellowshipAddButtonClicked(self): """ Get Songs of Fellowship song database files @@ -717,6 +753,7 @@ class SongImportForm(OpenLPWizard): self.wordsOfWorshipFileListWidget.clear() self.ccliFileListWidget.clear() self.dreamBeamFileListWidget.clear() + self.powerSongFileListWidget.clear() self.songsOfFellowshipFileListWidget.clear() self.genericFileListWidget.clear() self.easySlidesFilenameEdit.setText(u'') @@ -784,6 +821,12 @@ class SongImportForm(OpenLPWizard): filenames=self.getListOfFiles( self.dreamBeamFileListWidget) ) + elif source_format == SongFormat.PowerSong: + # Import PowerSong songs + importer = self.plugin.importSongs(SongFormat.PowerSong, + filenames=self.getListOfFiles( + self.powerSongFileListWidget) + ) elif source_format == SongFormat.SongsOfFellowship: # Import a Songs of Fellowship RTF file importer = self.plugin.importSongs(SongFormat.SongsOfFellowship, diff --git a/openlp/plugins/songs/lib/importer.py b/openlp/plugins/songs/lib/importer.py index 28a57339e..9dde9f0af 100644 --- a/openlp/plugins/songs/lib/importer.py +++ b/openlp/plugins/songs/lib/importer.py @@ -36,6 +36,7 @@ from openlyricsimport import OpenLyricsImport from wowimport import WowImport from cclifileimport import CCLIFileImport from dreambeamimport import DreamBeamImport +from powersongimport import PowerSongImport from ewimport import EasyWorshipSongImport from songbeamerimport import SongBeamerImport from songshowplusimport import SongShowPlusImport @@ -79,11 +80,12 @@ class SongFormat(object): EasyWorship = 7 FoilPresenter = 8 OpenSong = 9 - SongBeamer = 10 - SongShowPlus = 11 - SongsOfFellowship = 12 - WordsOfWorship = 13 - #CSV = 14 + PowerSong = 10 + SongBeamer = 11 + SongShowPlus = 12 + SongsOfFellowship = 13 + WordsOfWorship = 14 + #CSV = 15 @staticmethod def get_class(format): @@ -111,6 +113,8 @@ class SongFormat(object): return CCLIFileImport elif format == SongFormat.DreamBeam: return DreamBeamImport + elif format == SongFormat.PowerSong: + return PowerSongImport elif format == SongFormat.EasySlides: return EasySlidesImport elif format == SongFormat.EasyWorship: @@ -139,6 +143,7 @@ class SongFormat(object): SongFormat.EasyWorship, SongFormat.FoilPresenter, SongFormat.OpenSong, + SongFormat.PowerSong, SongFormat.SongBeamer, SongFormat.SongShowPlus, SongFormat.SongsOfFellowship, diff --git a/openlp/plugins/songs/lib/powersongimport.py b/openlp/plugins/songs/lib/powersongimport.py index 973f87795..37d46e35c 100644 --- a/openlp/plugins/songs/lib/powersongimport.py +++ b/openlp/plugins/songs/lib/powersongimport.py @@ -103,6 +103,7 @@ class PowerSongImport(SongImport): '"\x05TITLE" header.')))) continue song_data = song_file.read() + # Extract title and author fields first_part, sep, song_data = song_data.partition( u'\x0DCOPYRIGHTLINE') if sep == '': From 370603c779e58a83c8e34ce4097c144b0644eb90 Mon Sep 17 00:00:00 2001 From: Samuel Findlay Date: Mon, 30 Apr 2012 20:57:44 +1000 Subject: [PATCH 3/8] PowerSong importer working. Successful on test set of 1057 songs --- openlp/plugins/songs/forms/songimportform.py | 4 +- openlp/plugins/songs/lib/powersongimport.py | 48 ++++++++++++++------ 2 files changed, 35 insertions(+), 17 deletions(-) diff --git a/openlp/plugins/songs/forms/songimportform.py b/openlp/plugins/songs/forms/songimportform.py index 686c3d69e..0cacae612 100644 --- a/openlp/plugins/songs/forms/songimportform.py +++ b/openlp/plugins/songs/forms/songimportform.py @@ -173,10 +173,10 @@ class SongImportForm(OpenLPWizard): self.onFoilPresenterRemoveButtonClicked) QtCore.QObject.connect(self.powerSongAddButton, QtCore.SIGNAL(u'clicked()'), - self.powerSongAddButtonClicked) + self.onPowerSongAddButtonClicked) QtCore.QObject.connect(self.powerSongRemoveButton, QtCore.SIGNAL(u'clicked()'), - self.powerSongRemoveButtonClicked) + self.onPowerSongRemoveButtonClicked) def addCustomPages(self): """ diff --git a/openlp/plugins/songs/lib/powersongimport.py b/openlp/plugins/songs/lib/powersongimport.py index 37d46e35c..207777570 100644 --- a/openlp/plugins/songs/lib/powersongimport.py +++ b/openlp/plugins/songs/lib/powersongimport.py @@ -29,6 +29,7 @@ The :mod:`powersongimport` module provides the functionality for importing PowerSong songs into the OpenLP database. """ import logging +import re from openlp.core.lib import translate from openlp.plugins.songs.lib.songimport import SongImport @@ -52,7 +53,7 @@ class PowerSongImport(SongImport): * ``ENQ`` (0x05) ``TITLE`` * ``ACK`` (0x06) ``AUTHOR`` - * ``CR`` (0x0D) ``COPYRIGHTLINE`` + * ``CR`` (0x0d) ``COPYRIGHTLINE`` * ``EOT`` (0x04) ``PART`` The field label is separated from the field contents by one random byte. @@ -63,14 +64,14 @@ class PowerSongImport(SongImport): * This is followed by zero or more AUTHOR fields. * The next field is always COPYRIGHTLINE, but it may be empty (in which case the byte following the label is the null byte 0x00). - When the field contents are not empty, the first byte is 0xC2 and + When the field contents are not empty, the first byte is 0xc2 and should be discarded. This field may contain a CCLI number at the end: e.g. "CCLI 176263" Lyrics fields: * The COPYRIGHTLINE field is followed by zero or more PART fields, each of which contains one verse. - * Lines have Windows line endings ``CRLF`` (0x0D, 0x0A). + * Lines have Windows line endings ``CRLF`` (0x0d, 0x0a). * There is no concept of verse types. Valid extensions for a PowerSong song file are: @@ -102,11 +103,11 @@ class PowerSongImport(SongImport): ('Invalid PowerSong song file. Missing ' '"\x05TITLE" header.')))) continue - song_data = song_file.read() + song_data = unicode(song_file.read(), u'utf-8', u'replace') # Extract title and author fields first_part, sep, song_data = song_data.partition( u'\x0DCOPYRIGHTLINE') - if sep == '': + if not sep: self.logError(file, unicode( translate('SongsPlugin.PowerSongSongImport', ('Invalid PowerSong song file. Missing ' @@ -114,30 +115,47 @@ class PowerSongImport(SongImport): continue title_authors = first_part.split(u'\x06AUTHOR') # Get the song title - self.title = title_authors[0][1:] + self.title = self.stripControlChars(title_authors[0][1:]) # Extract the author(s) for author in title_authors[1:]: - self.parseAuthor(author[1:]) + self.parseAuthor(self.stripControlChars(author[1:])) # Get copyright and CCLI number copyright, sep, song_data = song_data.partition( u'\x04PART') - if sep == '': + if not sep: self.logError(file, unicode( translate('SongsPlugin.PowerSongSongImport', ('No verses found. Missing ' - '"\x04PART" string(s).')))) + '"\x04PART" string.')))) continue copyright, sep, ccli_no = copyright[1:].rpartition(u'CCLI ') - if copyright[0] == u'\xC2': - copyright = copyright[1:] - self.addCopyright(copyright) - if ccli_no != '': + if not sep: + copyright = ccli_no + ccli_no = u'' + if copyright: + if copyright[0] == u'\u00c2': + copyright = copyright[1:] + self.addCopyright(self.stripControlChars( + copyright.rstrip(u'\n'))) + if ccli_no: ccli_no = ccli_no.strip() if ccli_no.isdigit(): - self.ccliNumber = ccli_no + self.ccliNumber = self.stripControlChars(ccli_no) # Get the verse(s) verses = song_data.split(u'\x04PART') for verse in verses: - self.addVerse(verse[1:]) + self.addVerse(self.stripControlChars(verse[1:])) if not self.finish(): self.logError(file) + + def stripControlChars(self, text): + """ + Get rid of ASCII control characters. + + Illegals chars are ASCII code points 0-31 and 127, except: + * ``HT`` (0x09) - Tab + * ``LF`` (0x0a) - Line feed + * ``CR`` (0x0d) - Carriage return + """ + ILLEGAL_CHARS = u'([\x00-\x08\x0b-\x0c\x0e-\x1f\x7f])' + return re.sub(ILLEGAL_CHARS, '', text) \ No newline at end of file From 1184e9219d3d4e4abd9d34e4a18783a90de74db1 Mon Sep 17 00:00:00 2001 From: Samuel Findlay Date: Mon, 30 Apr 2012 22:19:36 +1000 Subject: [PATCH 4/8] Small fixes for comments typos in songs.lib modules --- openlp/plugins/songs/lib/importer.py | 2 +- openlp/plugins/songs/lib/powersongimport.py | 3 +-- openlp/plugins/songs/lib/songimport.py | 2 +- openlp/plugins/songs/lib/wowimport.py | 6 +++--- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/openlp/plugins/songs/lib/importer.py b/openlp/plugins/songs/lib/importer.py index 9dde9f0af..16d943a73 100644 --- a/openlp/plugins/songs/lib/importer.py +++ b/openlp/plugins/songs/lib/importer.py @@ -90,7 +90,7 @@ class SongFormat(object): @staticmethod def get_class(format): """ - Return the appropriate imeplementation class. + Return the appropriate implementation class. ``format`` The song format. diff --git a/openlp/plugins/songs/lib/powersongimport.py b/openlp/plugins/songs/lib/powersongimport.py index 207777570..3c3d9a641 100644 --- a/openlp/plugins/songs/lib/powersongimport.py +++ b/openlp/plugins/songs/lib/powersongimport.py @@ -75,8 +75,7 @@ class PowerSongImport(SongImport): * There is no concept of verse types. Valid extensions for a PowerSong song file are: - - * .song + * .song """ def __init__(self, manager, **kwargs): diff --git a/openlp/plugins/songs/lib/songimport.py b/openlp/plugins/songs/lib/songimport.py index 6fd9dd403..b3ceb49ec 100644 --- a/openlp/plugins/songs/lib/songimport.py +++ b/openlp/plugins/songs/lib/songimport.py @@ -111,7 +111,7 @@ class SongImport(QtCore.QObject): instance a database), then this should be the song's title. ``reason`` - The reason, why the import failed. The string should be as + The reason why the import failed. The string should be as informative as possible. """ self.setDefaults() diff --git a/openlp/plugins/songs/lib/wowimport.py b/openlp/plugins/songs/lib/wowimport.py index 99f448736..97a11d873 100644 --- a/openlp/plugins/songs/lib/wowimport.py +++ b/openlp/plugins/songs/lib/wowimport.py @@ -71,7 +71,7 @@ class WowImport(SongImport): * ``SOH`` (0x01) - Chorus * ``STX`` (0x02) - Bridge - Blocks are seperated by two bytes. The first byte is 0x01, and the + Blocks are separated by two bytes. The first byte is 0x01, and the second byte is 0x80. Lines: @@ -126,7 +126,7 @@ class WowImport(SongImport): ('Invalid Words of Worship song file. Missing ' '"CSongDoc::CBlock" string.')))) continue - # Seek to the beging of the first block + # Seek to the beginning of the first block song_data.seek(82) for block in range(no_of_blocks): self.linesToRead = ord(song_data.read(4)[:1]) @@ -140,7 +140,7 @@ class WowImport(SongImport): block_text += self.lineText self.linesToRead -= 1 block_type = BLOCK_TYPES[ord(song_data.read(4)[:1])] - # Blocks are seperated by 2 bytes, skip them, but not if + # Blocks are separated by 2 bytes, skip them, but not if # this is the last block! if block + 1 < no_of_blocks: song_data.seek(2, os.SEEK_CUR) From 63b71802abb7f4ff4c450c2dd5aa4e62affd6db6 Mon Sep 17 00:00:00 2001 From: Samuel Findlay Date: Tue, 1 May 2012 23:51:46 +1000 Subject: [PATCH 5/8] Rewrote PowerSongImport class to read variable-length strings directly from file, rather than searching for them. Other minor fixes. --- openlp/plugins/songs/lib/powersongimport.py | 155 ++++++++++---------- openlp/plugins/songs/lib/songimport.py | 2 +- 2 files changed, 77 insertions(+), 80 deletions(-) diff --git a/openlp/plugins/songs/lib/powersongimport.py b/openlp/plugins/songs/lib/powersongimport.py index 3c3d9a641..e2ba13f68 100644 --- a/openlp/plugins/songs/lib/powersongimport.py +++ b/openlp/plugins/songs/lib/powersongimport.py @@ -29,7 +29,6 @@ The :mod:`powersongimport` module provides the functionality for importing PowerSong songs into the OpenLP database. """ import logging -import re from openlp.core.lib import translate from openlp.plugins.songs.lib.songimport import SongImport @@ -43,34 +42,27 @@ class PowerSongImport(SongImport): **PowerSong Song File Format:** - * Encoded as UTF-8. - * The file has a number of fields, with the song metadata fields first, - followed by the lyrics fields. + The file has a number of label-field pairs of variable length. - Fields: - Each field begins with one of four labels, each of which begin with one - non-printing byte: - - * ``ENQ`` (0x05) ``TITLE`` - * ``ACK`` (0x06) ``AUTHOR`` - * ``CR`` (0x0d) ``COPYRIGHTLINE`` - * ``EOT`` (0x04) ``PART`` - - The field label is separated from the field contents by one random byte. - Each field ends at the next field label, or at the end of the file. + Labels and Fields: + * Every label and field is preceded by an integer which specifies its + byte-length. + * If the length < 128 bytes, only one byte is used to encode + the length integer. + * But if it's greater, as many bytes are used as necessary: + * the first byte = (length % 128) + 128 + * the next byte = length / 128 + * another byte is only used if (length / 128) >= 128 + * and so on (3 bytes needed iff length > 16383) Metadata fields: * Every PowerSong file begins with a TITLE field. * This is followed by zero or more AUTHOR fields. - * The next field is always COPYRIGHTLINE, but it may be empty (in which - case the byte following the label is the null byte 0x00). - When the field contents are not empty, the first byte is 0xc2 and - should be discarded. - This field may contain a CCLI number at the end: e.g. "CCLI 176263" + * The next label is always COPYRIGHTLINE, but its field may be empty. + This field may also contain a CCLI number: e.g. "CCLI 176263". Lyrics fields: - * The COPYRIGHTLINE field is followed by zero or more PART fields, each - of which contains one verse. + * Each verse is contained in a PART field. * Lines have Windows line endings ``CRLF`` (0x0d, 0x0a). * There is no concept of verse types. @@ -78,12 +70,6 @@ class PowerSongImport(SongImport): * .song """ - def __init__(self, manager, **kwargs): - """ - Initialise the PowerSong importer. - """ - SongImport.__init__(self, manager, **kwargs) - def doImport(self): """ Receive a single file or a list of files to import. @@ -94,67 +80,78 @@ class PowerSongImport(SongImport): if self.stopImportFlag: return self.setDefaults() - with open(file, 'rb') as song_file: - # Check file is valid PowerSong song format - if song_file.read(6) != u'\x05TITLE': - self.logError(file, unicode( - translate('SongsPlugin.PowerSongSongImport', - ('Invalid PowerSong song file. Missing ' - '"\x05TITLE" header.')))) - continue - song_data = unicode(song_file.read(), u'utf-8', u'replace') - # Extract title and author fields - first_part, sep, song_data = song_data.partition( - u'\x0DCOPYRIGHTLINE') - if not sep: + with open(file, 'rb') as self.song_file: + # Get title and check file is valid PowerSong song format + label, field = self.readLabelField() + if label != u'TITLE': self.logError(file, unicode( translate('SongsPlugin.PowerSongSongImport', ('Invalid PowerSong song file. Missing ' - '"\x0DCOPYRIGHTLINE" string.')))) + '"TITLE" header.')))) continue - title_authors = first_part.split(u'\x06AUTHOR') - # Get the song title - self.title = self.stripControlChars(title_authors[0][1:]) - # Extract the author(s) - for author in title_authors[1:]: - self.parseAuthor(self.stripControlChars(author[1:])) - # Get copyright and CCLI number - copyright, sep, song_data = song_data.partition( - u'\x04PART') - if not sep: + else: + self.title = field.replace(u'\n', u' ') + while label: + label, field = self.readLabelField() + # Get the author(s) + if label == u'AUTHOR': + self.parseAuthor(field) + # Get copyright and look for CCLI number + elif label == u'COPYRIGHTLINE': + found_copyright = True + copyright, sep, ccli_no = field.rpartition(u'CCLI') + if not sep: + copyright = ccli_no + ccli_no = u'' + if copyright: + self.addCopyright(copyright.rstrip( + u'\n').replace(u'\n', u' ')) + if ccli_no: + ccli_no = ccli_no.strip(u' :') + if ccli_no.isdigit(): + self.ccliNumber = ccli_no + # Get verse(s) + elif label == u'PART': + self.addVerse(field) + # Check for copyright label + if not found_copyright: self.logError(file, unicode( translate('SongsPlugin.PowerSongSongImport', - ('No verses found. Missing ' - '"\x04PART" string.')))) + ('"%s" Invalid PowerSong song file. Missing ' + '"COPYRIGHTLINE" string.' % self.title)))) + continue + # Check for at least one verse + if not self.verses: + self.logError(file, unicode( + translate('SongsPlugin.PowerSongSongImport', + ('"%s" No verses found. Missing "PART" string.' + % self.title)))) continue - copyright, sep, ccli_no = copyright[1:].rpartition(u'CCLI ') - if not sep: - copyright = ccli_no - ccli_no = u'' - if copyright: - if copyright[0] == u'\u00c2': - copyright = copyright[1:] - self.addCopyright(self.stripControlChars( - copyright.rstrip(u'\n'))) - if ccli_no: - ccli_no = ccli_no.strip() - if ccli_no.isdigit(): - self.ccliNumber = self.stripControlChars(ccli_no) - # Get the verse(s) - verses = song_data.split(u'\x04PART') - for verse in verses: - self.addVerse(self.stripControlChars(verse[1:])) if not self.finish(): self.logError(file) - def stripControlChars(self, text): + def readLabelField(self): """ - Get rid of ASCII control characters. + Return as a 2-tuple the next two variable-length strings from song file + """ + label = unicode(self.song_file.read( + self.readLength()), u'utf-8', u'ignore') + if label: + field = unicode(self.song_file.read( + self.readLength()), u'utf-8', u'ignore') + else: + field = u'' + return label, field - Illegals chars are ASCII code points 0-31 and 127, except: - * ``HT`` (0x09) - Tab - * ``LF`` (0x0a) - Line feed - * ``CR`` (0x0d) - Carriage return + def readLength(self): """ - ILLEGAL_CHARS = u'([\x00-\x08\x0b-\x0c\x0e-\x1f\x7f])' - return re.sub(ILLEGAL_CHARS, '', text) \ No newline at end of file + Return the byte-length of the next variable-length string in song file + """ + this_byte_char = self.song_file.read(1) + if not this_byte_char: + return 0 + this_byte = ord(this_byte_char) + if this_byte < 128: + return this_byte + else: + return (self.readLength() * 128) + (this_byte - 128) diff --git a/openlp/plugins/songs/lib/songimport.py b/openlp/plugins/songs/lib/songimport.py index b3ceb49ec..79e960919 100644 --- a/openlp/plugins/songs/lib/songimport.py +++ b/openlp/plugins/songs/lib/songimport.py @@ -107,7 +107,7 @@ class SongImport(QtCore.QObject): ``filepath`` This should be the file path if ``self.importSource`` is a list - with different files. If it is not a list, but a single file (for + with different files. If it is not a list, but a single file (for instance a database), then this should be the song's title. ``reason`` From 8877484aea388bd84ac786ab9f9ed1c44271c660 Mon Sep 17 00:00:00 2001 From: Samuel Findlay Date: Wed, 2 May 2012 19:14:30 +1000 Subject: [PATCH 6/8] Tidy up code, stonger error checking. --- openlp/plugins/songs/lib/powersongimport.py | 139 +++++++++++--------- 1 file changed, 78 insertions(+), 61 deletions(-) diff --git a/openlp/plugins/songs/lib/powersongimport.py b/openlp/plugins/songs/lib/powersongimport.py index e2ba13f68..1b99d756f 100644 --- a/openlp/plugins/songs/lib/powersongimport.py +++ b/openlp/plugins/songs/lib/powersongimport.py @@ -72,67 +72,67 @@ class PowerSongImport(SongImport): def doImport(self): """ - Receive a single file or a list of files to import. + Receive a list of files to import. """ - if isinstance(self.importSource, list): - self.importWizard.progressBar.setMaximum(len(self.importSource)) - for file in self.importSource: - if self.stopImportFlag: - return - self.setDefaults() - with open(file, 'rb') as self.song_file: - # Get title and check file is valid PowerSong song format + if not isinstance(self.importSource, list): + return + self.importWizard.progressBar.setMaximum(len(self.importSource)) + for file in self.importSource: + if self.stopImportFlag: + return + self.setDefaults() + parse_error = False + with open(file, 'rb') as self.song_file: + # Get title to check file is valid PowerSong song format + label, field = self.readLabelField() + if label == u'TITLE': + self.title = field.replace(u'\n', u' ') + else: + self.logError(file, unicode( + translate('SongsPlugin.PowerSongSongImport', \ + 'Invalid PowerSong file. Missing "TITLE" header.'))) + continue + # Get rest of fields from file + while True: label, field = self.readLabelField() - if label != u'TITLE': - self.logError(file, unicode( - translate('SongsPlugin.PowerSongSongImport', - ('Invalid PowerSong song file. Missing ' - '"TITLE" header.')))) - continue + if not label: + break + if label == u'AUTHOR': + self.parseAuthor(field) + elif label == u'COPYRIGHTLINE': + found_copyright = True + self.parseCopyrightCCLI(field) + elif label == u'PART': + self.addVerse(field) else: - self.title = field.replace(u'\n', u' ') - while label: - label, field = self.readLabelField() - # Get the author(s) - if label == u'AUTHOR': - self.parseAuthor(field) - # Get copyright and look for CCLI number - elif label == u'COPYRIGHTLINE': - found_copyright = True - copyright, sep, ccli_no = field.rpartition(u'CCLI') - if not sep: - copyright = ccli_no - ccli_no = u'' - if copyright: - self.addCopyright(copyright.rstrip( - u'\n').replace(u'\n', u' ')) - if ccli_no: - ccli_no = ccli_no.strip(u' :') - if ccli_no.isdigit(): - self.ccliNumber = ccli_no - # Get verse(s) - elif label == u'PART': - self.addVerse(field) - # Check for copyright label - if not found_copyright: + parse_error = True self.logError(file, unicode( - translate('SongsPlugin.PowerSongSongImport', - ('"%s" Invalid PowerSong song file. Missing ' - '"COPYRIGHTLINE" string.' % self.title)))) - continue - # Check for at least one verse - if not self.verses: - self.logError(file, unicode( - translate('SongsPlugin.PowerSongSongImport', - ('"%s" No verses found. Missing "PART" string.' - % self.title)))) - continue - if not self.finish(): - self.logError(file) + translate('SongsPlugin.PowerSongSongImport', \ + '"%s" Invalid PowerSong file. Unknown header: "%s".' + % (self.title, label)))) + break + if parse_error: + continue + # Check that file had COPYRIGHTLINE label + if not found_copyright: + self.logError(file, unicode( + translate('SongsPlugin.PowerSongSongImport', \ + '"%s" Invalid PowerSong file. Missing "COPYRIGHTLINE" \ + header.' % self.title))) + continue + # Check that file had at least one verse + if not self.verses: + self.logError(file, unicode( + translate('SongsPlugin.PowerSongSongImport', \ + '"%s" Verses not found. Missing "PART" header.' + % self.title))) + continue + if not self.finish(): + self.logError(file) def readLabelField(self): """ - Return as a 2-tuple the next two variable-length strings from song file + Read (as a 2-tuple) the next two variable-length strings """ label = unicode(self.song_file.read( self.readLength()), u'utf-8', u'ignore') @@ -145,13 +145,30 @@ class PowerSongImport(SongImport): def readLength(self): """ - Return the byte-length of the next variable-length string in song file + Read the byte-length of the next variable-length string + + If at the end of the file, returns 0. """ - this_byte_char = self.song_file.read(1) - if not this_byte_char: + this_byte = self.song_file.read(1) + if not this_byte: return 0 - this_byte = ord(this_byte_char) - if this_byte < 128: - return this_byte + this_byte_val = ord(this_byte) + if this_byte_val < 128: + return this_byte_val else: - return (self.readLength() * 128) + (this_byte - 128) + return (self.readLength() * 128) + (this_byte_val - 128) + + def parseCopyrightCCLI(self, field): + """ + Look for CCLI song number, and get copyright + """ + copyright, sep, ccli_no = field.rpartition(u'CCLI') + if not sep: + copyright = ccli_no + ccli_no = u'' + if copyright: + self.addCopyright(copyright.rstrip(u'\n').replace(u'\n', u' ')) + if ccli_no: + ccli_no = ccli_no.strip(u' :') + if ccli_no.isdigit(): + self.ccliNumber = ccli_no From 45c180308a5103ed03ed344060ae34522159bb33 Mon Sep 17 00:00:00 2001 From: Samuel Findlay Date: Thu, 3 May 2012 22:41:49 +1000 Subject: [PATCH 7/8] Implemented BinaryReader.Read7BitEncodedInt from .NET. Tidy code. --- openlp/plugins/songs/lib/powersongimport.py | 171 +++++++++++--------- 1 file changed, 96 insertions(+), 75 deletions(-) diff --git a/openlp/plugins/songs/lib/powersongimport.py b/openlp/plugins/songs/lib/powersongimport.py index 1b99d756f..9d5fa8f8e 100644 --- a/openlp/plugins/songs/lib/powersongimport.py +++ b/openlp/plugins/songs/lib/powersongimport.py @@ -42,31 +42,30 @@ class PowerSongImport(SongImport): **PowerSong Song File Format:** - The file has a number of label-field pairs of variable length. + The file has a number of label-field pairs. - Labels and Fields: - * Every label and field is preceded by an integer which specifies its - byte-length. - * If the length < 128 bytes, only one byte is used to encode - the length integer. - * But if it's greater, as many bytes are used as necessary: - * the first byte = (length % 128) + 128 - * the next byte = length / 128 - * another byte is only used if (length / 128) >= 128 - * and so on (3 bytes needed iff length > 16383) + Label and Field strings: + + * Every label and field is a variable length string preceded by an + integer specifying it's byte length. + * Integer is 32-bit but is encoded in 7-bit format to save space. Thus + if length will fit in 7 bits (ie <= 127) it takes up only one byte. Metadata fields: - * Every PowerSong file begins with a TITLE field. - * This is followed by zero or more AUTHOR fields. - * The next label is always COPYRIGHTLINE, but its field may be empty. + + * Every PowerSong file has a TITLE field. + * There is zero or more AUTHOR fields. + * There is always a COPYRIGHTLINE label, but its field may be empty. This field may also contain a CCLI number: e.g. "CCLI 176263". Lyrics fields: + * Each verse is contained in a PART field. * Lines have Windows line endings ``CRLF`` (0x0d, 0x0a). * There is no concept of verse types. Valid extensions for a PowerSong song file are: + * .song """ @@ -75,6 +74,8 @@ class PowerSongImport(SongImport): Receive a list of files to import. """ if not isinstance(self.importSource, list): + self.logError(unicode(translate('SongsPlugin.PowerSongImport', + 'No files to import.'))) return self.importWizard.progressBar.setMaximum(len(self.importSource)) for file in self.importSource: @@ -82,83 +83,103 @@ class PowerSongImport(SongImport): return self.setDefaults() parse_error = False - with open(file, 'rb') as self.song_file: - # Get title to check file is valid PowerSong song format - label, field = self.readLabelField() - if label == u'TITLE': - self.title = field.replace(u'\n', u' ') - else: - self.logError(file, unicode( - translate('SongsPlugin.PowerSongSongImport', \ - 'Invalid PowerSong file. Missing "TITLE" header.'))) - continue - # Get rest of fields from file + with open(file, 'rb') as song_data: while True: - label, field = self.readLabelField() - if not label: - break - if label == u'AUTHOR': - self.parseAuthor(field) - elif label == u'COPYRIGHTLINE': - found_copyright = True - self.parseCopyrightCCLI(field) - elif label == u'PART': - self.addVerse(field) - else: + try: + label = self._readString(song_data) + if not label: + break + field = self._readString(song_data) + except ValueError: parse_error = True self.logError(file, unicode( - translate('SongsPlugin.PowerSongSongImport', \ - '"%s" Invalid PowerSong file. Unknown header: "%s".' - % (self.title, label)))) + translate('SongsPlugin.PowerSongImport', + 'Invalid PowerSong file. Unexpected byte value.'))) break - if parse_error: - continue - # Check that file had COPYRIGHTLINE label - if not found_copyright: - self.logError(file, unicode( - translate('SongsPlugin.PowerSongSongImport', \ - '"%s" Invalid PowerSong file. Missing "COPYRIGHTLINE" \ - header.' % self.title))) - continue - # Check that file had at least one verse - if not self.verses: - self.logError(file, unicode( - translate('SongsPlugin.PowerSongSongImport', \ - '"%s" Verses not found. Missing "PART" header.' - % self.title))) - continue + else: + if label == u'TITLE': + self.title = field.replace(u'\n', u' ') + elif label == u'AUTHOR': + self.parseAuthor(field) + elif label == u'COPYRIGHTLINE': + found_copyright = True + self._parseCopyrightCCLI(field) + elif label == u'PART': + self.addVerse(field) + if parse_error: + continue + # Check that file had TITLE field + if not self.title: + self.logError(file, unicode( + translate('SongsPlugin.PowerSongImport', + 'Invalid PowerSong file. Missing "TITLE" header.'))) + continue + # Check that file had COPYRIGHTLINE label + if not found_copyright: + self.logError(file, unicode( + translate('SongsPlugin.PowerSongImport', + '"%s" Invalid PowerSong file. Missing "COPYRIGHTLINE" ' + 'header.' % self.title))) + continue + # Check that file had at least one verse + if not self.verses: + self.logError(file, unicode( + translate('SongsPlugin.PowerSongImport', + '"%s" Verses not found. Missing "PART" header.' + % self.title))) + continue if not self.finish(): self.logError(file) - def readLabelField(self): + def _readString(self, file_object): """ - Read (as a 2-tuple) the next two variable-length strings + Reads in next variable-length string. """ - label = unicode(self.song_file.read( - self.readLength()), u'utf-8', u'ignore') - if label: - field = unicode(self.song_file.read( - self.readLength()), u'utf-8', u'ignore') - else: - field = u'' - return label, field + string_len = self._read7BitEncodedInteger(file_object) + return unicode(file_object.read(string_len), u'utf-8', u'ignore') - def readLength(self): + def _read7BitEncodedInteger(self, file_object): """ - Read the byte-length of the next variable-length string + Reads in a 32-bit integer in compressed 7-bit format. - If at the end of the file, returns 0. + Accomplished by reading the integer 7 bits at a time. The high bit + of the byte when set means to continue reading more bytes. + If the integer will fit in 7 bits (ie <= 127), it only takes up one + byte. Otherwise, it may take up to 5 bytes. + + Reference: .NET method System.IO.BinaryReader.Read7BitEncodedInt """ - this_byte = self.song_file.read(1) - if not this_byte: + val = 0 + shift = 0 + i = 0 + while True: + # Check for corrupted stream (since max 5 bytes per 32-bit integer) + if i == 5: + raise ValueError + byte = self._readByte(file_object) + # Strip high bit and shift left + val += (byte & 0x7f) << shift + shift += 7 + high_bit_set = byte & 0x80 + if not high_bit_set: + break + i += 1 + return val + + def _readByte(self, file_object): + """ + Reads in next byte as an unsigned integer + + Note: returns 0 at end of file. + """ + byte_str = file_object.read(1) + # If read result is empty, then reached end of file + if not byte_str: return 0 - this_byte_val = ord(this_byte) - if this_byte_val < 128: - return this_byte_val else: - return (self.readLength() * 128) + (this_byte_val - 128) + return ord(byte_str) - def parseCopyrightCCLI(self, field): + def _parseCopyrightCCLI(self, field): """ Look for CCLI song number, and get copyright """ From 416cbe465ea9465b953093065a50fa69ea75f450 Mon Sep 17 00:00:00 2001 From: Samuel Findlay Date: Thu, 3 May 2012 22:50:10 +1000 Subject: [PATCH 8/8] Changed 'PowerSong' to 'PowerSong 1.0' --- openlp/core/ui/wizard.py | 2 +- openlp/plugins/songs/forms/songimportform.py | 2 +- openlp/plugins/songs/lib/powersongimport.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/openlp/core/ui/wizard.py b/openlp/core/ui/wizard.py index 74dcfceaf..500d958fd 100644 --- a/openlp/core/ui/wizard.py +++ b/openlp/core/ui/wizard.py @@ -53,7 +53,7 @@ class WizardStrings(object): OL = u'OpenLyrics' OS = u'OpenSong' OSIS = u'OSIS' - PS = u'PowerSong' + PS = u'PowerSong 1.0' SB = u'SongBeamer' SoF = u'Songs of Fellowship' SSP = u'SongShow Plus' diff --git a/openlp/plugins/songs/forms/songimportform.py b/openlp/plugins/songs/forms/songimportform.py index 0cacae612..d5f7715ea 100644 --- a/openlp/plugins/songs/forms/songimportform.py +++ b/openlp/plugins/songs/forms/songimportform.py @@ -627,7 +627,7 @@ class SongImportForm(OpenLPWizard): self.getFiles(WizardStrings.OpenTypeFile % WizardStrings.PS, self.powerSongFileListWidget, u'%s (*.song)' % translate('SongsPlugin.ImportWizardForm', - 'PowerSong Song Files') + 'PowerSong 1.0 Song Files') ) def onPowerSongRemoveButtonClicked(self): diff --git a/openlp/plugins/songs/lib/powersongimport.py b/openlp/plugins/songs/lib/powersongimport.py index 9d5fa8f8e..31491398c 100644 --- a/openlp/plugins/songs/lib/powersongimport.py +++ b/openlp/plugins/songs/lib/powersongimport.py @@ -40,9 +40,9 @@ class PowerSongImport(SongImport): The :class:`PowerSongImport` class provides the ability to import song files from PowerSong. - **PowerSong Song File Format:** + **PowerSong 1.0 Song File Format:** - The file has a number of label-field pairs. + The file has a number of label-field (think key-value) pairs. Label and Field strings: