# Tokenizer for RTF text. The capture groups are, in order:
#   1. control word (letters), 2. its optional numeric argument,
#   3. the two hex digits of a \'xx escape, 4. a control symbol (\ followed by
#   a non-letter), 5. a group brace, 6. any other single character.
# Bare line breaks match the pattern but are not captured, so they are dropped.
PATTERN = re.compile(r"\\([a-z]{1,32})(-?\d{1,10})?[ ]?|\\'"
    r"([0-9a-f]{2})|\\([^a-z])|([{}])|[\r\n]+|(.)", re.I)
# RTF control words which specify a "destination" to be ignored.
DESTINATIONS = frozenset((
    u'aftncn', u'aftnsep', u'aftnsepc', u'annotation', u'atnauthor',
    u'atndate', u'atnicn', u'atnid', u'atnparent', u'atnref', u'atntime',
    u'atrfend', u'atrfstart', u'author', u'background', u'bkmkend',
    u'bkmkstart', u'blipuid', u'buptim', u'category',
    u'colorschememapping', u'colortbl', u'comment', u'company', u'creatim',
    u'datafield', u'datastore', u'defchp', u'defpap', u'do', u'doccomm',
    u'docvar', u'dptxbxtext', u'ebcend', u'ebcstart', u'factoidname',
    u'falt', u'fchars', u'ffdeftext', u'ffentrymcr', u'ffexitmcr',
    u'ffformat', u'ffhelptext', u'ffl', u'ffname', u'ffstattext', u'field',
    u'file', u'filetbl', u'fldinst', u'fldrslt', u'fldtype', u'fname',
    u'fontemb', u'fontfile', u'footer', u'footerf', u'footerl', u'footerr',
    u'footnote', u'formfield', u'ftncn', u'ftnsep', u'ftnsepc', u'g',
    u'generator', u'gridtbl', u'header', u'headerf', u'headerl',
    u'headerr', u'hl', u'hlfr', u'hlinkbase', u'hlloc', u'hlsrc', u'hsv',
    u'htmltag', u'info', u'keycode', u'keywords', u'latentstyles',
    u'lchars', u'levelnumbers', u'leveltext', u'lfolevel', u'linkval',
    u'list', u'listlevel', u'listname', u'listoverride',
    u'listoverridetable', u'listpicture', u'liststylename', u'listtable',
    u'listtext', u'lsdlockedexcept', u'macc', u'maccPr', u'mailmerge',
    u'maln', u'malnScr', u'manager', u'margPr', u'mbar', u'mbarPr',
    u'mbaseJc', u'mbegChr', u'mborderBox', u'mborderBoxPr', u'mbox',
    u'mboxPr', u'mchr', u'mcount', u'mctrlPr', u'md', u'mdeg', u'mdegHide',
    u'mden', u'mdiff', u'mdPr', u'me', u'mendChr', u'meqArr', u'meqArrPr',
    u'mf', u'mfName', u'mfPr', u'mfunc', u'mfuncPr', u'mgroupChr',
    u'mgroupChrPr', u'mgrow', u'mhideBot', u'mhideLeft', u'mhideRight',
    u'mhideTop', u'mhtmltag', u'mlim', u'mlimloc', u'mlimlow',
    u'mlimlowPr', u'mlimupp', u'mlimuppPr', u'mm', u'mmaddfieldname',
    u'mmath', u'mmathPict', u'mmathPr', u'mmaxdist', u'mmc', u'mmcJc',
    u'mmconnectstr', u'mmconnectstrdata', u'mmcPr', u'mmcs',
    u'mmdatasource', u'mmheadersource', u'mmmailsubject', u'mmodso',
    u'mmodsofilter', u'mmodsofldmpdata', u'mmodsomappedname',
    u'mmodsoname', u'mmodsorecipdata', u'mmodsosort', u'mmodsosrc',
    u'mmodsotable', u'mmodsoudl', u'mmodsoudldata', u'mmodsouniquetag',
    u'mmPr', u'mmquery', u'mmr', u'mnary', u'mnaryPr', u'mnoBreak',
    u'mnum', u'mobjDist', u'moMath', u'moMathPara', u'moMathParaPr',
    u'mopEmu', u'mphant', u'mphantPr', u'mplcHide', u'mpos', u'mr',
    u'mrad', u'mradPr', u'mrPr', u'msepChr', u'mshow', u'mshp', u'msPre',
    u'msPrePr', u'msSub', u'msSubPr', u'msSubSup', u'msSubSupPr', u'msSup',
    u'msSupPr', u'mstrikeBLTR', u'mstrikeH', u'mstrikeTLBR', u'mstrikeV',
    u'msub', u'msubHide', u'msup', u'msupHide', u'mtransp', u'mtype',
    u'mvertJc', u'mvfmf', u'mvfml', u'mvtof', u'mvtol', u'mzeroAsc',
    u'mzeroDesc', u'mzeroWid', u'nesttableprops', u'nextfile',
    u'nonesttables', u'objalias', u'objclass', u'objdata', u'object',
    u'objname', u'objsect', u'objtime', u'oldcprops', u'oldpprops',
    u'oldsprops', u'oldtprops', u'oleclsid', u'operator', u'panose',
    u'password', u'passwordhash', u'pgp', u'pgptbl', u'picprop', u'pict',
    u'pn', u'pnseclvl', u'pntext', u'pntxta', u'pntxtb', u'printim',
    u'private', u'propname', u'protend', u'protstart', u'protusertbl',
    u'pxe', u'result', u'revtbl', u'revtim', u'rsidtbl', u'rxe', u'shp',
    u'shpgrp', u'shpinst', u'shppict', u'shprslt', u'shptxt', u'sn', u'sp',
    u'staticval', u'stylesheet', u'subject', u'sv', u'svb', u'tc',
    u'template', u'themedata', u'title', u'txe', u'ud', u'upr',
    u'userprops', u'wgrffmtfilter', u'windowcaption', u'writereservation',
    u'writereservhash', u'xe', u'xform', u'xmlattrname', u'xmlattrvalue',
    u'xmlclose', u'xmlname', u'xmlnstbl', u'xmlopen'))
# Translation of some special characters.
SPECIAL_CHARS = {
    u'par': u'\n',
    u'sect': u'\n\n',
    # Required page and column break.
    # Would be good if we could split verse into subverses here.
    u'page': u'\n\n',
    u'column': u'\n\n',
    # Soft breaks.
    u'softpage': u'[---]',
    u'softcol': u'[---]',
    u'line': u'\n',
    u'tab': u'\t',
    u'emdash': u'\u2014',
    u'endash': u'\u2013',
    u'emspace': u'\u2003',
    u'enspace': u'\u2002',
    u'qmspace': u'\u2005',
    u'bullet': u'\u2022',
    u'lquote': u'\u2018',
    u'rquote': u'\u2019',
    u'ldblquote': u'\u201C',
    u'rdblquote': u'\u201D',
    u'ltrmark': u'\u200E',
    u'rtlmark': u'\u200F',
    u'zwj': u'\u200D',
    u'zwnj': u'\u200C'}
# Maps an RTF font charset control word (word plus argument) to the name of
# the Python codec used to decode \'xx escapes written in that font.
CHARSET_MAPPING = {
    u'fcharset0': u'cp1252',
    u'fcharset161': u'cp1253',
    u'fcharset162': u'cp1254',
    u'fcharset163': u'cp1258',
    u'fcharset177': u'cp1255',
    u'fcharset178': u'cp1256',
    u'fcharset186': u'cp1257',
    u'fcharset204': u'cp1251',
    u'fcharset222': u'cp874',
    u'fcharset238': u'cp1250'}


def get_encoding(font, font_table, default_encoding, failed=False):
    """
    Finds an encoding to use. Asks user, if necessary.

    The chosen encoding is stored back into ``font_table`` under ``font``, so
    the caller's dictionary is modified.

    ``font``
        The number of currently active font.

    ``font_table``
        Dictionary of fonts and respective encodings.

    ``default_encoding``
        The default encoding to use when font_table is empty or no font is
        used.

    ``failed``
        A boolean indicating whether the previous encoding didn't work.

    Returns a tuple ``(encoding, default_encoding)``. ``default_encoding`` is
    replaced by the user's choice whenever the user had to be asked.
    """
    # An empty or missing font entry falls back to the default encoding.
    encoding = font_table.get(font) or default_encoding
    if not encoding or failed:
        # Nothing usable is known (or the last attempt failed): ask the user
        # and remember the answer as the new default.
        encoding = retrieve_windows_encoding()
        default_encoding = encoding
    font_table[font] = encoding
    return encoding, default_encoding


def strip_rtf(text, default_encoding=None):
    """
    This function strips RTF control structures and returns an unicode string.

    Thanks to Markus Jarderot (MizardX) for this code, used by permission.
    http://stackoverflow.com/questions/188545

    ``text``
        RTF-encoded text, a string.

    ``default_encoding``
        Default encoding to use when no encoding is specified.

    Returns a tuple ``(text, default_encoding)``: the plain text and the
    default encoding, which may have been changed by ``get_encoding`` while
    decoding ``\\'xx`` escapes.
    """
    # Current font is the font tag we last met.
    font = u''
    # Character encoding is defined inside fonttable.
    # font_table could contain eg u'0': u'cp1252'
    font_table = {u'': u''}
    # Stack of things to keep track of when entering/leaving groups.
    stack = []
    # Whether this group (and all inside it) are "ignorable".
    ignorable = False
    # Number of ASCII characters to skip after an unicode character.
    ucskip = 1
    # Number of ASCII characters left to skip.
    curskip = 0
    # Output buffer.
    out = []
    for match in PATTERN.finditer(text):
        word, arg, hex_digits, char, brace, tchar = match.groups()
        if brace:
            curskip = 0
            if brace == u'{':
                # Push state
                stack.append((ucskip, ignorable, font))
            elif stack:
                # Pop state. An unmatched closing brace in malformed input
                # has no state to restore and is simply ignored.
                ucskip, ignorable, font = stack.pop()
        # \x (not a letter)
        elif char:
            curskip = 0
            if char == u'~' and not ignorable:
                out.append(u'\xA0')
            elif char in u'{}\\' and not ignorable:
                out.append(char)
            elif char == u'-' and not ignorable:
                out.append(u'\u00AD')
            elif char == u'_' and not ignorable:
                out.append(u'\u2011')
            elif char == u'*':
                ignorable = True
        # \command
        elif word:
            curskip = 0
            if word in DESTINATIONS:
                ignorable = True
            elif word in SPECIAL_CHARS:
                # Do not leak line breaks etc. from ignored destinations.
                if not ignorable:
                    out.append(SPECIAL_CHARS[word])
            elif word == u'uc':
                if arg:
                    ucskip = int(arg)
            elif word == u'u':
                # \uN: a unicode character given as a signed 16 bit number,
                # followed by ``ucskip`` fallback characters to be skipped.
                if arg:
                    c = int(arg)
                    if c < 0:
                        c += 0x10000
                    if not ignorable:
                        out.append(unichr(c))
                    curskip = ucskip
            elif word == u'fonttbl':
                # The font table is parsed for encodings, but produces no
                # text.
                ignorable = True
            elif word == u'f':
                font = arg
            elif word == u'ansicpg':
                if arg:
                    font_table[font] = 'cp' + arg
            elif word == u'fcharset' and font not in font_table and arg \
                    and word + arg in CHARSET_MAPPING:
                # Only used when the current font has no encoding yet, so an
                # entry already made for it (e.g. by \ansicpg) wins.
                font_table[font] = CHARSET_MAPPING[word + arg]
        # \'xx
        elif hex_digits:
            if curskip > 0:
                curskip -= 1
            elif not ignorable:
                charcode = int(hex_digits, 16)
                failed = False
                while True:
                    try:
                        encoding, default_encoding = get_encoding(font,
                            font_table, default_encoding, failed=failed)
                        out.append(chr(charcode).decode(encoding))
                    except (UnicodeDecodeError, LookupError):
                        # Wrong or unknown encoding: retry, which makes
                        # get_encoding() ask the user for another one.
                        failed = True
                    else:
                        break
        elif tchar:
            if curskip > 0:
                curskip -= 1
            elif not ignorable:
                out.append(tchar)
    text = u''.join(out)
    return text, default_encoding
bool(control_word) - control = False - elif c.isspace(): - control = False - else: - control_word.append(c) - if len(control_word) == 3 and control_word[0] == '\'': - control = False - if not control: - if not control_word: - if c == '{' or c == '}' or c == '\\': - clear_text.append(c) - else: - control_str = ''.join(control_word) - if control_str == 'par' or control_str == 'line': - clear_text.append(u'\n') - elif control_str == 'tab': - clear_text.append(u'\t') - # Prefer the encoding specified by the RTF data to that - # specified by the Paradox table header - # West European encoding - elif control_str == 'fcharset0': - encoding = u'cp1252' - # Greek encoding - elif control_str == 'fcharset161': - encoding = u'cp1253' - # Turkish encoding - elif control_str == 'fcharset162': - encoding = u'cp1254' - # Vietnamese encoding - elif control_str == 'fcharset163': - encoding = u'cp1258' - # Hebrew encoding - elif control_str == 'fcharset177': - encoding = u'cp1255' - # Arabic encoding - elif control_str == 'fcharset178': - encoding = u'cp1256' - # Baltic encoding - elif control_str == 'fcharset186': - encoding = u'cp1257' - # Cyrillic encoding - elif control_str == 'fcharset204': - encoding = u'cp1251' - # Thai encoding - elif control_str == 'fcharset222': - encoding = u'cp874' - # Central+East European encoding - elif control_str == 'fcharset238': - encoding = u'cp1250' - elif control_str[0] == '\'': - s = chr(int(control_str[1:3], 16)) - clear_text.append(s.decode(encoding)) - del control_word[:] - if c == '\\' and new_control: - control = True - elif c == '{': - depth += 1 - elif c == '}': - depth -= 1 - elif depth > 2: - continue - elif c == '\n' or c == '\r': - continue - elif c == '\\': - control = True - else: - clear_text.append(c) - return u''.join(clear_text) class FieldDescEntry: def __init__(self, name, type, size): @@ -274,7 +179,7 @@ class EasyWorshipSongImport(SongImport): self.addAuthor(author_name.strip()) if words: # Format the lyrics - words = 
strip_rtf(words, self.encoding) + words, self.encoding = strip_rtf(words, self.encoding) verse_type = VerseType.Tags[VerseType.Verse] for verse in SLIDE_BREAK_REGEX.split(words): verse = verse.strip() diff --git a/openlp/plugins/songs/lib/importer.py b/openlp/plugins/songs/lib/importer.py index 6f54114b2..2b04d6859 100644 --- a/openlp/plugins/songs/lib/importer.py +++ b/openlp/plugins/songs/lib/importer.py @@ -44,6 +44,7 @@ from powersongimport import PowerSongImport from ewimport import EasyWorshipSongImport from songbeamerimport import SongBeamerImport from songshowplusimport import SongShowPlusImport +from sundayplusimport import SundayPlusImport from foilpresenterimport import FoilPresenterImport from zionworximport import ZionWorxImport # Imports that might fail @@ -145,9 +146,10 @@ class SongFormat(object): SongBeamer = 11 SongShowPlus = 12 SongsOfFellowship = 13 - WordsOfWorship = 14 - ZionWorx = 15 - #CSV = 16 + SundayPlus = 14 + WordsOfWorship = 15 + ZionWorx = 16 + #CSV = 17 # Set optional attribute defaults __defaults__ = { @@ -275,6 +277,13 @@ class SongFormat(object): 'The Songs of Fellowship importer has been disabled because ' 'OpenLP cannot access OpenOffice or LibreOffice.') }, + SundayPlus: { + u'class': SundayPlusImport, + u'name': u'SundayPlus', + u'prefix': u'sundayPlus', + u'filter': u'%s (*.ptf)' % translate( + 'SongsPlugin.ImportWizardForm', 'SundayPlus Song Files') + }, WordsOfWorship: { u'class': WowImport, u'name': u'Words of Worship', @@ -322,6 +331,7 @@ class SongFormat(object): SongFormat.SongBeamer, SongFormat.SongShowPlus, SongFormat.SongsOfFellowship, + SongFormat.SundayPlus, SongFormat.WordsOfWorship, SongFormat.ZionWorx ] diff --git a/openlp/plugins/songs/lib/sundayplusimport.py b/openlp/plugins/songs/lib/sundayplusimport.py new file mode 100644 index 000000000..fcf324d41 --- /dev/null +++ b/openlp/plugins/songs/lib/sundayplusimport.py @@ -0,0 +1,195 @@ +# -*- coding: utf-8 -*- +# vim: autoindent shiftwidth=4 expandtab 
textwidth=80 tabstop=4 softtabstop=4 + +############################################################################### +# OpenLP - Open Source Lyrics Projection # +# --------------------------------------------------------------------------- # +# Copyright (c) 2008-2012 Raoul Snyman # +# Portions copyright (c) 2008-2012 Tim Bentley, Gerald Britton, Jonathan # +# Corwin, Samuel Findlay, Michael Gorven, Scott Guerrieri, Matthias Hub, # +# Meinert Jordan, Armin Köhler, Edwin Lunando, Joshua Miller, Stevan Pettit, # +# Andreas Preikschat, Mattias Põldaru, Christian Richter, Philip Ridout, # +# Simon Scudder, Jeffrey Smith, Maikel Stuivenberg, Martin Thompson, Jon # +# Tibble, Dave Warnock, Frode Woldsund # +# --------------------------------------------------------------------------- # +# This program is free software; you can redistribute it and/or modify it # +# under the terms of the GNU General Public License as published by the Free # +# Software Foundation; version 2 of the License. # +# # +# This program is distributed in the hope that it will be useful, but WITHOUT # +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # +# more details. 
#                                                                             #
# You should have received a copy of the GNU General Public License along     #
# with this program; if not, write to the Free Software Foundation, Inc., 59  #
# Temple Place, Suite 330, Boston, MA 02111-1307 USA                          #
###############################################################################

import os
import re

from openlp.plugins.songs.lib import VerseType, retrieve_windows_encoding
from openlp.plugins.songs.lib import strip_rtf
from openlp.plugins.songs.lib.songimport import SongImport

# Maps the value of a verse group's "Hotkey" entry to an OpenLP verse tag.
HOTKEY_TO_VERSE_TYPE = {
    u'1': u'v1',
    u'2': u'v2',
    u'3': u'v3',
    u'4': u'v4',
    u'5': u'v5',
    u'6': u'v6',
    u'7': u'v7',
    u'8': u'v8',
    u'9': u'v9',
    u'C': u'c',
    u'+': u'b',
    u'Z': u'o'}


class SundayPlusImport(SongImport):
    """
    Import Sunday Plus songs

    A song file is one ``[...]`` group holding ``#name: value`` pairs. Values
    named ``CELL...`` are nested groups which describe one verse each.
    """

    def __init__(self, manager, **kwargs):
        """
        Initialise the class.
        """
        SongImport.__init__(self, manager, **kwargs)
        self.encoding = u'us-ascii'

    def doImport(self):
        """
        Import every file of ``self.importSource`` until the import is
        stopped.
        """
        self.importWizard.progressBar.setMaximum(len(self.importSource))
        for filename in self.importSource:
            if self.stopImportFlag:
                return
            song_file = open(filename)
            try:
                self.doImportFile(song_file)
            finally:
                # Close the file even when the import of this song fails.
                song_file.close()

    def doImportFile(self, file):
        """
        Process the Sunday Plus file object.
        """
        self.setDefaults()
        if not self.parse(file.read()):
            self.logError(file.name)
            return
        if not self.title:
            self.title = self.titleFromFilename(file.name)
        if not self.finish():
            self.logError(file.name)

    def parse(self, data, cell=False):
        """
        Parse one group of ``#name: value`` pairs.

        ``data``
            The text of the group, including the enclosing brackets.

        ``cell``
            ``False`` for the main group of a file. For a verse group, the
            part of the ``CELL...`` name which follows ``CELL``.

        Returns ``False`` when ``data`` is not enclosed in brackets, otherwise
        ``True``.
        """
        if len(data) == 0 or data[0:1] != '[' or data[-1] != ']':
            # NOTE(review): the other call sites pass file.name as the first
            # argument of logError - confirm that a message is valid here.
            self.logError(u'File is malformed')
            return False
        i = 1
        verse_type = VerseType.Tags[VerseType.Verse]
        while i < len(data):
            # Data is held as #name: value pairs inside groups marked as [].
            # Now we are looking for the name.
            if data[i:i + 1] == '#':
                name_end = data.find(':', i + 1)
                if name_end == -1:
                    # A name without a value: nothing more can be parsed.
                    # Continuing would move the cursor back to the start of
                    # the data and never terminate.
                    break
                name = data[i + 1:name_end]
                i = name_end + 1
                while data[i:i + 1] == ' ':
                    i += 1
                value, end = self._readValue(data, i)
                # If we are in the main group.
                if cell is False:
                    if name == 'title':
                        self.title = self.decode(self.unescape(value))
                    elif name == 'Author':
                        author = self.decode(self.unescape(value))
                        if len(author):
                            self.addAuthor(author)
                    elif name == 'Copyright':
                        self.copyright = self.decode(self.unescape(value))
                    elif name[0:4] == 'CELL':
                        self.parse(value, cell=name[4:])
                # We are in a verse group.
                else:
                    if name == 'MARKER_NAME':
                        value = value.strip()
                        if len(value):
                            verse_type = VerseType.Tags[
                                VerseType.from_loose_input(value[0])]
                            if len(value) >= 2 and value[-1] in ['0', '1', '2',
                                '3', '4', '5', '6', '7', '8', '9']:
                                verse_type = "%s%s" % (verse_type, value[-1])
                    elif name == 'Hotkey':
                        # Hotkey always appears after MARKER_NAME, so it
                        # effectively overrides MARKER_NAME, if present.
                        if len(value) and value in HOTKEY_TO_VERSE_TYPE:
                            verse_type = HOTKEY_TO_VERSE_TYPE[value]
                    if name == 'rtf':
                        self._addRtfVerse(value, verse_type)
                if end == -1:
                    break
                i = end + 1
            i += 1
        return True

    def _readValue(self, data, start):
        """
        Read the value which starts at ``data[start]``.

        Returns a tuple ``(value, end)``. ``end`` is the index at which the
        value ends, or -1 when no terminator was found; in that case the value
        runs up to the closing bracket of the group.
        """
        first = data[start:start + 1]
        if first == '"':
            # Quoted string, returned without the quotes.
            end = data.find('"', start + 1)
            return data[start + 1:end], end
        if first == '[':
            # Nested group: ends at the first ']' outside of quotes.
            end = -1
            inside_quotes = False
            for j in xrange(start, len(data)):
                char = data[j:j + 1]
                if char == '"':
                    inside_quotes = not inside_quotes
                elif not inside_quotes and char == ']':
                    end = j + 1
                    break
            return data[start:end], end
        # Bare value: ends at the next comma, unless it contains parentheses,
        # in which case it ends right after the closing one.
        end = data.find(',', start + 1)
        if data.find('(', start, end) != -1:
            closing = data.find(')', start)
            if closing != -1:
                end = closing + 1
        return data[start:end], end

    def _addRtfVerse(self, value, verse_type):
        """
        Convert the RTF text of a verse to plain text and add it to the song.
        """
        verse, self.encoding = strip_rtf(self.unescape(value), self.encoding)
        # If any line inside any verse contains CCLI or
        # only Public Domain, we treat this as special data:
        # we remove that line and add data to specific field.
        # A new list is built, because removing items from the list which is
        # being iterated by index skips lines and runs past its end.
        kept_lines = []
        for line in verse.strip().split('\n'):
            line = line.strip()
            if line[:4].lower() == u'ccli':
                m = re.search(r'[0-9]+', line)
                if m:
                    self.ccliNumber = int(m.group(0))
                    continue
            elif line.lower() == u'public domain':
                self.copyright = u'Public Domain'
                continue
            kept_lines.append(line)
        self.addVerse('\n'.join(kept_lines).strip(), verse_type)

    def titleFromFilename(self, filename):
        """
        Derive a song title from the name of the file.
        """
        title = os.path.split(filename)[1]
        if title.endswith(u'.ptf'):
            title = title[:-4]
        # For some strange reason all example files names ended with 1-7.
        if title.endswith(u'1-7'):
            title = title[:-3]
        return title.replace(u'_', u' ')

    def decode(self, blob):
        """
        Decode ``blob`` with ``self.encoding``. While that fails, the user is
        asked for another encoding, which is kept in ``self.encoding``.
        """
        if isinstance(blob, unicode):
            # Already decoded; unicode() would refuse to decode it again.
            return blob
        while True:
            try:
                return unicode(blob, self.encoding)
            except (UnicodeDecodeError, LookupError, TypeError):
                # Wrong, unknown or missing encoding. Other exceptions, such
                # as KeyboardInterrupt, are deliberately not swallowed.
                self.encoding = retrieve_windows_encoding()

    def unescape(self, text):
        """
        Replace the escape sequences ``^^`` and ``^`` by the double and the
        single quote they stand for and strip surrounding whitespace.
        """
        text = text.replace('^^', '"')
        text = text.replace('^', '\'')
        return text.strip()