From 2939151ff1e78841b361fe1090617ea07b13f6d5 Mon Sep 17 00:00:00 2001 From: Benny Date: Tue, 21 Jun 2011 07:40:53 +0200 Subject: [PATCH 1/7] EasyWorship importer: added conversion of Tags - basically working, but some issues remain --- openlp/plugins/songs/lib/ewimport.py | 728 ++++++++++++++------------- 1 file changed, 380 insertions(+), 348 deletions(-) diff --git a/openlp/plugins/songs/lib/ewimport.py b/openlp/plugins/songs/lib/ewimport.py index 09f84fbe2..2431743d6 100644 --- a/openlp/plugins/songs/lib/ewimport.py +++ b/openlp/plugins/songs/lib/ewimport.py @@ -1,348 +1,380 @@ -# -*- coding: utf-8 -*- -# vim: autoindent shiftwidth=4 expandtab textwidth=80 tabstop=4 softtabstop=4 - -############################################################################### -# OpenLP - Open Source Lyrics Projection # -# --------------------------------------------------------------------------- # -# Copyright (c) 2008-2011 Raoul Snyman # -# Portions copyright (c) 2008-2011 Tim Bentley, Gerald Britton, Jonathan # -# Corwin, Michael Gorven, Scott Guerrieri, Matthias Hub, Meinert Jordan, # -# Armin Köhler, Joshua Miller, Stevan Pettit, Andreas Preikschat, Mattias # -# Põldaru, Christian Richter, Philip Ridout, Simon Scudder, Jeffrey Smith, # -# Maikel Stuivenberg, Martin Thompson, Jon Tibble, Frode Woldsund # -# --------------------------------------------------------------------------- # -# This program is free software; you can redistribute it and/or modify it # -# under the terms of the GNU General Public License as published by the Free # -# Software Foundation; version 2 of the License. # -# # -# This program is distributed in the hope that it will be useful, but WITHOUT # -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # -# more details. # -# # -# You should have received a copy of the GNU General Public License along # -# with this program; if not, write to the Free Software Foundation, Inc., 59 # -# Temple Place, Suite 330, Boston, MA 02111-1307 USA # -############################################################################### -""" -The :mod:`ewimport` module provides the functionality for importing -EasyWorship song databases into the current installation database. -""" - -import os -import struct - -from openlp.core.lib import translate -from openlp.core.ui.wizard import WizardStrings -from openlp.plugins.songs.lib import VerseType -from openlp.plugins.songs.lib import retrieve_windows_encoding -from songimport import SongImport - -def strip_rtf(blob, encoding): - depth = 0 - control = False - clear_text = [] - control_word = [] - for c in blob: - if control: - # for delimiters, set control to False - if c == '{': - if len(control_word) > 0: - depth += 1 - control = False - elif c == '}': - if len(control_word) > 0: - depth -= 1 - control = False - elif c == '\\': - new_control = (len(control_word) > 0) - control = False - elif c.isspace(): - control = False - else: - control_word.append(c) - if len(control_word) == 3 and control_word[0] == '\'': - control = False - if not control: - if len(control_word) == 0: - if c == '{' or c == '}' or c == '\\': - clear_text.append(c) - else: - control_str = ''.join(control_word) - if control_str == 'par' or control_str == 'line': - clear_text.append(u'\n') - elif control_str == 'tab': - clear_text.append(u'\t') - # Prefer the encoding specified by the RTF data to that - # specified by the Paradox table header - # West European encoding - elif control_str == 'fcharset0': - encoding = u'cp1252' - # Greek encoding - elif control_str == 'fcharset161': - encoding = u'cp1253' - # Turkish encoding - elif control_str == 'fcharset162': - encoding = u'cp1254' - # Vietnamese encoding - elif control_str == 'fcharset163': - encoding = u'cp1258' - # Hebrew encoding - elif control_str == 'fcharset177': - encoding = u'cp1255' - # Arabic encoding - elif control_str == 'fcharset178': - encoding = u'cp1256' - # Baltic encoding - elif control_str == 'fcharset186': - encoding = u'cp1257' - # Cyrillic encoding - elif control_str == 'fcharset204': - encoding = u'cp1251' - # Thai encoding - elif control_str == 'fcharset222': - encoding = u'cp874' - # Central+East European encoding - elif control_str == 'fcharset238': - encoding = u'cp1250' - elif control_str[0] == '\'': - s = chr(int(control_str[1:3], 16)) - clear_text.append(s.decode(encoding)) - del control_word[:] - if c == '\\' and new_control: - control = True - elif c == '{': - depth += 1 - elif c == '}': - depth -= 1 - elif depth > 2: - continue - elif c == '\n' or c == '\r': - continue - elif c == '\\': - control = True - else: - clear_text.append(c) - return u''.join(clear_text) - -class FieldDescEntry: - def __init__(self, name, type, size): - self.name = name - self.type = type - self.size = size - - -class EasyWorshipSongImport(SongImport): - """ - The :class:`EasyWorshipSongImport` class provides OpenLP with the - ability to import EasyWorship song files. - """ - def __init__(self, manager, **kwargs): - SongImport.__init__(self, manager, **kwargs) - - def do_import(self): - # Open the DB and MB files if they exist - import_source_mb = self.import_source.replace('.DB', '.MB') - if not os.path.isfile(self.import_source): - return - if not os.path.isfile(import_source_mb): - return - db_size = os.path.getsize(self.import_source) - if db_size < 0x800: - return - db_file = open(self.import_source, 'rb') - self.memo_file = open(import_source_mb, 'rb') - # Don't accept files that are clearly not paradox files - record_size, header_size, block_size, first_block, num_fields \ - = struct.unpack(' 4: - db_file.close() - self.memo_file.close() - return - # Take a stab at how text is encoded - self.encoding = u'cp1252' - db_file.seek(106) - code_page, = struct.unpack(''] - for field_desc in field_descs: - if field_desc.type == 1: - # string - fsl.append('%ds' % field_desc.size) - elif field_desc.type == 3: - # 16-bit int - fsl.append('H') - elif field_desc.type == 4: - # 32-bit int - fsl.append('I') - elif field_desc.type == 9: - # Logical - fsl.append('B') - elif field_desc.type == 0x0c: - # Memo - fsl.append('%ds' % field_desc.size) - elif field_desc.type == 0x0d: - # Blob - fsl.append('%ds' % field_desc.size) - elif field_desc.type == 0x15: - # Timestamp - fsl.append('Q') - else: - fsl.append('%ds' % field_desc.size) - self.record_struct = struct.Struct(''.join(fsl)) - self.field_descs = field_descs - - def get_field(self, field_desc_index): - field = self.fields[field_desc_index] - field_desc = self.field_descs[field_desc_index] - # Return None in case of 'blank' entries - if isinstance(field, str): - if len(field.rstrip('\0')) == 0: - return None - elif field == 0: - return None - # Format the field depending on the field type - if field_desc.type == 1: - # string - return field.rstrip('\0').decode(self.encoding) - elif field_desc.type == 3: - # 16-bit int - return field ^ 0x8000 - elif field_desc.type == 4: - # 32-bit int - return field ^ 0x80000000 - elif field_desc.type == 9: - # Logical - return (field ^ 0x80 == 1) - elif field_desc.type == 0x0c or field_desc.type == 0x0d: - # Memo or Blob - block_start, blob_size = \ - struct.unpack_from(' 63: - return u'' - self.memo_file.seek(11 + (5 * sub_block), os.SEEK_CUR) - sub_block_start, = struct.unpack('B', self.memo_file.read(1)) - self.memo_file.seek(block_start + (sub_block_start * 16)) - else: - return u'' - return self.memo_file.read(blob_size) - else: - return 0 +# -*- coding: utf-8 -*- +# vim: autoindent shiftwidth=4 expandtab textwidth=80 tabstop=4 softtabstop=4 + +############################################################################### +# OpenLP - Open Source Lyrics Projection # +# --------------------------------------------------------------------------- # +# Copyright (c) 2008-2011 Raoul Snyman # +# Portions copyright (c) 2008-2011 Tim Bentley, Gerald Britton, Jonathan # +# Corwin, Michael Gorven, Scott Guerrieri, Matthias Hub, Meinert Jordan, # +# Armin Köhler, Joshua Miller, Stevan Pettit, Andreas Preikschat, Mattias # +# Põldaru, Christian Richter, Philip Ridout, Simon Scudder, Jeffrey Smith, # +# Maikel Stuivenberg, Martin Thompson, Jon Tibble, Frode Woldsund # +# --------------------------------------------------------------------------- # +# This program is free software; you can redistribute it and/or modify it # +# under the terms of the GNU General Public License as published by the Free # +# Software Foundation; version 2 of the License. # +# # +# This program is distributed in the hope that it will be useful, but WITHOUT # +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # +# more details. # +# # +# You should have received a copy of the GNU General Public License along # +# with this program; if not, write to the Free Software Foundation, Inc., 59 # +# Temple Place, Suite 330, Boston, MA 02111-1307 USA # +############################################################################### +""" +The :mod:`ewimport` module provides the functionality for importing +EasyWorship song databases into the current installation database. +""" + +import os +import struct +import re + +from openlp.core.lib import translate +from openlp.core.ui.wizard import WizardStrings +from openlp.plugins.songs.lib import VerseType +from openlp.plugins.songs.lib import retrieve_windows_encoding +from songimport import SongImport + +def strip_rtf(blob, encoding): + depth = 0 + control = False + clear_text = [] + control_word = [] + for c in blob: + if control: + # for delimiters, set control to False + if c == '{': + if len(control_word) > 0: + depth += 1 + control = False + elif c == '}': + if len(control_word) > 0: + depth -= 1 + control = False + elif c == '\\': + new_control = (len(control_word) > 0) + control = False + elif c.isspace(): + control = False + else: + control_word.append(c) + if len(control_word) == 3 and control_word[0] == '\'': + control = False + if not control: + if len(control_word) == 0: + if c == '{' or c == '}' or c == '\\': + clear_text.append(c) + else: + control_str = ''.join(control_word) + if control_str == 'par' or control_str == 'line': + clear_text.append(u'\n') + elif control_str == 'tab': + clear_text.append(u'\t') + # Prefer the encoding specified by the RTF data to that + # specified by the Paradox table header + # West European encoding + elif control_str == 'fcharset0': + encoding = u'cp1252' + # Greek encoding + elif control_str == 'fcharset161': + encoding = u'cp1253' + # Turkish encoding + elif control_str == 'fcharset162': + encoding = u'cp1254' + # Vietnamese encoding + elif control_str == 'fcharset163': + encoding = u'cp1258' + # Hebrew encoding + elif control_str == 'fcharset177': + encoding = u'cp1255' + # Arabic encoding + elif control_str == 'fcharset178': + encoding = u'cp1256' + # Baltic encoding + elif control_str == 'fcharset186': + encoding = u'cp1257' + # Cyrillic encoding + elif control_str == 'fcharset204': + encoding = u'cp1251' + # Thai encoding + elif control_str == 'fcharset222': + encoding = u'cp874' + # Central+East European encoding + elif control_str == 'fcharset238': + encoding = u'cp1250' + elif control_str[0] == '\'': + s = chr(int(control_str[1:3], 16)) + clear_text.append(s.decode(encoding)) + del control_word[:] + if c == '\\' and new_control: + control = True + elif c == '{': + depth += 1 + elif c == '}': + depth -= 1 + elif depth > 2: + continue + elif c == '\n' or c == '\r': + continue + elif c == '\\': + control = True + else: + clear_text.append(c) + return u''.join(clear_text) + +class FieldDescEntry: + def __init__(self, name, type, size): + self.name = name + self.type = type + self.size = size + + +class EasyWorshipSongImport(SongImport): + """ + The :class:`EasyWorshipSongImport` class provides OpenLP with the + ability to import EasyWorship song files. + """ + def __init__(self, manager, **kwargs): + SongImport.__init__(self, manager, **kwargs) + + def do_import(self): + # Open the DB and MB files if they exist + import_source_mb = self.import_source.replace('.DB', '.MB') + if not os.path.isfile(self.import_source): + return + if not os.path.isfile(import_source_mb): + return + db_size = os.path.getsize(self.import_source) + if db_size < 0x800: + return + db_file = open(self.import_source, 'rb') + self.memo_file = open(import_source_mb, 'rb') + # Don't accept files that are clearly not paradox files + record_size, header_size, block_size, first_block, num_fields \ + = struct.unpack(' 4: + db_file.close() + self.memo_file.close() + return + # Take a stab at how text is encoded + self.encoding = u'cp1252' + db_file.seek(106) + code_page, = struct.unpack(' len(type): # tag is followed by number and/or note + p = re.compile(r'[0-9]+') + m = re.search(p, ew_tag) + if m: + number = m.group() + verse_type +=number + + p = re.compile(r'\(.*\)') + m = re.search(p, ew_tag) + if m: + self.comments += ew_tag+'\n' + break + + self.add_verse( + verse_split[-1].strip() if first_line_is_tag else verse.strip(), # TODO: hacky: -1 + verse_type) + if len(self.comments) > 5: + self.comments += unicode(translate('SongsPlugin.EasyWorshipSongImport', + '\n[above are Song Tags with notes imported from EasyWorship]')) + if self.stop_import_flag: + break + if not self.finish(): + self.log_error(self.import_source) + db_file.close() + self.memo_file.close() + + def find_field(self, field_name): + return [i for i, x in enumerate(self.field_descs) + if x.name == field_name][0] + + def set_record_struct(self, field_descs): + # Begin with empty field struct list + fsl = ['>'] + for field_desc in field_descs: + if field_desc.type == 1: + # string + fsl.append('%ds' % field_desc.size) + elif field_desc.type == 3: + # 16-bit int + fsl.append('H') + elif field_desc.type == 4: + # 32-bit int + fsl.append('I') + elif field_desc.type == 9: + # Logical + fsl.append('B') + elif field_desc.type == 0x0c: + # Memo + fsl.append('%ds' % field_desc.size) + elif field_desc.type == 0x0d: + # Blob + fsl.append('%ds' % field_desc.size) + elif field_desc.type == 0x15: + # Timestamp + fsl.append('Q') + else: + fsl.append('%ds' % field_desc.size) + self.record_struct = struct.Struct(''.join(fsl)) + self.field_descs = field_descs + + def get_field(self, field_desc_index): + field = self.fields[field_desc_index] + field_desc = self.field_descs[field_desc_index] + # Return None in case of 'blank' entries + if isinstance(field, str): + if len(field.rstrip('\0')) == 0: + return None + elif field == 0: + return None + # Format the field depending on the field type + if field_desc.type == 1: + # string + return field.rstrip('\0').decode(self.encoding) + elif field_desc.type == 3: + # 16-bit int + return field ^ 0x8000 + elif field_desc.type == 4: + # 32-bit int + return field ^ 0x80000000 + elif field_desc.type == 9: + # Logical + return (field ^ 0x80 == 1) + elif field_desc.type == 0x0c or field_desc.type == 0x0d: + # Memo or Blob + block_start, blob_size = \ + struct.unpack_from(' 63: + return u'' + self.memo_file.seek(11 + (5 * sub_block), os.SEEK_CUR) + sub_block_start, = struct.unpack('B', self.memo_file.read(1)) + self.memo_file.seek(block_start + (sub_block_start * 16)) + else: + return u'' + return self.memo_file.read(blob_size) + else: + return 0 From 31dd4945bae57b2aeaa2f0310290d40824413b6d Mon Sep 17 00:00:00 2001 From: Benny Date: Tue, 21 Jun 2011 07:55:11 +0200 Subject: [PATCH 2/7] fixed line endings --- openlp/plugins/songs/lib/ewimport.py | 759 +++++++++++++-------------- 1 file changed, 379 insertions(+), 380 deletions(-) diff --git a/openlp/plugins/songs/lib/ewimport.py b/openlp/plugins/songs/lib/ewimport.py index 2431743d6..fb82ab347 100644 --- a/openlp/plugins/songs/lib/ewimport.py +++ b/openlp/plugins/songs/lib/ewimport.py @@ -1,380 +1,379 @@ -# -*- coding: utf-8 -*- -# vim: autoindent shiftwidth=4 expandtab textwidth=80 tabstop=4 softtabstop=4 - -############################################################################### -# OpenLP - Open Source Lyrics Projection # -# --------------------------------------------------------------------------- # -# Copyright (c) 2008-2011 Raoul Snyman # -# Portions copyright (c) 2008-2011 Tim Bentley, Gerald Britton, Jonathan # -# Corwin, Michael Gorven, Scott Guerrieri, Matthias Hub, Meinert Jordan, # -# Armin Köhler, Joshua Miller, Stevan Pettit, Andreas Preikschat, Mattias # -# Põldaru, Christian Richter, Philip Ridout, Simon Scudder, Jeffrey Smith, # -# Maikel Stuivenberg, Martin Thompson, Jon Tibble, Frode Woldsund # -# --------------------------------------------------------------------------- # -# This program is free software; you can redistribute it and/or modify it # -# under the terms of the GNU General Public License as published by the Free # -# Software Foundation; version 2 of the License. # -# # -# This program is distributed in the hope that it will be useful, but WITHOUT # -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # -# more details. # -# # -# You should have received a copy of the GNU General Public License along # -# with this program; if not, write to the Free Software Foundation, Inc., 59 # -# Temple Place, Suite 330, Boston, MA 02111-1307 USA # -############################################################################### -""" -The :mod:`ewimport` module provides the functionality for importing -EasyWorship song databases into the current installation database. -""" - -import os -import struct -import re - -from openlp.core.lib import translate -from openlp.core.ui.wizard import WizardStrings -from openlp.plugins.songs.lib import VerseType -from openlp.plugins.songs.lib import retrieve_windows_encoding -from songimport import SongImport - -def strip_rtf(blob, encoding): - depth = 0 - control = False - clear_text = [] - control_word = [] - for c in blob: - if control: - # for delimiters, set control to False - if c == '{': - if len(control_word) > 0: - depth += 1 - control = False - elif c == '}': - if len(control_word) > 0: - depth -= 1 - control = False - elif c == '\\': - new_control = (len(control_word) > 0) - control = False - elif c.isspace(): - control = False - else: - control_word.append(c) - if len(control_word) == 3 and control_word[0] == '\'': - control = False - if not control: - if len(control_word) == 0: - if c == '{' or c == '}' or c == '\\': - clear_text.append(c) - else: - control_str = ''.join(control_word) - if control_str == 'par' or control_str == 'line': - clear_text.append(u'\n') - elif control_str == 'tab': - clear_text.append(u'\t') - # Prefer the encoding specified by the RTF data to that - # specified by the Paradox table header - # West European encoding - elif control_str == 'fcharset0': - encoding = u'cp1252' - # Greek encoding - elif control_str == 'fcharset161': - encoding = u'cp1253' - # Turkish encoding - elif control_str == 'fcharset162': - encoding = u'cp1254' - # Vietnamese encoding - elif control_str == 'fcharset163': - encoding = u'cp1258' - # Hebrew encoding - elif control_str == 'fcharset177': - encoding = u'cp1255' - # Arabic encoding - elif control_str == 'fcharset178': - encoding = u'cp1256' - # Baltic encoding - elif control_str == 'fcharset186': - encoding = u'cp1257' - # Cyrillic encoding - elif control_str == 'fcharset204': - encoding = u'cp1251' - # Thai encoding - elif control_str == 'fcharset222': - encoding = u'cp874' - # Central+East European encoding - elif control_str == 'fcharset238': - encoding = u'cp1250' - elif control_str[0] == '\'': - s = chr(int(control_str[1:3], 16)) - clear_text.append(s.decode(encoding)) - del control_word[:] - if c == '\\' and new_control: - control = True - elif c == '{': - depth += 1 - elif c == '}': - depth -= 1 - elif depth > 2: - continue - elif c == '\n' or c == '\r': - continue - elif c == '\\': - control = True - else: - clear_text.append(c) - return u''.join(clear_text) - -class FieldDescEntry: - def __init__(self, name, type, size): - self.name = name - self.type = type - self.size = size - - -class EasyWorshipSongImport(SongImport): - """ - The :class:`EasyWorshipSongImport` class provides OpenLP with the - ability to import EasyWorship song files. - """ - def __init__(self, manager, **kwargs): - SongImport.__init__(self, manager, **kwargs) - - def do_import(self): - # Open the DB and MB files if they exist - import_source_mb = self.import_source.replace('.DB', '.MB') - if not os.path.isfile(self.import_source): - return - if not os.path.isfile(import_source_mb): - return - db_size = os.path.getsize(self.import_source) - if db_size < 0x800: - return - db_file = open(self.import_source, 'rb') - self.memo_file = open(import_source_mb, 'rb') - # Don't accept files that are clearly not paradox files - record_size, header_size, block_size, first_block, num_fields \ - = struct.unpack(' 4: - db_file.close() - self.memo_file.close() - return - # Take a stab at how text is encoded - self.encoding = u'cp1252' - db_file.seek(106) - code_page, = struct.unpack(' len(type): # tag is followed by number and/or note - p = re.compile(r'[0-9]+') - m = re.search(p, ew_tag) - if m: - number = m.group() - verse_type +=number - - p = re.compile(r'\(.*\)') - m = re.search(p, ew_tag) - if m: - self.comments += ew_tag+'\n' - break - - self.add_verse( - verse_split[-1].strip() if first_line_is_tag else verse.strip(), # TODO: hacky: -1 - verse_type) - if len(self.comments) > 5: - self.comments += unicode(translate('SongsPlugin.EasyWorshipSongImport', - '\n[above are Song Tags with notes imported from EasyWorship]')) - if self.stop_import_flag: - break - if not self.finish(): - self.log_error(self.import_source) - db_file.close() - self.memo_file.close() - - def find_field(self, field_name): - return [i for i, x in enumerate(self.field_descs) - if x.name == field_name][0] - - def set_record_struct(self, field_descs): - # Begin with empty field struct list - fsl = ['>'] - for field_desc in field_descs: - if field_desc.type == 1: - # string - fsl.append('%ds' % field_desc.size) - elif field_desc.type == 3: - # 16-bit int - fsl.append('H') - elif field_desc.type == 4: - # 32-bit int - fsl.append('I') - elif field_desc.type == 9: - # Logical - fsl.append('B') - elif field_desc.type == 0x0c: - # Memo - fsl.append('%ds' % field_desc.size) - elif field_desc.type == 0x0d: - # Blob - fsl.append('%ds' % field_desc.size) - elif field_desc.type == 0x15: - # Timestamp - fsl.append('Q') - else: - fsl.append('%ds' % field_desc.size) - self.record_struct = struct.Struct(''.join(fsl)) - self.field_descs = field_descs - - def get_field(self, field_desc_index): - field = self.fields[field_desc_index] - field_desc = self.field_descs[field_desc_index] - # Return None in case of 'blank' entries - if isinstance(field, str): - if len(field.rstrip('\0')) == 0: - return None - elif field == 0: - return None - # Format the field depending on the field type - if field_desc.type == 1: - # string - return field.rstrip('\0').decode(self.encoding) - elif field_desc.type == 3: - # 16-bit int - return field ^ 0x8000 - elif field_desc.type == 4: - # 32-bit int - return field ^ 0x80000000 - elif field_desc.type == 9: - # Logical - return (field ^ 0x80 == 1) - elif field_desc.type == 0x0c or field_desc.type == 0x0d: - # Memo or Blob - block_start, blob_size = \ - struct.unpack_from(' 63: - return u'' - self.memo_file.seek(11 + (5 * sub_block), os.SEEK_CUR) - sub_block_start, = struct.unpack('B', self.memo_file.read(1)) - self.memo_file.seek(block_start + (sub_block_start * 16)) - else: - return u'' - return self.memo_file.read(blob_size) - else: - return 0 +# -*- coding: utf-8 -*- +# vim: autoindent shiftwidth=4 expandtab textwidth=80 tabstop=4 softtabstop=4 + +############################################################################### +# OpenLP - Open Source Lyrics Projection # +# --------------------------------------------------------------------------- # +# Copyright (c) 2008-2011 Raoul Snyman # +# Portions copyright (c) 2008-2011 Tim Bentley, Gerald Britton, Jonathan # +# Corwin, Michael Gorven, Scott Guerrieri, Matthias Hub, Meinert Jordan, # +# Armin Köhler, Joshua Miller, Stevan Pettit, Andreas Preikschat, Mattias # +# Põldaru, Christian Richter, Philip Ridout, Simon Scudder, Jeffrey Smith, # +# Maikel Stuivenberg, Martin Thompson, Jon Tibble, Frode Woldsund # +# --------------------------------------------------------------------------- # +# This program is free software; you can redistribute it and/or modify it # +# under the terms of the GNU General Public License as published by the Free # +# Software Foundation; version 2 of the License. # +# # +# This program is distributed in the hope that it will be useful, but WITHOUT # +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # +# more details. # +# # +# You should have received a copy of the GNU General Public License along # +# with this program; if not, write to the Free Software Foundation, Inc., 59 # +# Temple Place, Suite 330, Boston, MA 02111-1307 USA # +############################################################################### +""" +The :mod:`ewimport` module provides the functionality for importing +EasyWorship song databases into the current installation database. +""" + +import os +import struct +import re + +from openlp.core.lib import translate +from openlp.core.ui.wizard import WizardStrings +from openlp.plugins.songs.lib import VerseType +from openlp.plugins.songs.lib import retrieve_windows_encoding +from songimport import SongImport + +def strip_rtf(blob, encoding): + depth = 0 + control = False + clear_text = [] + control_word = [] + for c in blob: + if control: + # for delimiters, set control to False + if c == '{': + if len(control_word) > 0: + depth += 1 + control = False + elif c == '}': + if len(control_word) > 0: + depth -= 1 + control = False + elif c == '\\': + new_control = (len(control_word) > 0) + control = False + elif c.isspace(): + control = False + else: + control_word.append(c) + if len(control_word) == 3 and control_word[0] == '\'': + control = False + if not control: + if len(control_word) == 0: + if c == '{' or c == '}' or c == '\\': + clear_text.append(c) + else: + control_str = ''.join(control_word) + if control_str == 'par' or control_str == 'line': + clear_text.append(u'\n') + elif control_str == 'tab': + clear_text.append(u'\t') + # Prefer the encoding specified by the RTF data to that + # specified by the Paradox table header + # West European encoding + elif control_str == 'fcharset0': + encoding = u'cp1252' + # Greek encoding + elif control_str == 'fcharset161': + encoding = u'cp1253' + # Turkish encoding + elif control_str == 'fcharset162': + encoding = u'cp1254' + # Vietnamese encoding + elif control_str == 'fcharset163': + encoding = u'cp1258' + # Hebrew encoding + elif control_str == 'fcharset177': + encoding = u'cp1255' + # Arabic encoding + elif control_str == 'fcharset178': + encoding = u'cp1256' + # Baltic encoding + elif control_str == 'fcharset186': + encoding = u'cp1257' + # Cyrillic encoding + elif control_str == 'fcharset204': + encoding = u'cp1251' + # Thai encoding + elif control_str == 'fcharset222': + encoding = u'cp874' + # Central+East European encoding + elif control_str == 'fcharset238': + encoding = u'cp1250' + elif control_str[0] == '\'': + s = chr(int(control_str[1:3], 16)) + clear_text.append(s.decode(encoding)) + del control_word[:] + if c == '\\' and new_control: + control = True + elif c == '{': + depth += 1 + elif c == '}': + depth -= 1 + elif depth > 2: + continue + elif c == '\n' or c == '\r': + continue + elif c == '\\': + control = True + else: + clear_text.append(c) + return u''.join(clear_text) + +class FieldDescEntry: + def __init__(self, name, type, size): + self.name = name + self.type = type + self.size = size + + +class EasyWorshipSongImport(SongImport): + """ + The :class:`EasyWorshipSongImport` class provides OpenLP with the + ability to import EasyWorship song files. + """ + def __init__(self, manager, **kwargs): + SongImport.__init__(self, manager, **kwargs) + + def do_import(self): + # Open the DB and MB files if they exist + import_source_mb = self.import_source.replace('.DB', '.MB') + if not os.path.isfile(self.import_source): + return + if not os.path.isfile(import_source_mb): + return + db_size = os.path.getsize(self.import_source) + if db_size < 0x800: + return + db_file = open(self.import_source, 'rb') + self.memo_file = open(import_source_mb, 'rb') + # Don't accept files that are clearly not paradox files + record_size, header_size, block_size, first_block, num_fields \ + = struct.unpack(' 4: + db_file.close() + self.memo_file.close() + return + # Take a stab at how text is encoded + self.encoding = u'cp1252' + db_file.seek(106) + code_page, = struct.unpack(' len(type): # tag is followed by number and/or note + p = re.compile(r'[0-9]+') + m = re.search(p, ew_tag) + if m: + number = m.group() + verse_type +=number + + p = re.compile(r'\(.*\)') + m = re.search(p, ew_tag) + if m: + self.comments += ew_tag+'\n' + break + self.add_verse( + verse_split[-1].strip() if first_line_is_tag else verse.strip(), # TODO: hacky: -1 + verse_type) + if len(self.comments) > 5: + self.comments += unicode(translate('SongsPlugin.EasyWorshipSongImport', + '\n[above are Song Tags with notes imported from EasyWorship]')) + if self.stop_import_flag: + break + if not self.finish(): + self.log_error(self.import_source) + db_file.close() + self.memo_file.close() + + def find_field(self, field_name): + return [i for i, x in enumerate(self.field_descs) + if x.name == field_name][0] + + def set_record_struct(self, field_descs): + # Begin with empty field struct list + fsl = ['>'] + for field_desc in field_descs: + if field_desc.type == 1: + # string + fsl.append('%ds' % field_desc.size) + elif field_desc.type == 3: + # 16-bit int + fsl.append('H') + elif field_desc.type == 4: + # 32-bit int + fsl.append('I') + elif field_desc.type == 9: + # Logical + fsl.append('B') + elif field_desc.type == 0x0c: + # Memo + fsl.append('%ds' % field_desc.size) + elif field_desc.type == 0x0d: + # Blob + fsl.append('%ds' % field_desc.size) + elif field_desc.type == 0x15: + # Timestamp + fsl.append('Q') + else: + fsl.append('%ds' % field_desc.size) + self.record_struct = struct.Struct(''.join(fsl)) + self.field_descs = field_descs + + def get_field(self, field_desc_index): + field = self.fields[field_desc_index] + field_desc = self.field_descs[field_desc_index] + # Return None in case of 'blank' entries + if isinstance(field, str): + if len(field.rstrip('\0')) == 0: + return None + elif field == 0: + return None + # Format the field depending on the field type + if field_desc.type == 1: + # string + return field.rstrip('\0').decode(self.encoding) + elif field_desc.type == 3: + # 16-bit int + return field ^ 0x8000 + elif field_desc.type == 4: + # 32-bit int + return field ^ 0x80000000 + elif field_desc.type == 9: + # Logical + return (field ^ 0x80 == 1) + elif field_desc.type == 0x0c or field_desc.type == 0x0d: + # Memo or Blob + block_start, blob_size = \ + struct.unpack_from(' 63: + return u'' + self.memo_file.seek(11 + (5 * sub_block), os.SEEK_CUR) + sub_block_start, = struct.unpack('B', self.memo_file.read(1)) + self.memo_file.seek(block_start + (sub_block_start * 16)) + else: + return u'' + return self.memo_file.read(blob_size) + else: + return 0 From bc808ade93762cf5d3a1a4501e0f3378a1cee4c3 Mon Sep 17 00:00:00 2001 From: Benny Date: Sat, 2 Jul 2011 00:45:27 +0200 Subject: [PATCH 3/7] EasyWorshipSongImport: use tag from previous slide for slides without tag, fix regex for notes --- openlp/plugins/songs/lib/ewimport.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/openlp/plugins/songs/lib/ewimport.py b/openlp/plugins/songs/lib/ewimport.py index c207a07d2..95533ba94 100644 --- a/openlp/plugins/songs/lib/ewimport.py +++ b/openlp/plugins/songs/lib/ewimport.py @@ -261,6 +261,7 @@ class EasyWorshipSongImport(SongImport): # Format the lyrics words = strip_rtf(words, self.encoding) # TODO: convert rtf instead of stripping? p = re.compile(r'\n *?\n[\n ]*') # at least two newlines, with zero or more space characters between them + verse_type = VerseType.Tags[VerseType.Verse] # TODO!!!: use previous verse type.... for verse in p.split(words): #for verse in words.split(u'\n\n'): # ew tags: verse, chorus, pre-chorus, bridge, tag, intro, ending, slide @@ -268,13 +269,11 @@ class EasyWorshipSongImport(SongImport): if len(verse) == 0: continue verse_split = verse.split(u'\n', 1) - verse_type = VerseType.Tags[VerseType.Verse] first_line_is_tag = False for type in VerseType.Names+['tag', 'slide']: # doesnt cover tag, slide type = type.lower() ew_tag = verse_split[0].strip().lower() if ew_tag.startswith(type): - #print ew_tag verse_type = type[0] if type == 'tag' or type == 'slide': verse_type = VerseType.Tags[VerseType.Other] @@ -286,7 +285,7 @@ class EasyWorshipSongImport(SongImport): number = m.group() verse_type +=number - p = re.compile(r'\(.*\)') + p = re.compile(r'\(.*?\)') m = re.search(p, ew_tag) if m: self.comments += ew_tag+'\n' From 4bf45ad2defe7018062c65abdc1b586e7d241c2e Mon Sep 17 00:00:00 2001 From: Benny Date: Mon, 4 Jul 2011 22:51:43 +0200 Subject: [PATCH 4/7] ewimport: workaround for RTF stripping bug --- openlp/plugins/songs/lib/ewimport.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/openlp/plugins/songs/lib/ewimport.py b/openlp/plugins/songs/lib/ewimport.py index 95533ba94..a50c97f47 100644 --- a/openlp/plugins/songs/lib/ewimport.py +++ b/openlp/plugins/songs/lib/ewimport.py @@ -44,6 +44,14 @@ def strip_rtf(blob, encoding): control = False clear_text = [] control_word = [] + + # workaround for \tx bug: remove one pair of curly braces if \tx is encountered + p = re.compile(r'\{\\tx[^}]*\}') + m = p.search(blob) + if m: + # start and end indices of match are curly braces - filter them out + blob = ''.join([blob[i] for i in xrange(len(blob)) if i != m.start() and i !=m.end()]) + for c in blob: if control: # for delimiters, set control to False From 3c0c9c5b781d80899f1b73543a085c779bfc9ac8 Mon Sep 17 00:00:00 2001 From: Benny Date: Tue, 5 Jul 2011 00:55:57 +0200 Subject: [PATCH 5/7] EasyWorship importer: some work to create more reasonable verse numbers if EW tags are missing or without numbers --- openlp/plugins/songs/lib/ewimport.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/openlp/plugins/songs/lib/ewimport.py b/openlp/plugins/songs/lib/ewimport.py index a50c97f47..18b87f9c0 100644 --- a/openlp/plugins/songs/lib/ewimport.py +++ b/openlp/plugins/songs/lib/ewimport.py @@ -267,18 +267,18 @@ class EasyWorshipSongImport(SongImport): self.add_author(author_name.strip()) if words: # Format the lyrics - words = strip_rtf(words, self.encoding) # TODO: convert rtf instead of stripping? - p = re.compile(r'\n *?\n[\n ]*') # at least two newlines, with zero or more space characters between them - verse_type = VerseType.Tags[VerseType.Verse] # TODO!!!: use previous verse type.... + words = strip_rtf(words, self.encoding) # TODO: convert rtf to display tags? + # regex: at least two newlines, with zero or more space characters between them + p = re.compile(r'\n *?\n[\n ]*') + verse_type = VerseType.Tags[VerseType.Verse] for verse in p.split(words): - #for verse in words.split(u'\n\n'): - # ew tags: verse, chorus, pre-chorus, bridge, tag, intro, ending, slide verse = verse.strip() if len(verse) == 0: continue verse_split = verse.split(u'\n', 1) first_line_is_tag = False - for type in VerseType.Names+['tag', 'slide']: # doesnt cover tag, slide + # ew tags: verse, chorus, pre-chorus, bridge, tag, intro, ending, slide + for type in VerseType.Names+['tag', 'slide']: type = type.lower() ew_tag = verse_split[0].strip().lower() if ew_tag.startswith(type): @@ -286,20 +286,24 @@ class EasyWorshipSongImport(SongImport): if type == 'tag' or type == 'slide': verse_type = VerseType.Tags[VerseType.Other] first_line_is_tag = True + number_found = False if len(ew_tag) > len(type): # tag is followed by number and/or note p = re.compile(r'[0-9]+') m = re.search(p, ew_tag) if m: number = m.group() verse_type +=number + number_found = True p = re.compile(r'\(.*?\)') m = re.search(p, ew_tag) if m: self.comments += ew_tag+'\n' + if not number_found: + verse_type += '1' break self.add_verse( - verse_split[-1].strip() if first_line_is_tag else verse, # TODO: hacky: -1 + verse_split[-1].strip() if first_line_is_tag else verse, verse_type) if len(self.comments) > 5: self.comments += unicode(translate('SongsPlugin.EasyWorshipSongImport', From 6436b05240635a5b5c3fd4675e32fad3bf46c7d4 Mon Sep 17 00:00:00 2001 From: Benny Date: Tue, 5 Jul 2011 12:50:55 +0200 Subject: [PATCH 6/7] changes from review (cosmetic & regex performance) --- openlp/plugins/songs/lib/ewimport.py | 54 +++++++++++++++------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/openlp/plugins/songs/lib/ewimport.py b/openlp/plugins/songs/lib/ewimport.py index 18b87f9c0..732c6e4f0 100644 --- a/openlp/plugins/songs/lib/ewimport.py +++ b/openlp/plugins/songs/lib/ewimport.py @@ -39,18 +39,25 @@ from openlp.plugins.songs.lib import VerseType from openlp.plugins.songs.lib import retrieve_windows_encoding from songimport import SongImport +RTF_STRIPPING_REGEX = re.compile(r'\{\\tx[^}]*\}') +# regex: at least two newlines, can have spaces between them +SLIDE_BREAK_REGEX = re.compile(r'\n *?\n[\n ]*') +NUMBER_REGEX = re.compile(r'[0-9]+') +NOTE_REGEX = re.compile(r'\(.*?\)') + def strip_rtf(blob, encoding): depth = 0 control = False clear_text = [] control_word = [] - # workaround for \tx bug: remove one pair of curly braces if \tx is encountered - p = re.compile(r'\{\\tx[^}]*\}') - m = p.search(blob) - if m: + # workaround for \tx bug: remove one pair of curly braces + # if \tx is encountered + match = RTF_STRIPPING_REGEX.search(blob) + if match: # start and end indices of match are curly braces - filter them out - blob = ''.join([blob[i] for i in xrange(len(blob)) if i != m.start() and i !=m.end()]) + blob = ''.join([blob[i] for i in xrange(len(blob)) + if i != match.start() and i !=match.end()]) for c in blob: if control: @@ -267,17 +274,16 @@ class EasyWorshipSongImport(SongImport): self.add_author(author_name.strip()) if words: # Format the lyrics - words = strip_rtf(words, self.encoding) # TODO: convert rtf to display tags? - # regex: at least two newlines, with zero or more space characters between them - p = re.compile(r'\n *?\n[\n ]*') + words = strip_rtf(words, self.encoding) verse_type = VerseType.Tags[VerseType.Verse] - for verse in p.split(words): + for verse in SLIDE_BREAK_REGEX.split(words): verse = verse.strip() if len(verse) == 0: continue - verse_split = verse.split(u'\n', 1) + verse_split = verse.split(u'\n', 1) first_line_is_tag = False - # ew tags: verse, chorus, pre-chorus, bridge, tag, intro, ending, slide + # EW tags: verse, chorus, pre-chorus, bridge, tag, + # intro, ending, slide for type in VerseType.Names+['tag', 'slide']: type = type.lower() ew_tag = verse_split[0].strip().lower() @@ -287,27 +293,27 @@ class EasyWorshipSongImport(SongImport): verse_type = VerseType.Tags[VerseType.Other] first_line_is_tag = True number_found = False - if len(ew_tag) > len(type): # tag is followed by number and/or note - p = re.compile(r'[0-9]+') - m = re.search(p, ew_tag) - if m: - number = m.group() + # check if tag is followed by number and/or note + if len(ew_tag) > len(type): + match = NUMBER_REGEX.search(ew_tag) + if match: + number = match.group() verse_type +=number number_found = True - - p = re.compile(r'\(.*?\)') - m = re.search(p, ew_tag) - if m: - self.comments += ew_tag+'\n' + match = NOTE_REGEX.search(ew_tag) + if match: + self.comments += ew_tag + u'\n' if not number_found: - verse_type += '1' + verse_type += u'1' break self.add_verse( verse_split[-1].strip() if first_line_is_tag else verse, verse_type) if len(self.comments) > 5: - self.comments += unicode(translate('SongsPlugin.EasyWorshipSongImport', - '\n[above are Song Tags with notes imported from EasyWorship]')) + self.comments += unicode( + translate('SongsPlugin.EasyWorshipSongImport', + '\n[above are Song Tags with notes imported from \ + EasyWorship]')) if self.stop_import_flag: break if not self.finish(): From 1876d520ae4a23899210c6ca5efe3ee474a588ac Mon Sep 17 00:00:00 2001 From: Benny Date: Tue, 5 Jul 2011 14:00:34 +0200 Subject: [PATCH 7/7] review fixes --- openlp/plugins/songs/lib/ewimport.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/openlp/plugins/songs/lib/ewimport.py b/openlp/plugins/songs/lib/ewimport.py index 732c6e4f0..448d629d5 100644 --- a/openlp/plugins/songs/lib/ewimport.py +++ b/openlp/plugins/songs/lib/ewimport.py @@ -278,18 +278,18 @@ class EasyWorshipSongImport(SongImport): verse_type = VerseType.Tags[VerseType.Verse] for verse in SLIDE_BREAK_REGEX.split(words): verse = verse.strip() - if len(verse) == 0: + if not verse: continue verse_split = verse.split(u'\n', 1) first_line_is_tag = False # EW tags: verse, chorus, pre-chorus, bridge, tag, # intro, ending, slide - for type in VerseType.Names+['tag', 'slide']: + for type in VerseType.Names+[u'tag', u'slide']: type = type.lower() ew_tag = verse_split[0].strip().lower() if ew_tag.startswith(type): verse_type = type[0] - if type == 'tag' or type == 'slide': + if type == u'tag' or type == u'slide': verse_type = VerseType.Tags[VerseType.Other] first_line_is_tag = True number_found = False @@ -298,7 +298,7 @@ class EasyWorshipSongImport(SongImport): match = NUMBER_REGEX.search(ew_tag) if match: number = match.group() - verse_type +=number + verse_type += number number_found = True match = NOTE_REGEX.search(ew_tag) if match: