From 2939151ff1e78841b361fe1090617ea07b13f6d5 Mon Sep 17 00:00:00 2001
From: Benny <benny.wasty@googlemail.com>
Date: Tue, 21 Jun 2011 07:40:53 +0200
Subject: [PATCH 1/7] EasyWorship importer: added conversion of Tags -
 basically working, but some issues remain

---
 openlp/plugins/songs/lib/ewimport.py | 728 ++++++++++++++-------------
 1 file changed, 380 insertions(+), 348 deletions(-)

diff --git a/openlp/plugins/songs/lib/ewimport.py b/openlp/plugins/songs/lib/ewimport.py
index 09f84fbe2..2431743d6 100644
--- a/openlp/plugins/songs/lib/ewimport.py
+++ b/openlp/plugins/songs/lib/ewimport.py
@@ -1,348 +1,380 @@
-# -*- coding: utf-8 -*-
-# vim: autoindent shiftwidth=4 expandtab textwidth=80 tabstop=4 softtabstop=4
-
-###############################################################################
-# OpenLP - Open Source Lyrics Projection                                      #
-# --------------------------------------------------------------------------- #
-# Copyright (c) 2008-2011 Raoul Snyman                                        #
-# Portions copyright (c) 2008-2011 Tim Bentley, Gerald Britton, Jonathan      #
-# Corwin, Michael Gorven, Scott Guerrieri, Matthias Hub, Meinert Jordan,      #
-# Armin Köhler, Joshua Miller, Stevan Pettit, Andreas Preikschat, Mattias     #
-# Põldaru, Christian Richter, Philip Ridout, Simon Scudder, Jeffrey Smith,    #
-# Maikel Stuivenberg, Martin Thompson, Jon Tibble, Frode Woldsund             #
-# --------------------------------------------------------------------------- #
-# This program is free software; you can redistribute it and/or modify it     #
-# under the terms of the GNU General Public License as published by the Free  #
-# Software Foundation; version 2 of the License.                              #
-#                                                                             #
-# This program is distributed in the hope that it will be useful, but WITHOUT #
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or       #
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for    #
-# more details.                                                               #
-#                                                                             #
-# You should have received a copy of the GNU General Public License along     #
-# with this program; if not, write to the Free Software Foundation, Inc., 59  #
-# Temple Place, Suite 330, Boston, MA 02111-1307 USA                          #
-###############################################################################
-"""
-The :mod:`ewimport` module provides the functionality for importing
-EasyWorship song databases into the current installation database.
-"""
-
-import os
-import struct
-
-from openlp.core.lib import translate
-from openlp.core.ui.wizard import WizardStrings
-from openlp.plugins.songs.lib import VerseType
-from openlp.plugins.songs.lib import retrieve_windows_encoding
-from songimport import SongImport
-
-def strip_rtf(blob, encoding):
-    depth = 0
-    control = False
-    clear_text = []
-    control_word = []
-    for c in blob:
-        if control:
-            # for delimiters, set control to False
-            if c == '{':
-                if len(control_word) > 0:
-                    depth += 1
-                control = False
-            elif c == '}':
-                if len(control_word) > 0:
-                    depth -= 1
-                control = False
-            elif c == '\\':
-                new_control = (len(control_word) > 0)
-                control = False
-            elif c.isspace():
-                control = False
-            else:
-                control_word.append(c)
-                if len(control_word) == 3 and control_word[0] == '\'':
-                    control = False
-            if not control:
-                if len(control_word) == 0:
-                    if c == '{' or c == '}' or c == '\\':
-                        clear_text.append(c)
-                else:
-                    control_str = ''.join(control_word)
-                    if control_str == 'par' or control_str == 'line':
-                        clear_text.append(u'\n')
-                    elif control_str == 'tab':
-                        clear_text.append(u'\t')
-                    # Prefer the encoding specified by the RTF data to that
-                    # specified by the Paradox table header
-                    # West European encoding
-                    elif control_str == 'fcharset0':
-                        encoding = u'cp1252'
-                    # Greek encoding
-                    elif control_str == 'fcharset161':
-                        encoding = u'cp1253'
-                    # Turkish encoding
-                    elif control_str == 'fcharset162':
-                        encoding = u'cp1254'
-                    # Vietnamese encoding
-                    elif control_str == 'fcharset163':
-                        encoding = u'cp1258'
-                    # Hebrew encoding
-                    elif control_str == 'fcharset177':
-                        encoding = u'cp1255'
-                    # Arabic encoding
-                    elif control_str == 'fcharset178':
-                        encoding = u'cp1256'
-                    # Baltic encoding
-                    elif control_str == 'fcharset186':
-                        encoding = u'cp1257'
-                    # Cyrillic encoding
-                    elif control_str == 'fcharset204':
-                        encoding = u'cp1251'
-                    # Thai encoding
-                    elif control_str == 'fcharset222':
-                        encoding = u'cp874'
-                    # Central+East European encoding
-                    elif control_str == 'fcharset238':
-                        encoding = u'cp1250'
-                    elif control_str[0] == '\'':
-                        s = chr(int(control_str[1:3], 16))
-                        clear_text.append(s.decode(encoding))
-                    del control_word[:]
-            if c == '\\' and new_control:
-                control = True
-        elif c == '{':
-            depth += 1
-        elif c == '}':
-            depth -= 1
-        elif depth > 2:
-            continue
-        elif c == '\n' or c == '\r':
-            continue
-        elif c == '\\':
-            control = True
-        else:
-            clear_text.append(c)
-    return u''.join(clear_text)
-
-class FieldDescEntry:
-    def __init__(self, name, type, size):
-        self.name = name
-        self.type = type
-        self.size = size
-
-
-class EasyWorshipSongImport(SongImport):
-    """
-    The :class:`EasyWorshipSongImport` class provides OpenLP with the
-    ability to import EasyWorship song files.
-    """
-    def __init__(self, manager, **kwargs):
-        SongImport.__init__(self, manager, **kwargs)
-
-    def do_import(self):
-        # Open the DB and MB files if they exist
-        import_source_mb = self.import_source.replace('.DB', '.MB')
-        if not os.path.isfile(self.import_source):
-            return
-        if not os.path.isfile(import_source_mb):
-            return
-        db_size = os.path.getsize(self.import_source)
-        if db_size < 0x800:
-            return
-        db_file = open(self.import_source, 'rb')
-        self.memo_file = open(import_source_mb, 'rb')
-        # Don't accept files that are clearly not paradox files
-        record_size, header_size, block_size, first_block, num_fields \
-            = struct.unpack('<hhxb8xh17xh', db_file.read(35))
-        if header_size != 0x800 or block_size < 1 or block_size > 4:
-            db_file.close()
-            self.memo_file.close()
-            return
-        # Take a stab at how text is encoded
-        self.encoding = u'cp1252'
-        db_file.seek(106)
-        code_page, = struct.unpack('<h', db_file.read(2))
-        if code_page == 852:
-            self.encoding = u'cp1250'
-        # The following codepage to actual encoding mappings have not been
-        # observed, but merely guessed. Actual example files are needed.
-        elif code_page == 737:
-            self.encoding = u'cp1253'
-        elif code_page == 775:
-            self.encoding = u'cp1257'
-        elif code_page == 855:
-            self.encoding = u'cp1251'
-        elif code_page == 857:
-            self.encoding = u'cp1254'
-        elif code_page == 866:
-            self.encoding = u'cp1251'
-        elif code_page == 869:
-            self.encoding = u'cp1253'
-        elif code_page == 862:
-            self.encoding = u'cp1255'
-        elif code_page == 874:
-            self.encoding = u'cp874'
-        self.encoding = retrieve_windows_encoding(self.encoding)
-        if not self.encoding:
-            return
-        # There does not appear to be a _reliable_ way of getting the number
-        # of songs/records, so let's use file blocks for measuring progress.
-        total_blocks = (db_size - header_size) / (block_size * 1024)
-        self.import_wizard.progressBar.setMaximum(total_blocks)
-        # Read the field description information
-        db_file.seek(120)
-        field_info = db_file.read(num_fields * 2)
-        db_file.seek(4 + (num_fields * 4) + 261, os.SEEK_CUR)
-        field_names = db_file.read(header_size - db_file.tell()).split('\0',
-            num_fields)
-        field_names.pop()
-        field_descs = []
-        for i, field_name in enumerate(field_names):
-            field_type, field_size = struct.unpack_from('BB',
-                field_info, i * 2)
-            field_descs.append(FieldDescEntry(field_name, field_type,
-                field_size))
-        self.set_record_struct(field_descs)
-        # Pick out the field description indexes we will need
-        try:
-            success = True
-            fi_title = self.find_field(u'Title')
-            fi_author = self.find_field(u'Author')
-            fi_copy = self.find_field(u'Copyright')
-            fi_admin = self.find_field(u'Administrator')
-            fi_words = self.find_field(u'Words')
-            fi_ccli = self.find_field(u'Song Number')
-        except IndexError:
-            # This is the wrong table
-            success = False
-        # Loop through each block of the file
-        cur_block = first_block
-        while cur_block != 0 and success:
-            db_file.seek(header_size + ((cur_block - 1) * 1024 * block_size))
-            cur_block, rec_count = struct.unpack('<h2xh', db_file.read(6))
-            rec_count = (rec_count + record_size) / record_size
-            # Loop through each record within the current block
-            for i in range(rec_count):
-                if self.stop_import_flag:
-                    break
-                raw_record = db_file.read(record_size)
-                self.fields = self.record_struct.unpack(raw_record)
-                self.set_defaults()
-                self.title = self.get_field(fi_title)
-                # Get remaining fields.
-                copy = self.get_field(fi_copy)
-                admin = self.get_field(fi_admin)
-                ccli = self.get_field(fi_ccli)
-                authors = self.get_field(fi_author)
-                words = self.get_field(fi_words)
-                # Set the SongImport object members.
-                if copy:
-                    self.copyright = copy
-                if admin:
-                    if copy:
-                        self.copyright += u', '
-                    self.copyright += \
-                        unicode(translate('SongsPlugin.EasyWorshipSongImport',
-                            'Administered by %s')) % admin
-                if ccli:
-                    self.ccli_number = ccli
-                if authors:
-                    # Split up the authors
-                    author_list = authors.split(u'/')
-                    if len(author_list) < 2:
-                        author_list = authors.split(u';')
-                    if len(author_list) < 2:
-                        author_list = authors.split(u',')
-                    for author_name in author_list:
-                        self.add_author(author_name.strip())
-                if words:
-                    # Format the lyrics
-                    words = strip_rtf(words, self.encoding)
-                    for verse in words.split(u'\n\n'):
-                        self.add_verse(
-                            verse.strip(), VerseType.Tags[VerseType.Verse])
-                if self.stop_import_flag:
-                    break
-                if not self.finish():
-                    self.log_error(self.import_source)
-        db_file.close()
-        self.memo_file.close()
-
-    def find_field(self, field_name):
-        return [i for i, x in enumerate(self.field_descs)
-            if x.name == field_name][0]
-
-    def set_record_struct(self, field_descs):
-        # Begin with empty field struct list
-        fsl = ['>']
-        for field_desc in field_descs:
-            if field_desc.type == 1:
-                # string
-                fsl.append('%ds' % field_desc.size)
-            elif field_desc.type == 3:
-                # 16-bit int
-                fsl.append('H')
-            elif field_desc.type == 4:
-                # 32-bit int
-                fsl.append('I')
-            elif field_desc.type == 9:
-                # Logical
-                fsl.append('B')
-            elif field_desc.type == 0x0c:
-                # Memo
-                fsl.append('%ds' % field_desc.size)
-            elif field_desc.type == 0x0d:
-                # Blob
-                fsl.append('%ds' % field_desc.size)
-            elif field_desc.type == 0x15:
-                # Timestamp
-                fsl.append('Q')
-            else:
-                fsl.append('%ds' % field_desc.size)
-        self.record_struct = struct.Struct(''.join(fsl))
-        self.field_descs = field_descs
-
-    def get_field(self, field_desc_index):
-        field = self.fields[field_desc_index]
-        field_desc = self.field_descs[field_desc_index]
-        # Return None in case of 'blank' entries
-        if isinstance(field, str):
-            if len(field.rstrip('\0')) == 0:
-                return None
-        elif field == 0:
-            return None
-        # Format the field depending on the field type
-        if field_desc.type == 1:
-            # string
-            return field.rstrip('\0').decode(self.encoding)
-        elif field_desc.type == 3:
-            # 16-bit int
-            return field ^ 0x8000
-        elif field_desc.type == 4:
-            # 32-bit int
-            return field ^ 0x80000000
-        elif field_desc.type == 9:
-            # Logical
-            return (field ^ 0x80 == 1)
-        elif field_desc.type == 0x0c or field_desc.type == 0x0d:
-            # Memo or Blob
-            block_start, blob_size = \
-                struct.unpack_from('<II', field, len(field)-10)
-            sub_block = block_start & 0xff
-            block_start &= ~0xff
-            self.memo_file.seek(block_start)
-            memo_block_type, = struct.unpack('b', self.memo_file.read(1))
-            if memo_block_type == 2:
-                self.memo_file.seek(8, os.SEEK_CUR)
-            elif memo_block_type == 3:
-                if sub_block > 63:
-                    return u''
-                self.memo_file.seek(11 + (5 * sub_block), os.SEEK_CUR)
-                sub_block_start, = struct.unpack('B', self.memo_file.read(1))
-                self.memo_file.seek(block_start + (sub_block_start * 16))
-            else:
-                return u''
-            return self.memo_file.read(blob_size)
-        else:
-            return 0
+# -*- coding: utf-8 -*-
+# vim: autoindent shiftwidth=4 expandtab textwidth=80 tabstop=4 softtabstop=4
+
+###############################################################################
+# OpenLP - Open Source Lyrics Projection                                      #
+# --------------------------------------------------------------------------- #
+# Copyright (c) 2008-2011 Raoul Snyman                                        #
+# Portions copyright (c) 2008-2011 Tim Bentley, Gerald Britton, Jonathan      #
+# Corwin, Michael Gorven, Scott Guerrieri, Matthias Hub, Meinert Jordan,      #
+# Armin Köhler, Joshua Miller, Stevan Pettit, Andreas Preikschat, Mattias     #
+# Põldaru, Christian Richter, Philip Ridout, Simon Scudder, Jeffrey Smith,    #
+# Maikel Stuivenberg, Martin Thompson, Jon Tibble, Frode Woldsund             #
+# --------------------------------------------------------------------------- #
+# This program is free software; you can redistribute it and/or modify it     #
+# under the terms of the GNU General Public License as published by the Free  #
+# Software Foundation; version 2 of the License.                              #
+#                                                                             #
+# This program is distributed in the hope that it will be useful, but WITHOUT #
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or       #
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for    #
+# more details.                                                               #
+#                                                                             #
+# You should have received a copy of the GNU General Public License along     #
+# with this program; if not, write to the Free Software Foundation, Inc., 59  #
+# Temple Place, Suite 330, Boston, MA 02111-1307 USA                          #
+###############################################################################
+"""
+The :mod:`ewimport` module provides the functionality for importing
+EasyWorship song databases into the current installation database.
+"""
+
+import os
+import struct
+import re
+
+from openlp.core.lib import translate
+from openlp.core.ui.wizard import WizardStrings
+from openlp.plugins.songs.lib import VerseType
+from openlp.plugins.songs.lib import retrieve_windows_encoding
+from songimport import SongImport
+
+def strip_rtf(blob, encoding):
+    depth = 0
+    control = False
+    clear_text = []
+    control_word = []
+    for c in blob:
+        if control:
+            # for delimiters, set control to False
+            if c == '{':
+                if len(control_word) > 0:
+                    depth += 1
+                control = False
+            elif c == '}':
+                if len(control_word) > 0:
+                    depth -= 1
+                control = False
+            elif c == '\\':
+                new_control = (len(control_word) > 0)
+                control = False
+            elif c.isspace():
+                control = False
+            else:
+                control_word.append(c)
+                if len(control_word) == 3 and control_word[0] == '\'':
+                    control = False
+            if not control:
+                if len(control_word) == 0:
+                    if c == '{' or c == '}' or c == '\\':
+                        clear_text.append(c)
+                else:
+                    control_str = ''.join(control_word)
+                    if control_str == 'par' or control_str == 'line':
+                        clear_text.append(u'\n')
+                    elif control_str == 'tab':
+                        clear_text.append(u'\t')
+                    # Prefer the encoding specified by the RTF data to that
+                    # specified by the Paradox table header
+                    # West European encoding
+                    elif control_str == 'fcharset0':
+                        encoding = u'cp1252'
+                    # Greek encoding
+                    elif control_str == 'fcharset161':
+                        encoding = u'cp1253'
+                    # Turkish encoding
+                    elif control_str == 'fcharset162':
+                        encoding = u'cp1254'
+                    # Vietnamese encoding
+                    elif control_str == 'fcharset163':
+                        encoding = u'cp1258'
+                    # Hebrew encoding
+                    elif control_str == 'fcharset177':
+                        encoding = u'cp1255'
+                    # Arabic encoding
+                    elif control_str == 'fcharset178':
+                        encoding = u'cp1256'
+                    # Baltic encoding
+                    elif control_str == 'fcharset186':
+                        encoding = u'cp1257'
+                    # Cyrillic encoding
+                    elif control_str == 'fcharset204':
+                        encoding = u'cp1251'
+                    # Thai encoding
+                    elif control_str == 'fcharset222':
+                        encoding = u'cp874'
+                    # Central+East European encoding
+                    elif control_str == 'fcharset238':
+                        encoding = u'cp1250'
+                    elif control_str[0] == '\'':
+                        s = chr(int(control_str[1:3], 16))
+                        clear_text.append(s.decode(encoding))
+                    del control_word[:]
+            if c == '\\' and new_control:
+                control = True
+        elif c == '{':
+            depth += 1
+        elif c == '}':
+            depth -= 1
+        elif depth > 2:
+            continue
+        elif c == '\n' or c == '\r':
+            continue
+        elif c == '\\':
+            control = True
+        else:
+            clear_text.append(c)
+    return u''.join(clear_text)
+
+class FieldDescEntry:
+    def __init__(self, name, type, size):
+        self.name = name
+        self.type = type
+        self.size = size
+
+
+class EasyWorshipSongImport(SongImport):
+    """
+    The :class:`EasyWorshipSongImport` class provides OpenLP with the
+    ability to import EasyWorship song files.
+    """
+    def __init__(self, manager, **kwargs):
+        SongImport.__init__(self, manager, **kwargs)
+
+    def do_import(self):
+        # Open the DB and MB files if they exist
+        import_source_mb = self.import_source.replace('.DB', '.MB')
+        if not os.path.isfile(self.import_source):
+            return
+        if not os.path.isfile(import_source_mb):
+            return
+        db_size = os.path.getsize(self.import_source)
+        if db_size < 0x800:
+            return
+        db_file = open(self.import_source, 'rb')
+        self.memo_file = open(import_source_mb, 'rb')
+        # Don't accept files that are clearly not paradox files
+        record_size, header_size, block_size, first_block, num_fields \
+            = struct.unpack('<hhxb8xh17xh', db_file.read(35))
+        if header_size != 0x800 or block_size < 1 or block_size > 4:
+            db_file.close()
+            self.memo_file.close()
+            return
+        # Take a stab at how text is encoded
+        self.encoding = u'cp1252'
+        db_file.seek(106)
+        code_page, = struct.unpack('<h', db_file.read(2))
+        if code_page == 852:
+            self.encoding = u'cp1250'
+        # The following codepage to actual encoding mappings have not been
+        # observed, but merely guessed. Actual example files are needed.
+        elif code_page == 737:
+            self.encoding = u'cp1253'
+        elif code_page == 775:
+            self.encoding = u'cp1257'
+        elif code_page == 855:
+            self.encoding = u'cp1251'
+        elif code_page == 857:
+            self.encoding = u'cp1254'
+        elif code_page == 866:
+            self.encoding = u'cp1251'
+        elif code_page == 869:
+            self.encoding = u'cp1253'
+        elif code_page == 862:
+            self.encoding = u'cp1255'
+        elif code_page == 874:
+            self.encoding = u'cp874'
+        self.encoding = retrieve_windows_encoding(self.encoding)
+        if not self.encoding:
+            return
+        # There does not appear to be a _reliable_ way of getting the number
+        # of songs/records, so let's use file blocks for measuring progress.
+        total_blocks = (db_size - header_size) / (block_size * 1024)
+        self.import_wizard.progressBar.setMaximum(total_blocks)
+        # Read the field description information
+        db_file.seek(120)
+        field_info = db_file.read(num_fields * 2)
+        db_file.seek(4 + (num_fields * 4) + 261, os.SEEK_CUR)
+        field_names = db_file.read(header_size - db_file.tell()).split('\0',
+            num_fields)
+        field_names.pop()
+        field_descs = []
+        for i, field_name in enumerate(field_names):
+            field_type, field_size = struct.unpack_from('BB',
+                field_info, i * 2)
+            field_descs.append(FieldDescEntry(field_name, field_type,
+                field_size))
+        self.set_record_struct(field_descs)
+        # Pick out the field description indexes we will need
+        try:
+            success = True
+            fi_title = self.find_field(u'Title')
+            fi_author = self.find_field(u'Author')
+            fi_copy = self.find_field(u'Copyright')
+            fi_admin = self.find_field(u'Administrator')
+            fi_words = self.find_field(u'Words')
+            fi_ccli = self.find_field(u'Song Number')
+        except IndexError:
+            # This is the wrong table
+            success = False
+        # Loop through each block of the file
+        cur_block = first_block
+        while cur_block != 0 and success:
+            db_file.seek(header_size + ((cur_block - 1) * 1024 * block_size))
+            cur_block, rec_count = struct.unpack('<h2xh', db_file.read(6))
+            rec_count = (rec_count + record_size) / record_size
+            # Loop through each record within the current block
+            for i in range(rec_count):
+                if self.stop_import_flag:
+                    break
+                raw_record = db_file.read(record_size)
+                self.fields = self.record_struct.unpack(raw_record)
+                self.set_defaults()
+                self.title = self.get_field(fi_title)
+                # Get remaining fields.
+                copy = self.get_field(fi_copy)
+                admin = self.get_field(fi_admin)
+                ccli = self.get_field(fi_ccli)
+                authors = self.get_field(fi_author)
+                words = self.get_field(fi_words)
+                # Set the SongImport object members.
+                if copy:
+                    self.copyright = copy
+                if admin:
+                    if copy:
+                        self.copyright += u', '
+                    self.copyright += \
+                        unicode(translate('SongsPlugin.EasyWorshipSongImport',
+                            'Administered by %s')) % admin
+                if ccli:
+                    self.ccli_number = ccli
+                if authors:
+                    # Split up the authors
+                    author_list = authors.split(u'/')
+                    if len(author_list) < 2:
+                        author_list = authors.split(u';')
+                    if len(author_list) < 2:
+                        author_list = authors.split(u',')
+                    for author_name in author_list:
+                        self.add_author(author_name.strip())
+                if words:
+                    # Format the lyrics
+                    words = strip_rtf(words, self.encoding)
+                    for verse in words.split(u'\n\n'):
+                        # TODO: recognize note-part as well and put into comments-section
+                        # ew tags: verse, chorus, pre-chorus, bridge, tag, intro, ending, slide
+                        verse_split = verse.strip().split(u'\n',  1)
+                        verse_type = VerseType.Tags[VerseType.Verse]
+                        first_line_is_tag = False
+                        for type in VerseType.Names+['tag',  'slide']: # doesnt cover tag, slide
+                            type = type.lower()
+                            ew_tag = verse_split[0].strip().lower()
+                            if ew_tag.startswith(type):
+                                #print ew_tag
+                                verse_type = type[0]
+                                if type == 'tag' or type == 'slide':
+                                    verse_type = VerseType.Tags[VerseType.Other]
+                                first_line_is_tag = True
+                                if len(ew_tag) > len(type): # tag is followed by number and/or note
+                                    p = re.compile(r'[0-9]+')
+                                    m = re.search(p,  ew_tag)
+                                    if m:
+                                        number = m.group()
+                                        verse_type +=number
+                                        
+                                    p = re.compile(r'\(.*\)')
+                                    m = re.search(p,  ew_tag)
+                                    if m:
+                                        self.comments += ew_tag+'\n'
+                                break
+                                
+                        self.add_verse(
+                            verse_split[-1].strip() if first_line_is_tag else verse.strip(), # TODO: hacky: -1
+                            verse_type)
+                if len(self.comments) > 5:
+                    self.comments += unicode(translate('SongsPlugin.EasyWorshipSongImport',
+                            '\n[above are Song Tags with notes imported from EasyWorship]'))
+                if self.stop_import_flag:
+                    break
+                if not self.finish():
+                    self.log_error(self.import_source)
+        db_file.close()
+        self.memo_file.close()
+
+    def find_field(self, field_name):
+        return [i for i, x in enumerate(self.field_descs)
+            if x.name == field_name][0]
+
+    def set_record_struct(self, field_descs):
+        # Begin with empty field struct list
+        fsl = ['>']
+        for field_desc in field_descs:
+            if field_desc.type == 1:
+                # string
+                fsl.append('%ds' % field_desc.size)
+            elif field_desc.type == 3:
+                # 16-bit int
+                fsl.append('H')
+            elif field_desc.type == 4:
+                # 32-bit int
+                fsl.append('I')
+            elif field_desc.type == 9:
+                # Logical
+                fsl.append('B')
+            elif field_desc.type == 0x0c:
+                # Memo
+                fsl.append('%ds' % field_desc.size)
+            elif field_desc.type == 0x0d:
+                # Blob
+                fsl.append('%ds' % field_desc.size)
+            elif field_desc.type == 0x15:
+                # Timestamp
+                fsl.append('Q')
+            else:
+                fsl.append('%ds' % field_desc.size)
+        self.record_struct = struct.Struct(''.join(fsl))
+        self.field_descs = field_descs
+
+    def get_field(self, field_desc_index):
+        field = self.fields[field_desc_index]
+        field_desc = self.field_descs[field_desc_index]
+        # Return None in case of 'blank' entries
+        if isinstance(field, str):
+            if len(field.rstrip('\0')) == 0:
+                return None
+        elif field == 0:
+            return None
+        # Format the field depending on the field type
+        if field_desc.type == 1:
+            # string
+            return field.rstrip('\0').decode(self.encoding)
+        elif field_desc.type == 3:
+            # 16-bit int
+            return field ^ 0x8000
+        elif field_desc.type == 4:
+            # 32-bit int
+            return field ^ 0x80000000
+        elif field_desc.type == 9:
+            # Logical
+            return (field ^ 0x80 == 1)
+        elif field_desc.type == 0x0c or field_desc.type == 0x0d:
+            # Memo or Blob
+            block_start, blob_size = \
+                struct.unpack_from('<II', field, len(field)-10)
+            sub_block = block_start & 0xff
+            block_start &= ~0xff
+            self.memo_file.seek(block_start)
+            memo_block_type, = struct.unpack('b', self.memo_file.read(1))
+            if memo_block_type == 2:
+                self.memo_file.seek(8, os.SEEK_CUR)
+            elif memo_block_type == 3:
+                if sub_block > 63:
+                    return u''
+                self.memo_file.seek(11 + (5 * sub_block), os.SEEK_CUR)
+                sub_block_start, = struct.unpack('B', self.memo_file.read(1))
+                self.memo_file.seek(block_start + (sub_block_start * 16))
+            else:
+                return u''
+            return self.memo_file.read(blob_size)
+        else:
+            return 0

From 31dd4945bae57b2aeaa2f0310290d40824413b6d Mon Sep 17 00:00:00 2001
From: Benny <benny.wasty@googlemail.com>
Date: Tue, 21 Jun 2011 07:55:11 +0200
Subject: [PATCH 2/7] fixed line endings

---
 openlp/plugins/songs/lib/ewimport.py | 759 +++++++++++++--------------
 1 file changed, 379 insertions(+), 380 deletions(-)

diff --git a/openlp/plugins/songs/lib/ewimport.py b/openlp/plugins/songs/lib/ewimport.py
index 2431743d6..fb82ab347 100644
--- a/openlp/plugins/songs/lib/ewimport.py
+++ b/openlp/plugins/songs/lib/ewimport.py
@@ -1,380 +1,379 @@
-# -*- coding: utf-8 -*-
-# vim: autoindent shiftwidth=4 expandtab textwidth=80 tabstop=4 softtabstop=4
-
-###############################################################################
-# OpenLP - Open Source Lyrics Projection                                      #
-# --------------------------------------------------------------------------- #
-# Copyright (c) 2008-2011 Raoul Snyman                                        #
-# Portions copyright (c) 2008-2011 Tim Bentley, Gerald Britton, Jonathan      #
-# Corwin, Michael Gorven, Scott Guerrieri, Matthias Hub, Meinert Jordan,      #
-# Armin Köhler, Joshua Miller, Stevan Pettit, Andreas Preikschat, Mattias     #
-# Põldaru, Christian Richter, Philip Ridout, Simon Scudder, Jeffrey Smith,    #
-# Maikel Stuivenberg, Martin Thompson, Jon Tibble, Frode Woldsund             #
-# --------------------------------------------------------------------------- #
-# This program is free software; you can redistribute it and/or modify it     #
-# under the terms of the GNU General Public License as published by the Free  #
-# Software Foundation; version 2 of the License.                              #
-#                                                                             #
-# This program is distributed in the hope that it will be useful, but WITHOUT #
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or       #
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for    #
-# more details.                                                               #
-#                                                                             #
-# You should have received a copy of the GNU General Public License along     #
-# with this program; if not, write to the Free Software Foundation, Inc., 59  #
-# Temple Place, Suite 330, Boston, MA 02111-1307 USA                          #
-###############################################################################
-"""
-The :mod:`ewimport` module provides the functionality for importing
-EasyWorship song databases into the current installation database.
-"""
-
-import os
-import struct
-import re
-
-from openlp.core.lib import translate
-from openlp.core.ui.wizard import WizardStrings
-from openlp.plugins.songs.lib import VerseType
-from openlp.plugins.songs.lib import retrieve_windows_encoding
-from songimport import SongImport
-
-def strip_rtf(blob, encoding):
-    depth = 0
-    control = False
-    clear_text = []
-    control_word = []
-    for c in blob:
-        if control:
-            # for delimiters, set control to False
-            if c == '{':
-                if len(control_word) > 0:
-                    depth += 1
-                control = False
-            elif c == '}':
-                if len(control_word) > 0:
-                    depth -= 1
-                control = False
-            elif c == '\\':
-                new_control = (len(control_word) > 0)
-                control = False
-            elif c.isspace():
-                control = False
-            else:
-                control_word.append(c)
-                if len(control_word) == 3 and control_word[0] == '\'':
-                    control = False
-            if not control:
-                if len(control_word) == 0:
-                    if c == '{' or c == '}' or c == '\\':
-                        clear_text.append(c)
-                else:
-                    control_str = ''.join(control_word)
-                    if control_str == 'par' or control_str == 'line':
-                        clear_text.append(u'\n')
-                    elif control_str == 'tab':
-                        clear_text.append(u'\t')
-                    # Prefer the encoding specified by the RTF data to that
-                    # specified by the Paradox table header
-                    # West European encoding
-                    elif control_str == 'fcharset0':
-                        encoding = u'cp1252'
-                    # Greek encoding
-                    elif control_str == 'fcharset161':
-                        encoding = u'cp1253'
-                    # Turkish encoding
-                    elif control_str == 'fcharset162':
-                        encoding = u'cp1254'
-                    # Vietnamese encoding
-                    elif control_str == 'fcharset163':
-                        encoding = u'cp1258'
-                    # Hebrew encoding
-                    elif control_str == 'fcharset177':
-                        encoding = u'cp1255'
-                    # Arabic encoding
-                    elif control_str == 'fcharset178':
-                        encoding = u'cp1256'
-                    # Baltic encoding
-                    elif control_str == 'fcharset186':
-                        encoding = u'cp1257'
-                    # Cyrillic encoding
-                    elif control_str == 'fcharset204':
-                        encoding = u'cp1251'
-                    # Thai encoding
-                    elif control_str == 'fcharset222':
-                        encoding = u'cp874'
-                    # Central+East European encoding
-                    elif control_str == 'fcharset238':
-                        encoding = u'cp1250'
-                    elif control_str[0] == '\'':
-                        s = chr(int(control_str[1:3], 16))
-                        clear_text.append(s.decode(encoding))
-                    del control_word[:]
-            if c == '\\' and new_control:
-                control = True
-        elif c == '{':
-            depth += 1
-        elif c == '}':
-            depth -= 1
-        elif depth > 2:
-            continue
-        elif c == '\n' or c == '\r':
-            continue
-        elif c == '\\':
-            control = True
-        else:
-            clear_text.append(c)
-    return u''.join(clear_text)
-
-class FieldDescEntry:
-    def __init__(self, name, type, size):
-        self.name = name
-        self.type = type
-        self.size = size
-
-
-class EasyWorshipSongImport(SongImport):
-    """
-    The :class:`EasyWorshipSongImport` class provides OpenLP with the
-    ability to import EasyWorship song files.
-    """
-    def __init__(self, manager, **kwargs):
-        SongImport.__init__(self, manager, **kwargs)
-
-    def do_import(self):
-        # Open the DB and MB files if they exist
-        import_source_mb = self.import_source.replace('.DB', '.MB')
-        if not os.path.isfile(self.import_source):
-            return
-        if not os.path.isfile(import_source_mb):
-            return
-        db_size = os.path.getsize(self.import_source)
-        if db_size < 0x800:
-            return
-        db_file = open(self.import_source, 'rb')
-        self.memo_file = open(import_source_mb, 'rb')
-        # Don't accept files that are clearly not paradox files
-        record_size, header_size, block_size, first_block, num_fields \
-            = struct.unpack('<hhxb8xh17xh', db_file.read(35))
-        if header_size != 0x800 or block_size < 1 or block_size > 4:
-            db_file.close()
-            self.memo_file.close()
-            return
-        # Take a stab at how text is encoded
-        self.encoding = u'cp1252'
-        db_file.seek(106)
-        code_page, = struct.unpack('<h', db_file.read(2))
-        if code_page == 852:
-            self.encoding = u'cp1250'
-        # The following codepage to actual encoding mappings have not been
-        # observed, but merely guessed. Actual example files are needed.
-        elif code_page == 737:
-            self.encoding = u'cp1253'
-        elif code_page == 775:
-            self.encoding = u'cp1257'
-        elif code_page == 855:
-            self.encoding = u'cp1251'
-        elif code_page == 857:
-            self.encoding = u'cp1254'
-        elif code_page == 866:
-            self.encoding = u'cp1251'
-        elif code_page == 869:
-            self.encoding = u'cp1253'
-        elif code_page == 862:
-            self.encoding = u'cp1255'
-        elif code_page == 874:
-            self.encoding = u'cp874'
-        self.encoding = retrieve_windows_encoding(self.encoding)
-        if not self.encoding:
-            return
-        # There does not appear to be a _reliable_ way of getting the number
-        # of songs/records, so let's use file blocks for measuring progress.
-        total_blocks = (db_size - header_size) / (block_size * 1024)
-        self.import_wizard.progressBar.setMaximum(total_blocks)
-        # Read the field description information
-        db_file.seek(120)
-        field_info = db_file.read(num_fields * 2)
-        db_file.seek(4 + (num_fields * 4) + 261, os.SEEK_CUR)
-        field_names = db_file.read(header_size - db_file.tell()).split('\0',
-            num_fields)
-        field_names.pop()
-        field_descs = []
-        for i, field_name in enumerate(field_names):
-            field_type, field_size = struct.unpack_from('BB',
-                field_info, i * 2)
-            field_descs.append(FieldDescEntry(field_name, field_type,
-                field_size))
-        self.set_record_struct(field_descs)
-        # Pick out the field description indexes we will need
-        try:
-            success = True
-            fi_title = self.find_field(u'Title')
-            fi_author = self.find_field(u'Author')
-            fi_copy = self.find_field(u'Copyright')
-            fi_admin = self.find_field(u'Administrator')
-            fi_words = self.find_field(u'Words')
-            fi_ccli = self.find_field(u'Song Number')
-        except IndexError:
-            # This is the wrong table
-            success = False
-        # Loop through each block of the file
-        cur_block = first_block
-        while cur_block != 0 and success:
-            db_file.seek(header_size + ((cur_block - 1) * 1024 * block_size))
-            cur_block, rec_count = struct.unpack('<h2xh', db_file.read(6))
-            rec_count = (rec_count + record_size) / record_size
-            # Loop through each record within the current block
-            for i in range(rec_count):
-                if self.stop_import_flag:
-                    break
-                raw_record = db_file.read(record_size)
-                self.fields = self.record_struct.unpack(raw_record)
-                self.set_defaults()
-                self.title = self.get_field(fi_title)
-                # Get remaining fields.
-                copy = self.get_field(fi_copy)
-                admin = self.get_field(fi_admin)
-                ccli = self.get_field(fi_ccli)
-                authors = self.get_field(fi_author)
-                words = self.get_field(fi_words)
-                # Set the SongImport object members.
-                if copy:
-                    self.copyright = copy
-                if admin:
-                    if copy:
-                        self.copyright += u', '
-                    self.copyright += \
-                        unicode(translate('SongsPlugin.EasyWorshipSongImport',
-                            'Administered by %s')) % admin
-                if ccli:
-                    self.ccli_number = ccli
-                if authors:
-                    # Split up the authors
-                    author_list = authors.split(u'/')
-                    if len(author_list) < 2:
-                        author_list = authors.split(u';')
-                    if len(author_list) < 2:
-                        author_list = authors.split(u',')
-                    for author_name in author_list:
-                        self.add_author(author_name.strip())
-                if words:
-                    # Format the lyrics
-                    words = strip_rtf(words, self.encoding)
-                    for verse in words.split(u'\n\n'):
-                        # TODO: recognize note-part as well and put into comments-section
-                        # ew tags: verse, chorus, pre-chorus, bridge, tag, intro, ending, slide
-                        verse_split = verse.strip().split(u'\n',  1)
-                        verse_type = VerseType.Tags[VerseType.Verse]
-                        first_line_is_tag = False
-                        for type in VerseType.Names+['tag',  'slide']: # doesnt cover tag, slide
-                            type = type.lower()
-                            ew_tag = verse_split[0].strip().lower()
-                            if ew_tag.startswith(type):
-                                #print ew_tag
-                                verse_type = type[0]
-                                if type == 'tag' or type == 'slide':
-                                    verse_type = VerseType.Tags[VerseType.Other]
-                                first_line_is_tag = True
-                                if len(ew_tag) > len(type): # tag is followed by number and/or note
-                                    p = re.compile(r'[0-9]+')
-                                    m = re.search(p,  ew_tag)
-                                    if m:
-                                        number = m.group()
-                                        verse_type +=number
-                                        
-                                    p = re.compile(r'\(.*\)')
-                                    m = re.search(p,  ew_tag)
-                                    if m:
-                                        self.comments += ew_tag+'\n'
-                                break
-                                
-                        self.add_verse(
-                            verse_split[-1].strip() if first_line_is_tag else verse.strip(), # TODO: hacky: -1
-                            verse_type)
-                if len(self.comments) > 5:
-                    self.comments += unicode(translate('SongsPlugin.EasyWorshipSongImport',
-                            '\n[above are Song Tags with notes imported from EasyWorship]'))
-                if self.stop_import_flag:
-                    break
-                if not self.finish():
-                    self.log_error(self.import_source)
-        db_file.close()
-        self.memo_file.close()
-
-    def find_field(self, field_name):
-        return [i for i, x in enumerate(self.field_descs)
-            if x.name == field_name][0]
-
-    def set_record_struct(self, field_descs):
-        # Begin with empty field struct list
-        fsl = ['>']
-        for field_desc in field_descs:
-            if field_desc.type == 1:
-                # string
-                fsl.append('%ds' % field_desc.size)
-            elif field_desc.type == 3:
-                # 16-bit int
-                fsl.append('H')
-            elif field_desc.type == 4:
-                # 32-bit int
-                fsl.append('I')
-            elif field_desc.type == 9:
-                # Logical
-                fsl.append('B')
-            elif field_desc.type == 0x0c:
-                # Memo
-                fsl.append('%ds' % field_desc.size)
-            elif field_desc.type == 0x0d:
-                # Blob
-                fsl.append('%ds' % field_desc.size)
-            elif field_desc.type == 0x15:
-                # Timestamp
-                fsl.append('Q')
-            else:
-                fsl.append('%ds' % field_desc.size)
-        self.record_struct = struct.Struct(''.join(fsl))
-        self.field_descs = field_descs
-
-    def get_field(self, field_desc_index):
-        field = self.fields[field_desc_index]
-        field_desc = self.field_descs[field_desc_index]
-        # Return None in case of 'blank' entries
-        if isinstance(field, str):
-            if len(field.rstrip('\0')) == 0:
-                return None
-        elif field == 0:
-            return None
-        # Format the field depending on the field type
-        if field_desc.type == 1:
-            # string
-            return field.rstrip('\0').decode(self.encoding)
-        elif field_desc.type == 3:
-            # 16-bit int
-            return field ^ 0x8000
-        elif field_desc.type == 4:
-            # 32-bit int
-            return field ^ 0x80000000
-        elif field_desc.type == 9:
-            # Logical
-            return (field ^ 0x80 == 1)
-        elif field_desc.type == 0x0c or field_desc.type == 0x0d:
-            # Memo or Blob
-            block_start, blob_size = \
-                struct.unpack_from('<II', field, len(field)-10)
-            sub_block = block_start & 0xff
-            block_start &= ~0xff
-            self.memo_file.seek(block_start)
-            memo_block_type, = struct.unpack('b', self.memo_file.read(1))
-            if memo_block_type == 2:
-                self.memo_file.seek(8, os.SEEK_CUR)
-            elif memo_block_type == 3:
-                if sub_block > 63:
-                    return u''
-                self.memo_file.seek(11 + (5 * sub_block), os.SEEK_CUR)
-                sub_block_start, = struct.unpack('B', self.memo_file.read(1))
-                self.memo_file.seek(block_start + (sub_block_start * 16))
-            else:
-                return u''
-            return self.memo_file.read(blob_size)
-        else:
-            return 0
+# -*- coding: utf-8 -*-
+# vim: autoindent shiftwidth=4 expandtab textwidth=80 tabstop=4 softtabstop=4
+
+###############################################################################
+# OpenLP - Open Source Lyrics Projection                                      #
+# --------------------------------------------------------------------------- #
+# Copyright (c) 2008-2011 Raoul Snyman                                        #
+# Portions copyright (c) 2008-2011 Tim Bentley, Gerald Britton, Jonathan      #
+# Corwin, Michael Gorven, Scott Guerrieri, Matthias Hub, Meinert Jordan,      #
+# Armin Köhler, Joshua Miller, Stevan Pettit, Andreas Preikschat, Mattias     #
+# Põldaru, Christian Richter, Philip Ridout, Simon Scudder, Jeffrey Smith,    #
+# Maikel Stuivenberg, Martin Thompson, Jon Tibble, Frode Woldsund             #
+# --------------------------------------------------------------------------- #
+# This program is free software; you can redistribute it and/or modify it     #
+# under the terms of the GNU General Public License as published by the Free  #
+# Software Foundation; version 2 of the License.                              #
+#                                                                             #
+# This program is distributed in the hope that it will be useful, but WITHOUT #
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or       #
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for    #
+# more details.                                                               #
+#                                                                             #
+# You should have received a copy of the GNU General Public License along     #
+# with this program; if not, write to the Free Software Foundation, Inc., 59  #
+# Temple Place, Suite 330, Boston, MA 02111-1307 USA                          #
+###############################################################################
+"""
+The :mod:`ewimport` module provides the functionality for importing
+EasyWorship song databases into the current installation database.
+"""
+
+import os
+import struct
+import re
+
+from openlp.core.lib import translate
+from openlp.core.ui.wizard import WizardStrings
+from openlp.plugins.songs.lib import VerseType
+from openlp.plugins.songs.lib import retrieve_windows_encoding
+from songimport import SongImport
+
+def strip_rtf(blob, encoding):
+    depth = 0
+    control = False
+    clear_text = []
+    control_word = []
+    for c in blob:
+        if control:
+            # for delimiters, set control to False
+            if c == '{':
+                if len(control_word) > 0:
+                    depth += 1
+                control = False
+            elif c == '}':
+                if len(control_word) > 0:
+                    depth -= 1
+                control = False
+            elif c == '\\':
+                new_control = (len(control_word) > 0)
+                control = False
+            elif c.isspace():
+                control = False
+            else:
+                control_word.append(c)
+                if len(control_word) == 3 and control_word[0] == '\'':
+                    control = False
+            if not control:
+                if len(control_word) == 0:
+                    if c == '{' or c == '}' or c == '\\':
+                        clear_text.append(c)
+                else:
+                    control_str = ''.join(control_word)
+                    if control_str == 'par' or control_str == 'line':
+                        clear_text.append(u'\n')
+                    elif control_str == 'tab':
+                        clear_text.append(u'\t')
+                    # Prefer the encoding specified by the RTF data to that
+                    # specified by the Paradox table header
+                    # West European encoding
+                    elif control_str == 'fcharset0':
+                        encoding = u'cp1252'
+                    # Greek encoding
+                    elif control_str == 'fcharset161':
+                        encoding = u'cp1253'
+                    # Turkish encoding
+                    elif control_str == 'fcharset162':
+                        encoding = u'cp1254'
+                    # Vietnamese encoding
+                    elif control_str == 'fcharset163':
+                        encoding = u'cp1258'
+                    # Hebrew encoding
+                    elif control_str == 'fcharset177':
+                        encoding = u'cp1255'
+                    # Arabic encoding
+                    elif control_str == 'fcharset178':
+                        encoding = u'cp1256'
+                    # Baltic encoding
+                    elif control_str == 'fcharset186':
+                        encoding = u'cp1257'
+                    # Cyrillic encoding
+                    elif control_str == 'fcharset204':
+                        encoding = u'cp1251'
+                    # Thai encoding
+                    elif control_str == 'fcharset222':
+                        encoding = u'cp874'
+                    # Central+East European encoding
+                    elif control_str == 'fcharset238':
+                        encoding = u'cp1250'
+                    elif control_str[0] == '\'':
+                        s = chr(int(control_str[1:3], 16))
+                        clear_text.append(s.decode(encoding))
+                    del control_word[:]
+            if c == '\\' and new_control:
+                control = True
+        elif c == '{':
+            depth += 1
+        elif c == '}':
+            depth -= 1
+        elif depth > 2:
+            continue
+        elif c == '\n' or c == '\r':
+            continue
+        elif c == '\\':
+            control = True
+        else:
+            clear_text.append(c)
+    return u''.join(clear_text)
+
+class FieldDescEntry:
+    def __init__(self, name, type, size):
+        self.name = name
+        self.type = type
+        self.size = size
+
+
+class EasyWorshipSongImport(SongImport):
+    """
+    The :class:`EasyWorshipSongImport` class provides OpenLP with the
+    ability to import EasyWorship song files.
+    """
+    def __init__(self, manager, **kwargs):
+        SongImport.__init__(self, manager, **kwargs)
+
+    def do_import(self):
+        # Open the DB and MB files if they exist
+        import_source_mb = self.import_source.replace('.DB', '.MB')
+        if not os.path.isfile(self.import_source):
+            return
+        if not os.path.isfile(import_source_mb):
+            return
+        db_size = os.path.getsize(self.import_source)
+        if db_size < 0x800:
+            return
+        db_file = open(self.import_source, 'rb')
+        self.memo_file = open(import_source_mb, 'rb')
+        # Don't accept files that are clearly not paradox files
+        record_size, header_size, block_size, first_block, num_fields \
+            = struct.unpack('<hhxb8xh17xh', db_file.read(35))
+        if header_size != 0x800 or block_size < 1 or block_size > 4:
+            db_file.close()
+            self.memo_file.close()
+            return
+        # Take a stab at how text is encoded
+        self.encoding = u'cp1252'
+        db_file.seek(106)
+        code_page, = struct.unpack('<h', db_file.read(2))
+        if code_page == 852:
+            self.encoding = u'cp1250'
+        # The following codepage to actual encoding mappings have not been
+        # observed, but merely guessed. Actual example files are needed.
+        elif code_page == 737:
+            self.encoding = u'cp1253'
+        elif code_page == 775:
+            self.encoding = u'cp1257'
+        elif code_page == 855:
+            self.encoding = u'cp1251'
+        elif code_page == 857:
+            self.encoding = u'cp1254'
+        elif code_page == 866:
+            self.encoding = u'cp1251'
+        elif code_page == 869:
+            self.encoding = u'cp1253'
+        elif code_page == 862:
+            self.encoding = u'cp1255'
+        elif code_page == 874:
+            self.encoding = u'cp874'
+        self.encoding = retrieve_windows_encoding(self.encoding)
+        if not self.encoding:
+            return
+        # There does not appear to be a _reliable_ way of getting the number
+        # of songs/records, so let's use file blocks for measuring progress.
+        total_blocks = (db_size - header_size) / (block_size * 1024)
+        self.import_wizard.progressBar.setMaximum(total_blocks)
+        # Read the field description information
+        db_file.seek(120)
+        field_info = db_file.read(num_fields * 2)
+        db_file.seek(4 + (num_fields * 4) + 261, os.SEEK_CUR)
+        field_names = db_file.read(header_size - db_file.tell()).split('\0',
+            num_fields)
+        field_names.pop()
+        field_descs = []
+        for i, field_name in enumerate(field_names):
+            field_type, field_size = struct.unpack_from('BB',
+                field_info, i * 2)
+            field_descs.append(FieldDescEntry(field_name, field_type,
+                field_size))
+        self.set_record_struct(field_descs)
+        # Pick out the field description indexes we will need
+        try:
+            success = True
+            fi_title = self.find_field(u'Title')
+            fi_author = self.find_field(u'Author')
+            fi_copy = self.find_field(u'Copyright')
+            fi_admin = self.find_field(u'Administrator')
+            fi_words = self.find_field(u'Words')
+            fi_ccli = self.find_field(u'Song Number')
+        except IndexError:
+            # This is the wrong table
+            success = False
+        # Loop through each block of the file
+        cur_block = first_block
+        while cur_block != 0 and success:
+            db_file.seek(header_size + ((cur_block - 1) * 1024 * block_size))
+            cur_block, rec_count = struct.unpack('<h2xh', db_file.read(6))
+            rec_count = (rec_count + record_size) / record_size
+            # Loop through each record within the current block
+            for i in range(rec_count):
+                if self.stop_import_flag:
+                    break
+                raw_record = db_file.read(record_size)
+                self.fields = self.record_struct.unpack(raw_record)
+                self.set_defaults()
+                self.title = self.get_field(fi_title)
+                # Get remaining fields.
+                copy = self.get_field(fi_copy)
+                admin = self.get_field(fi_admin)
+                ccli = self.get_field(fi_ccli)
+                authors = self.get_field(fi_author)
+                words = self.get_field(fi_words)
+                # Set the SongImport object members.
+                if copy:
+                    self.copyright = copy
+                if admin:
+                    if copy:
+                        self.copyright += u', '
+                    self.copyright += \
+                        unicode(translate('SongsPlugin.EasyWorshipSongImport',
+                            'Administered by %s')) % admin
+                if ccli:
+                    self.ccli_number = ccli
+                if authors:
+                    # Split up the authors
+                    author_list = authors.split(u'/')
+                    if len(author_list) < 2:
+                        author_list = authors.split(u';')
+                    if len(author_list) < 2:
+                        author_list = authors.split(u',')
+                    for author_name in author_list:
+                        self.add_author(author_name.strip())
+                if words:
+                    # Format the lyrics
+                    words = strip_rtf(words, self.encoding)
+                    for verse in words.split(u'\n\n'):
+                        # TODO: recognize note-part as well and put into comments-section
+                        # ew tags: verse, chorus, pre-chorus, bridge, tag, intro, ending, slide
+                        verse_split = verse.strip().split(u'\n',  1)
+                        verse_type = VerseType.Tags[VerseType.Verse]
+                        first_line_is_tag = False
+                        for type in VerseType.Names+['tag',  'slide']: # doesnt cover tag, slide
+                            type = type.lower()
+                            ew_tag = verse_split[0].strip().lower()
+                            if ew_tag.startswith(type):
+                                #print ew_tag
+                                verse_type = type[0]
+                                if type == 'tag' or type == 'slide':
+                                    verse_type = VerseType.Tags[VerseType.Other]
+                                first_line_is_tag = True
+                                if len(ew_tag) > len(type): # tag is followed by number and/or note
+                                    p = re.compile(r'[0-9]+')
+                                    m = re.search(p,  ew_tag)
+                                    if m:
+                                        number = m.group()
+                                        verse_type +=number
+                                        
+                                    p = re.compile(r'\(.*\)')
+                                    m = re.search(p,  ew_tag)
+                                    if m:
+                                        self.comments += ew_tag+'\n'
+                                break
+                        self.add_verse(
+                            verse_split[-1].strip() if first_line_is_tag else verse.strip(), # TODO: hacky: -1
+                            verse_type)
+                if len(self.comments) > 5:
+                    self.comments += unicode(translate('SongsPlugin.EasyWorshipSongImport',
+                            '\n[above are Song Tags with notes imported from EasyWorship]'))
+                if self.stop_import_flag:
+                    break
+                if not self.finish():
+                    self.log_error(self.import_source)
+        db_file.close()
+        self.memo_file.close()
+
+    def find_field(self, field_name):
+        return [i for i, x in enumerate(self.field_descs)
+            if x.name == field_name][0]
+
+    def set_record_struct(self, field_descs):
+        # Begin with empty field struct list
+        fsl = ['>']
+        for field_desc in field_descs:
+            if field_desc.type == 1:
+                # string
+                fsl.append('%ds' % field_desc.size)
+            elif field_desc.type == 3:
+                # 16-bit int
+                fsl.append('H')
+            elif field_desc.type == 4:
+                # 32-bit int
+                fsl.append('I')
+            elif field_desc.type == 9:
+                # Logical
+                fsl.append('B')
+            elif field_desc.type == 0x0c:
+                # Memo
+                fsl.append('%ds' % field_desc.size)
+            elif field_desc.type == 0x0d:
+                # Blob
+                fsl.append('%ds' % field_desc.size)
+            elif field_desc.type == 0x15:
+                # Timestamp
+                fsl.append('Q')
+            else:
+                fsl.append('%ds' % field_desc.size)
+        self.record_struct = struct.Struct(''.join(fsl))
+        self.field_descs = field_descs
+
+    def get_field(self, field_desc_index):
+        field = self.fields[field_desc_index]
+        field_desc = self.field_descs[field_desc_index]
+        # Return None in case of 'blank' entries
+        if isinstance(field, str):
+            if len(field.rstrip('\0')) == 0:
+                return None
+        elif field == 0:
+            return None
+        # Format the field depending on the field type
+        if field_desc.type == 1:
+            # string
+            return field.rstrip('\0').decode(self.encoding)
+        elif field_desc.type == 3:
+            # 16-bit int
+            return field ^ 0x8000
+        elif field_desc.type == 4:
+            # 32-bit int
+            return field ^ 0x80000000
+        elif field_desc.type == 9:
+            # Logical
+            return (field ^ 0x80 == 1)
+        elif field_desc.type == 0x0c or field_desc.type == 0x0d:
+            # Memo or Blob
+            block_start, blob_size = \
+                struct.unpack_from('<II', field, len(field)-10)
+            sub_block = block_start & 0xff
+            block_start &= ~0xff
+            self.memo_file.seek(block_start)
+            memo_block_type, = struct.unpack('b', self.memo_file.read(1))
+            if memo_block_type == 2:
+                self.memo_file.seek(8, os.SEEK_CUR)
+            elif memo_block_type == 3:
+                if sub_block > 63:
+                    return u''
+                self.memo_file.seek(11 + (5 * sub_block), os.SEEK_CUR)
+                sub_block_start, = struct.unpack('B', self.memo_file.read(1))
+                self.memo_file.seek(block_start + (sub_block_start * 16))
+            else:
+                return u''
+            return self.memo_file.read(blob_size)
+        else:
+            return 0

From bc808ade93762cf5d3a1a4501e0f3378a1cee4c3 Mon Sep 17 00:00:00 2001
From: Benny <benny.wasty@googlemail.com>
Date: Sat, 2 Jul 2011 00:45:27 +0200
Subject: [PATCH 3/7] EasyWorshipSongImport: use tag from previous slide for
 slides without tag, fix regex for notes

---
 openlp/plugins/songs/lib/ewimport.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/openlp/plugins/songs/lib/ewimport.py b/openlp/plugins/songs/lib/ewimport.py
index c207a07d2..95533ba94 100644
--- a/openlp/plugins/songs/lib/ewimport.py
+++ b/openlp/plugins/songs/lib/ewimport.py
@@ -261,6 +261,7 @@ class EasyWorshipSongImport(SongImport):
                     # Format the lyrics
                     words = strip_rtf(words, self.encoding) # TODO: convert rtf instead of stripping?
                     p = re.compile(r'\n *?\n[\n ]*') # at least two newlines, with zero or more space characters between them
+                    verse_type = VerseType.Tags[VerseType.Verse] # TODO!!!: use previous verse type....
                     for verse in p.split(words):
                     #for verse in words.split(u'\n\n'):
                         # ew tags: verse, chorus, pre-chorus, bridge, tag, intro, ending, slide
@@ -268,13 +269,11 @@ class EasyWorshipSongImport(SongImport):
                         if len(verse) == 0:
                             continue
                         verse_split = verse.split(u'\n',  1)
-                        verse_type = VerseType.Tags[VerseType.Verse]
                         first_line_is_tag = False
                         for type in VerseType.Names+['tag',  'slide']: # doesnt cover tag, slide
                             type = type.lower()
                             ew_tag = verse_split[0].strip().lower()
                             if ew_tag.startswith(type):
-                                #print ew_tag
                                 verse_type = type[0]
                                 if type == 'tag' or type == 'slide':
                                     verse_type = VerseType.Tags[VerseType.Other]
@@ -286,7 +285,7 @@ class EasyWorshipSongImport(SongImport):
                                         number = m.group()
                                         verse_type +=number
 
-                                    p = re.compile(r'\(.*\)')
+                                    p = re.compile(r'\(.*?\)')
                                     m = re.search(p,  ew_tag)
                                     if m:
                                         self.comments += ew_tag+'\n'

From 4bf45ad2defe7018062c65abdc1b586e7d241c2e Mon Sep 17 00:00:00 2001
From: Benny <benny.wasty@googlemail.com>
Date: Mon, 4 Jul 2011 22:51:43 +0200
Subject: [PATCH 4/7] ewimport: workaround for RTF stripping bug

---
 openlp/plugins/songs/lib/ewimport.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/openlp/plugins/songs/lib/ewimport.py b/openlp/plugins/songs/lib/ewimport.py
index 95533ba94..a50c97f47 100644
--- a/openlp/plugins/songs/lib/ewimport.py
+++ b/openlp/plugins/songs/lib/ewimport.py
@@ -44,6 +44,14 @@ def strip_rtf(blob, encoding):
     control = False
     clear_text = []
     control_word = []
+    
+    # workaround for \tx bug: remove one pair of curly braces if \tx is encountered
+    p = re.compile(r'\{\\tx[^}]*\}')
+    m = p.search(blob)
+    if m:
+        # start and end indices of match are curly braces - filter them out
+        blob = ''.join([blob[i] for i in xrange(len(blob)) if i != m.start() and i !=m.end()])
+    
     for c in blob:
         if control:
             # for delimiters, set control to False

From 3c0c9c5b781d80899f1b73543a085c779bfc9ac8 Mon Sep 17 00:00:00 2001
From: Benny <benny.wasty@googlemail.com>
Date: Tue, 5 Jul 2011 00:55:57 +0200
Subject: [PATCH 5/7] EasyWorship importer: some work to create more reasonable
 verse numbers if EW tags are missing or without numbers

---
 openlp/plugins/songs/lib/ewimport.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/openlp/plugins/songs/lib/ewimport.py b/openlp/plugins/songs/lib/ewimport.py
index a50c97f47..18b87f9c0 100644
--- a/openlp/plugins/songs/lib/ewimport.py
+++ b/openlp/plugins/songs/lib/ewimport.py
@@ -267,18 +267,18 @@ class EasyWorshipSongImport(SongImport):
                         self.add_author(author_name.strip())
                 if words:
                     # Format the lyrics
-                    words = strip_rtf(words, self.encoding) # TODO: convert rtf instead of stripping?
-                    p = re.compile(r'\n *?\n[\n ]*') # at least two newlines, with zero or more space characters between them
-                    verse_type = VerseType.Tags[VerseType.Verse] # TODO!!!: use previous verse type....
+                    words = strip_rtf(words, self.encoding) # TODO: convert rtf to display tags?
+                    # regex: at least two newlines, with zero or more space characters between them
+                    p = re.compile(r'\n *?\n[\n ]*') 
+                    verse_type = VerseType.Tags[VerseType.Verse]
                     for verse in p.split(words):
-                    #for verse in words.split(u'\n\n'):
-                        # ew tags: verse, chorus, pre-chorus, bridge, tag, intro, ending, slide
                         verse = verse.strip()
                         if len(verse) == 0:
                             continue
                         verse_split = verse.split(u'\n',  1)
                         first_line_is_tag = False
-                        for type in VerseType.Names+['tag',  'slide']: # doesnt cover tag, slide
+                        # ew tags: verse, chorus, pre-chorus, bridge, tag, intro, ending, slide
+                        for type in VerseType.Names+['tag',  'slide']: 
                             type = type.lower()
                             ew_tag = verse_split[0].strip().lower()
                             if ew_tag.startswith(type):
@@ -286,20 +286,24 @@ class EasyWorshipSongImport(SongImport):
                                 if type == 'tag' or type == 'slide':
                                     verse_type = VerseType.Tags[VerseType.Other]
                                 first_line_is_tag = True
+                                number_found = False
                                 if len(ew_tag) > len(type): # tag is followed by number and/or note
                                     p = re.compile(r'[0-9]+')
                                     m = re.search(p,  ew_tag)
                                     if m:
                                         number = m.group()
                                         verse_type +=number
+                                        number_found = True
 
                                     p = re.compile(r'\(.*?\)')
                                     m = re.search(p,  ew_tag)
                                     if m:
                                         self.comments += ew_tag+'\n'
+                                if not number_found:
+                                    verse_type += '1'
                                 break
                         self.add_verse(
-                            verse_split[-1].strip() if first_line_is_tag else verse, # TODO: hacky: -1
+                            verse_split[-1].strip() if first_line_is_tag else verse, 
                             verse_type)
                 if len(self.comments) > 5:
                     self.comments += unicode(translate('SongsPlugin.EasyWorshipSongImport',

From 6436b05240635a5b5c3fd4675e32fad3bf46c7d4 Mon Sep 17 00:00:00 2001
From: Benny <benny.wasty@googlemail.com>
Date: Tue, 5 Jul 2011 12:50:55 +0200
Subject: [PATCH 6/7] changes from review (cosmetic & regex performance)

---
 openlp/plugins/songs/lib/ewimport.py | 54 +++++++++++++++-------------
 1 file changed, 30 insertions(+), 24 deletions(-)

diff --git a/openlp/plugins/songs/lib/ewimport.py b/openlp/plugins/songs/lib/ewimport.py
index 18b87f9c0..732c6e4f0 100644
--- a/openlp/plugins/songs/lib/ewimport.py
+++ b/openlp/plugins/songs/lib/ewimport.py
@@ -39,18 +39,25 @@ from openlp.plugins.songs.lib import VerseType
 from openlp.plugins.songs.lib import retrieve_windows_encoding
 from songimport import SongImport
 
+RTF_STRIPPING_REGEX = re.compile(r'\{\\tx[^}]*\}')
+# regex: two newlines (spaces allowed between), then any newlines/spaces
+SLIDE_BREAK_REGEX = re.compile(r'\n *?\n[\n ]*')
+NUMBER_REGEX = re.compile(r'[0-9]+')
+NOTE_REGEX = re.compile(r'\(.*?\)')
+
 def strip_rtf(blob, encoding):
     depth = 0
     control = False
     clear_text = []
     control_word = []
     
-    # workaround for \tx bug: remove one pair of curly braces if \tx is encountered
-    p = re.compile(r'\{\\tx[^}]*\}')
-    m = p.search(blob)
-    if m:
+    # workaround for \tx bug: remove one pair of curly braces 
+    # if \tx is encountered
+    match = RTF_STRIPPING_REGEX.search(blob)
+    if match:
         # start and end indices of match are curly braces - filter them out
-        blob = ''.join([blob[i] for i in xrange(len(blob)) if i != m.start() and i !=m.end()])
+        blob = ''.join([blob[i] for i in xrange(len(blob)) 
+            if i != match.start() and i !=match.end()])
     
     for c in blob:
         if control:
@@ -267,17 +274,16 @@ class EasyWorshipSongImport(SongImport):
                         self.add_author(author_name.strip())
                 if words:
                     # Format the lyrics
-                    words = strip_rtf(words, self.encoding) # TODO: convert rtf to display tags?
-                    # regex: at least two newlines, with zero or more space characters between them
-                    p = re.compile(r'\n *?\n[\n ]*') 
+                    words = strip_rtf(words, self.encoding)
                     verse_type = VerseType.Tags[VerseType.Verse]
-                    for verse in p.split(words):
+                    for verse in SLIDE_BREAK_REGEX.split(words):
                         verse = verse.strip()
                         if len(verse) == 0:
                             continue
-                        verse_split = verse.split(u'\n',  1)
+                        verse_split = verse.split(u'\n', 1)
                         first_line_is_tag = False
-                        # ew tags: verse, chorus, pre-chorus, bridge, tag, intro, ending, slide
+                        # EW tags: verse, chorus, pre-chorus, bridge, tag, 
+                        # intro, ending, slide
                         for type in VerseType.Names+['tag',  'slide']: 
                             type = type.lower()
                             ew_tag = verse_split[0].strip().lower()
@@ -287,27 +293,27 @@ class EasyWorshipSongImport(SongImport):
                                     verse_type = VerseType.Tags[VerseType.Other]
                                 first_line_is_tag = True
                                 number_found = False
-                                if len(ew_tag) > len(type): # tag is followed by number and/or note
-                                    p = re.compile(r'[0-9]+')
-                                    m = re.search(p,  ew_tag)
-                                    if m:
-                                        number = m.group()
+                                # check if tag is followed by number and/or note
+                                if len(ew_tag) > len(type): 
+                                    match = NUMBER_REGEX.search(ew_tag)
+                                    if match:
+                                        number = match.group()
                                         verse_type +=number
                                         number_found = True
-
-                                    p = re.compile(r'\(.*?\)')
-                                    m = re.search(p,  ew_tag)
-                                    if m:
-                                        self.comments += ew_tag+'\n'
+                                    match = NOTE_REGEX.search(ew_tag)
+                                    if match:
+                                        self.comments += ew_tag + u'\n'
                                 if not number_found:
-                                    verse_type += '1'
+                                    verse_type += u'1'
                                 break
                         self.add_verse(
                             verse_split[-1].strip() if first_line_is_tag else verse, 
                             verse_type)
                 if len(self.comments) > 5:
-                    self.comments += unicode(translate('SongsPlugin.EasyWorshipSongImport',
-                            '\n[above are Song Tags with notes imported from EasyWorship]'))
+                    self.comments += unicode(
+                        translate('SongsPlugin.EasyWorshipSongImport',
+                        '\n[above are Song Tags with notes imported from \
+                        EasyWorship]'))
                 if self.stop_import_flag:
                     break
                 if not self.finish():

From 1876d520ae4a23899210c6ca5efe3ee474a588ac Mon Sep 17 00:00:00 2001
From: Benny <benny.wasty@googlemail.com>
Date: Tue, 5 Jul 2011 14:00:34 +0200
Subject: [PATCH 7/7] review fixes

---
 openlp/plugins/songs/lib/ewimport.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/openlp/plugins/songs/lib/ewimport.py b/openlp/plugins/songs/lib/ewimport.py
index 732c6e4f0..448d629d5 100644
--- a/openlp/plugins/songs/lib/ewimport.py
+++ b/openlp/plugins/songs/lib/ewimport.py
@@ -278,18 +278,18 @@ class EasyWorshipSongImport(SongImport):
                     verse_type = VerseType.Tags[VerseType.Verse]
                     for verse in SLIDE_BREAK_REGEX.split(words):
                         verse = verse.strip()
-                        if len(verse) == 0:
+                        if not verse:
                             continue
                         verse_split = verse.split(u'\n', 1)
                         first_line_is_tag = False
                         # EW tags: verse, chorus, pre-chorus, bridge, tag, 
                         # intro, ending, slide
-                        for type in VerseType.Names+['tag',  'slide']: 
+                        for type in VerseType.Names+[u'tag', u'slide']: 
                             type = type.lower()
                             ew_tag = verse_split[0].strip().lower()
                             if ew_tag.startswith(type):
                                 verse_type = type[0]
-                                if type == 'tag' or type == 'slide':
+                                if type == u'tag' or type == u'slide':
                                     verse_type = VerseType.Tags[VerseType.Other]
                                 first_line_is_tag = True
                                 number_found = False
@@ -298,7 +298,7 @@ class EasyWorshipSongImport(SongImport):
                                     match = NUMBER_REGEX.search(ew_tag)
                                     if match:
                                         number = match.group()
-                                        verse_type +=number
+                                        verse_type += number
                                         number_found = True
                                     match = NOTE_REGEX.search(ew_tag)
                                     if match: