EasyWorship importer: added conversion of Tags - basically working, but some issues remain

This commit is contained in:
Benny 2011-06-21 07:40:53 +02:00
parent de769ce066
commit 2939151ff1

View File

@ -1,348 +1,380 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vim: autoindent shiftwidth=4 expandtab textwidth=80 tabstop=4 softtabstop=4 # vim: autoindent shiftwidth=4 expandtab textwidth=80 tabstop=4 softtabstop=4
############################################################################### ###############################################################################
# OpenLP - Open Source Lyrics Projection # # OpenLP - Open Source Lyrics Projection #
# --------------------------------------------------------------------------- # # --------------------------------------------------------------------------- #
# Copyright (c) 2008-2011 Raoul Snyman # # Copyright (c) 2008-2011 Raoul Snyman #
# Portions copyright (c) 2008-2011 Tim Bentley, Gerald Britton, Jonathan # # Portions copyright (c) 2008-2011 Tim Bentley, Gerald Britton, Jonathan #
# Corwin, Michael Gorven, Scott Guerrieri, Matthias Hub, Meinert Jordan, # # Corwin, Michael Gorven, Scott Guerrieri, Matthias Hub, Meinert Jordan, #
# Armin Köhler, Joshua Miller, Stevan Pettit, Andreas Preikschat, Mattias # # Armin Köhler, Joshua Miller, Stevan Pettit, Andreas Preikschat, Mattias #
# Põldaru, Christian Richter, Philip Ridout, Simon Scudder, Jeffrey Smith, # # Põldaru, Christian Richter, Philip Ridout, Simon Scudder, Jeffrey Smith, #
# Maikel Stuivenberg, Martin Thompson, Jon Tibble, Frode Woldsund # # Maikel Stuivenberg, Martin Thompson, Jon Tibble, Frode Woldsund #
# --------------------------------------------------------------------------- # # --------------------------------------------------------------------------- #
# This program is free software; you can redistribute it and/or modify it # # This program is free software; you can redistribute it and/or modify it #
# under the terms of the GNU General Public License as published by the Free # # under the terms of the GNU General Public License as published by the Free #
# Software Foundation; version 2 of the License. # # Software Foundation; version 2 of the License. #
# # # #
# This program is distributed in the hope that it will be useful, but WITHOUT # # This program is distributed in the hope that it will be useful, but WITHOUT #
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for #
# more details. # # more details. #
# # # #
# You should have received a copy of the GNU General Public License along # # You should have received a copy of the GNU General Public License along #
# with this program; if not, write to the Free Software Foundation, Inc., 59 # # with this program; if not, write to the Free Software Foundation, Inc., 59 #
# Temple Place, Suite 330, Boston, MA 02111-1307 USA # # Temple Place, Suite 330, Boston, MA 02111-1307 USA #
############################################################################### ###############################################################################
""" """
The :mod:`ewimport` module provides the functionality for importing The :mod:`ewimport` module provides the functionality for importing
EasyWorship song databases into the current installation database. EasyWorship song databases into the current installation database.
""" """
import os import os
import struct import struct
import re
from openlp.core.lib import translate
from openlp.core.ui.wizard import WizardStrings from openlp.core.lib import translate
from openlp.plugins.songs.lib import VerseType from openlp.core.ui.wizard import WizardStrings
from openlp.plugins.songs.lib import retrieve_windows_encoding from openlp.plugins.songs.lib import VerseType
from songimport import SongImport from openlp.plugins.songs.lib import retrieve_windows_encoding
from songimport import SongImport
# Maps an RTF ``\fcharsetN`` control word to the Windows code page that is
# used to decode the ``\'xx`` hex escapes which follow it.
_RTF_CHARSET_ENCODINGS = {
    'fcharset0': u'cp1252',    # West European encoding
    'fcharset161': u'cp1253',  # Greek encoding
    'fcharset162': u'cp1254',  # Turkish encoding
    'fcharset163': u'cp1258',  # Vietnamese encoding
    'fcharset177': u'cp1255',  # Hebrew encoding
    'fcharset178': u'cp1256',  # Arabic encoding
    'fcharset186': u'cp1257',  # Baltic encoding
    'fcharset204': u'cp1251',  # Cyrillic encoding
    'fcharset222': u'cp874',   # Thai encoding
    'fcharset238': u'cp1250',  # Central+East European encoding
}


def strip_rtf(blob, encoding):
    """
    Reduce an RTF document to its plain text.

    The blob is scanned one character at a time. Control words are dropped,
    except that ``\\par`` and ``\\line`` become a newline, ``\\tab`` becomes a
    tab, ``\\fcharsetN`` switches the encoding used from then on, and
    ``\\'xx`` hex escapes are decoded with the current encoding. Escaped
    ``\\{``, ``\\}`` and ``\\\\`` yield the literal character. Plain text
    nested more than two groups deep is skipped, and raw CR/LF characters are
    ignored.

    ``blob``
        The RTF data to convert.

    ``encoding``
        The encoding used for ``\\'xx`` escapes until the RTF data itself
        names another one via ``\\fcharsetN``.

    Returns the extracted text as a single string. A ``\\'xx`` escape whose
    digits are missing or not hexadecimal is skipped; a well-formed escape
    that the encoding cannot decode still raises ``UnicodeDecodeError``.
    """
    depth = 0
    control = False
    # Only read right after a backslash ended a control word, where it is
    # always assigned first; initialised here so that is obvious.
    new_control = False
    clear_text = []
    control_word = []
    for c in blob:
        if control:
            # for delimiters, set control to False
            if c == '{':
                if control_word:
                    depth += 1
                control = False
            elif c == '}':
                if control_word:
                    depth -= 1
                control = False
            elif c == '\\':
                # A backslash directly after a control word starts the next
                # control word; directly after a backslash it is a literal.
                new_control = bool(control_word)
                control = False
            elif c.isspace():
                control = False
            else:
                control_word.append(c)
                # A hex escape is always exactly \'xx, no delimiter follows
                if len(control_word) == 3 and control_word[0] == '\'':
                    control = False
            if not control:
                if not control_word:
                    # Escaped special character
                    if c == '{' or c == '}' or c == '\\':
                        clear_text.append(c)
                else:
                    control_str = ''.join(control_word)
                    if control_str == 'par' or control_str == 'line':
                        clear_text.append(u'\n')
                    elif control_str == 'tab':
                        clear_text.append(u'\t')
                    elif control_str in _RTF_CHARSET_ENCODINGS:
                        # Prefer the encoding specified by the RTF data to
                        # that specified by the Paradox table header
                        encoding = _RTF_CHARSET_ENCODINGS[control_str]
                    elif control_str[0] == '\'':
                        try:
                            raw = bytearray((int(control_str[1:3], 16),))
                        except ValueError:
                            # Truncated or non-hex escape: nothing to decode
                            raw = None
                        if raw is not None:
                            clear_text.append(raw.decode(encoding))
                    del control_word[:]
                if c == '\\' and new_control:
                    control = True
        elif c == '{':
            depth += 1
        elif c == '}':
            depth -= 1
        elif depth > 2:
            continue
        elif c == '\n' or c == '\r':
            continue
        elif c == '\\':
            control = True
        else:
            clear_text.append(c)
    return u''.join(clear_text)

class FieldDescEntry(object):
    """
    Describes one field (column) of the table being imported.

    ``name``
        The field name as read from the table header.

    ``type``
        The numeric field type code read from the table header.

    ``size``
        The field size in bytes read from the table header.
    """
    def __init__(self, name, type, size):
        # The parameter is called ``type`` for compatibility with existing
        # callers even though it shadows the builtin inside this method.
        self.name = name
        self.type = type
        self.size = size

    def __repr__(self):
        return '%s(%r, %r, %r)' % (
            self.__class__.__name__, self.name, self.type, self.size)

class EasyWorshipSongImport(SongImport):
    """
    The :class:`EasyWorshipSongImport` class provides OpenLP with the
    ability to import EasyWorship song files.
    """
    # Code page stored in the table header -> encoding used for the text.
    # Only 852 has been observed in real files; the other mappings are
    # guesses. Actual example files are needed.
    _CODE_PAGE_ENCODINGS = {
        852: u'cp1250',
        737: u'cp1253',
        775: u'cp1257',
        855: u'cp1251',
        857: u'cp1254',
        866: u'cp1251',
        869: u'cp1253',
        862: u'cp1255',
        874: u'cp874',
    }
    # Field type code -> struct format for the fixed width numeric types.
    # Every other type (string, memo, blob, unknown) is read as raw bytes of
    # the field's declared size.
    _FIELD_FORMATS = {
        3: 'H',      # 16-bit int
        4: 'I',      # 32-bit int
        9: 'B',      # Logical
        0x15: 'Q',   # Timestamp
    }
    # EasyWorship tag names that are not among VerseType.Names
    _EXTRA_TAG_NAMES = ['tag', 'slide']
    _VERSE_NUMBER = re.compile(r'[0-9]+')
    _VERSE_NOTE = re.compile(r'\(.*\)')

    def __init__(self, manager, **kwargs):
        SongImport.__init__(self, manager, **kwargs)

    def do_import(self):
        """
        Import every song from the table named by ``self.import_source``.

        Returns silently if the table or its companion memo file (same path
        with ``.DB`` replaced by ``.MB``) is missing, or if the table is
        smaller than the 0x800 byte header. Both files are closed again on
        every exit path, including when an exception is raised.
        """
        # Open the DB and MB files if they exist
        import_source_mb = self.import_source.replace('.DB', '.MB')
        if not os.path.isfile(self.import_source):
            return
        if not os.path.isfile(import_source_mb):
            return
        db_size = os.path.getsize(self.import_source)
        if db_size < 0x800:
            return
        db_file = open(self.import_source, 'rb')
        try:
            self.memo_file = open(import_source_mb, 'rb')
            try:
                self._import_table(db_file, db_size)
            finally:
                self.memo_file.close()
        finally:
            db_file.close()

    def _import_table(self, db_file, db_size):
        """
        Read the header of the open table ``db_file`` and import its records.

        Returns early, importing nothing, when the header does not look like
        a paradox table, when no encoding could be determined, or when one of
        the required fields is missing.
        """
        # Don't accept files that are clearly not paradox files
        record_size, header_size, block_size, first_block, num_fields = \
            struct.unpack('<hhxb8xh17xh', db_file.read(35))
        if header_size != 0x800 or block_size < 1 or block_size > 4:
            return
        # Take a stab at how text is encoded
        db_file.seek(106)
        code_page, = struct.unpack('<h', db_file.read(2))
        self.encoding = retrieve_windows_encoding(
            self._CODE_PAGE_ENCODINGS.get(code_page, u'cp1252'))
        if not self.encoding:
            return
        # There does not appear to be a _reliable_ way of getting the number
        # of songs/records, so let's use file blocks for measuring progress.
        total_blocks = (db_size - header_size) // (block_size * 1024)
        self.import_wizard.progressBar.setMaximum(total_blocks)
        # Read the field description information
        db_file.seek(120)
        field_info = db_file.read(num_fields * 2)
        db_file.seek(4 + (num_fields * 4) + 261, os.SEEK_CUR)
        field_names = db_file.read(
            header_size - db_file.tell()).split('\0', num_fields)
        field_names.pop()
        field_descs = []
        for i, field_name in enumerate(field_names):
            field_type, field_size = struct.unpack_from(
                'BB', field_info, i * 2)
            field_descs.append(
                FieldDescEntry(field_name, field_type, field_size))
        self.set_record_struct(field_descs)
        # Pick out the field description indexes we will need
        try:
            fi_title = self.find_field(u'Title')
            fi_author = self.find_field(u'Author')
            fi_copy = self.find_field(u'Copyright')
            fi_admin = self.find_field(u'Administrator')
            fi_words = self.find_field(u'Words')
            fi_ccli = self.find_field(u'Song Number')
        except IndexError:
            # This is the wrong table
            return
        # Loop through each block of the file
        cur_block = first_block
        while cur_block != 0 and not self.stop_import_flag:
            db_file.seek(header_size + ((cur_block - 1) * 1024 * block_size))
            cur_block, rec_count = struct.unpack('<h2xh', db_file.read(6))
            rec_count = (rec_count + record_size) // record_size
            # Loop through each record within the current block
            for i in range(rec_count):
                if self.stop_import_flag:
                    break
                raw_record = db_file.read(record_size)
                self.fields = self.record_struct.unpack(raw_record)
                self.set_defaults()
                self.title = self.get_field(fi_title)
                # Get remaining fields.
                copy = self.get_field(fi_copy)
                admin = self.get_field(fi_admin)
                ccli = self.get_field(fi_ccli)
                authors = self.get_field(fi_author)
                words = self.get_field(fi_words)
                # Set the SongImport object members.
                if copy:
                    self.copyright = copy
                if admin:
                    if copy:
                        self.copyright += u', '
                    self.copyright += \
                        unicode(translate('SongsPlugin.EasyWorshipSongImport',
                        'Administered by %s')) % admin
                if ccli:
                    self.ccli_number = ccli
                if authors:
                    # Split up the authors, trying '/', ';' and ',' in turn
                    author_list = authors.split(u'/')
                    if len(author_list) < 2:
                        author_list = authors.split(u';')
                    if len(author_list) < 2:
                        author_list = authors.split(u',')
                    for author_name in author_list:
                        self.add_author(author_name.strip())
                if words:
                    self._add_lyrics(words)
                if self.stop_import_flag:
                    break
                if not self.finish():
                    self.log_error(self.import_source)

    def _add_lyrics(self, words):
        """
        Convert the RTF lyrics ``words`` to text and add them verse by verse.

        Verses are separated by an empty line. If any tag notes were
        collected into ``self.comments``, an explanatory line is appended
        once, after all verses of the song have been added.
        """
        # Format the lyrics
        words = strip_rtf(words, self.encoding)
        for verse in words.split(u'\n\n'):
            verse_text, verse_type = self._parse_verse(verse)
            self.add_verse(verse_text, verse_type)
        if len(self.comments) > 5:
            self.comments += unicode(
                translate('SongsPlugin.EasyWorshipSongImport',
                '\n[above are Song Tags with notes imported from EasyWorship]'))

    def _parse_verse(self, verse):
        """
        Split an optional EasyWorship tag line off ``verse``.

        The first line counts as a tag when, lower-cased, it starts with one
        of ``VerseType.Names`` or with ``tag`` / ``slide``. A tag line that
        contains a parenthesised note is appended to ``self.comments``.

        Returns a ``(text, verse_type)`` tuple. ``verse_type`` is the first
        letter of the matched name (the "Other" tag for ``tag``/``slide``)
        followed by the verse number if one is given; without a tag line it
        is the plain "Verse" tag.
        """
        # TODO: recognize note-part as well and put into comments-section
        # ew tags: verse, chorus, pre-chorus, bridge, tag, intro, ending,
        # slide
        verse = verse.strip()
        verse_split = verse.split(u'\n', 1)
        ew_tag = verse_split[0].strip().lower()
        # NOTE(review): startswith() also matches a lyric line that merely
        # begins with one of these words; confirm the EasyWorship tag syntax
        # before tightening this.
        for tag_name in VerseType.Names + self._EXTRA_TAG_NAMES:
            tag_name = tag_name.lower()
            if not ew_tag.startswith(tag_name):
                continue
            if tag_name == 'tag' or tag_name == 'slide':
                verse_type = VerseType.Tags[VerseType.Other]
            else:
                verse_type = tag_name[0]
            if len(ew_tag) > len(tag_name):
                # Tag is followed by number and/or note. Look for the number
                # only in front of the note so that digits inside the note
                # (e.g. "chorus (2x)") are not mistaken for a verse number.
                suffix = ew_tag[len(tag_name):].split(u'(', 1)[0]
                number = self._VERSE_NUMBER.search(suffix)
                if number:
                    verse_type += number.group()
                if self._VERSE_NOTE.search(ew_tag):
                    self.comments += ew_tag + '\n'
            # [-1] rather than [1]: a verse that consists of a single line
            # keeps that line as its text instead of ending up empty.
            return verse_split[-1].strip(), verse_type
        return verse, VerseType.Tags[VerseType.Verse]

    def find_field(self, field_name):
        """
        Return the index of the first field called ``field_name``.

        Raises ``IndexError`` if the table has no such field.
        """
        return [i for i, x in enumerate(self.field_descs)
            if x.name == field_name][0]

    def set_record_struct(self, field_descs):
        """
        Build the big-endian ``struct.Struct`` used to unpack one record and
        remember ``field_descs`` for later lookups.
        """
        # Begin with empty field struct list
        fsl = ['>']
        for field_desc in field_descs:
            fsl.append(self._FIELD_FORMATS.get(
                field_desc.type, '%ds' % field_desc.size))
        self.record_struct = struct.Struct(''.join(fsl))
        self.field_descs = field_descs

    def get_field(self, field_desc_index):
        """
        Return the decoded value of one field of the current record.

        Blank entries (all NUL bytes, or a numeric zero) yield ``None``.
        Strings are decoded with ``self.encoding``, numbers have their top
        bit flipped, and memo/blob fields are read from the memo file
        (``u''`` if the memo block cannot be located). Any other field type
        yields ``0``.
        """
        field = self.fields[field_desc_index]
        field_desc = self.field_descs[field_desc_index]
        # Return None in case of 'blank' entries
        if isinstance(field, str):
            if len(field.rstrip('\0')) == 0:
                return None
        elif field == 0:
            return None
        # Format the field depending on the field type
        if field_desc.type == 1:
            # string
            return field.rstrip('\0').decode(self.encoding)
        elif field_desc.type == 3:
            # 16-bit int
            return field ^ 0x8000
        elif field_desc.type == 4:
            # 32-bit int
            return field ^ 0x80000000
        elif field_desc.type == 9:
            # Logical
            return (field ^ 0x80 == 1)
        elif field_desc.type == 0x0c or field_desc.type == 0x0d:
            # Memo or Blob: the last 10 bytes of the field start with the
            # block offset and the size of the data in the memo file
            block_start, blob_size = \
                struct.unpack_from('<II', field, len(field)-10)
            # The low byte of the offset is the sub block index
            sub_block = block_start & 0xff
            block_start &= ~0xff
            self.memo_file.seek(block_start)
            memo_block_type, = struct.unpack('b', self.memo_file.read(1))
            if memo_block_type == 2:
                self.memo_file.seek(8, os.SEEK_CUR)
            elif memo_block_type == 3:
                if sub_block > 63:
                    return u''
                self.memo_file.seek(11 + (5 * sub_block), os.SEEK_CUR)
                sub_block_start, = struct.unpack('B', self.memo_file.read(1))
                self.memo_file.seek(block_start + (sub_block_start * 16))
            else:
                return u''
            return self.memo_file.read(blob_size)
        else:
            return 0