openlp/openlp/plugins/songs/lib/ewimport.py
2011-01-13 17:55:29 +00:00

355 lines
15 KiB
Python

# -*- coding: utf-8 -*-
# vim: autoindent shiftwidth=4 expandtab textwidth=80 tabstop=4 softtabstop=4
###############################################################################
# OpenLP - Open Source Lyrics Projection #
# --------------------------------------------------------------------------- #
# Copyright (c) 2008-2011 Raoul Snyman #
# Portions copyright (c) 2008-2011 Tim Bentley, Jonathan Corwin, Michael #
# Gorven, Scott Guerrieri, Meinert Jordan, Andreas Preikschat, Christian #
# Richter, Philip Ridout, Maikel Stuivenberg, Martin Thompson, Jon Tibble, #
# Carsten Tinggaard, Frode Woldsund, Jeffrey Smith #
# --------------------------------------------------------------------------- #
# This program is free software; you can redistribute it and/or modify it #
# under the terms of the GNU General Public License as published by the Free #
# Software Foundation; version 2 of the License. #
# #
# This program is distributed in the hope that it will be useful, but WITHOUT #
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for #
# more details. #
# #
# You should have received a copy of the GNU General Public License along #
# with this program; if not, write to the Free Software Foundation, Inc., 59 #
# Temple Place, Suite 330, Boston, MA 02111-1307 USA #
###############################################################################
"""
The :mod:`ewimport` module provides the functionality for importing
EasyWorship song databases into the current installation database.
"""
import os
import struct
from openlp.core.lib import translate
from openlp.plugins.songs.lib import retrieve_windows_encoding
from songimport import SongImport
# RTF ``\fcharsetN`` control words and the Python codec used for each one.
_RTF_CHARSET_ENCODINGS = {
    'fcharset0': u'cp1252',    # West European
    'fcharset161': u'cp1253',  # Greek
    'fcharset162': u'cp1254',  # Turkish
    'fcharset163': u'cp1258',  # Vietnamese
    'fcharset177': u'cp1255',  # Hebrew
    'fcharset178': u'cp1256',  # Arabic
    'fcharset186': u'cp1257',  # Baltic
    'fcharset204': u'cp1251',  # Cyrillic
    'fcharset222': u'cp874',   # Thai
    'fcharset238': u'cp1250',  # Central+East European
}


def strip_rtf(blob, encoding):
    """
    Reduce an RTF document to its plain text.

    The text is scanned one character at a time by a small state machine:

    * ``{`` and ``}`` raise and lower the group depth; plain text nested
      more than two groups deep is discarded.
    * ``\\par`` and ``\\line`` become a newline, ``\\tab`` becomes a tab.
    * ``\\{``, ``\\}`` and ``\\\\`` produce the literal character.
    * ``\\'hh`` is decoded as the single byte ``hh`` (hexadecimal) using the
      encoding in force at that point.
    * A recognised ``\\fcharsetN`` control word replaces ``encoding`` for the
      rest of the document, so the RTF data takes precedence over the
      encoding supplied by the caller.
    * Every other control word is dropped, and raw line breaks are ignored.

    A control word that is still being collected when ``blob`` ends is
    discarded.

    ``blob``
        The RTF source, iterated character by character.

    ``encoding``
        Name of the codec used for ``\\'hh`` escapes until the RTF data
        selects another one.

    Returns the extracted text as a single string.
    """
    depth = 0
    control = False
    # Only read in the iteration in which it was assigned; initialised here
    # so the name is always bound.
    new_control = False
    clear_text = []
    control_word = []
    for c in blob:
        if control:
            # A delimiter terminates the control word being collected
            if c == '{':
                if control_word:
                    depth += 1
                control = False
            elif c == '}':
                if control_word:
                    depth -= 1
                control = False
            elif c == '\\':
                # A backslash directly after a control word starts the next
                # one; after a bare backslash it is an escaped backslash.
                new_control = bool(control_word)
                control = False
            elif c.isspace():
                control = False
            else:
                control_word.append(c)
                # \'hh is complete after the quote and two hex digits
                if len(control_word) == 3 and control_word[0] == '\'':
                    control = False
            if not control:
                if not control_word:
                    # Escaped delimiter: emit it literally
                    if c == '{' or c == '}' or c == '\\':
                        clear_text.append(c)
                else:
                    control_str = ''.join(control_word)
                    if control_str == 'par' or control_str == 'line':
                        clear_text.append(u'\n')
                    elif control_str == 'tab':
                        clear_text.append(u'\t')
                    elif control_str in _RTF_CHARSET_ENCODINGS:
                        # Prefer the encoding specified by the RTF data to
                        # that specified by the Paradox table header
                        encoding = _RTF_CHARSET_ENCODINGS[control_str]
                    elif control_str[0] == '\'':
                        try:
                            # bytearray gives a one-byte value with a
                            # decode() method on Python 2.6+ and Python 3
                            byte = bytearray((int(control_str[1:3], 16),))
                        except ValueError:
                            # Not a valid two-digit hex byte: skip the
                            # escape rather than abort the whole import
                            pass
                        else:
                            # Bytes undefined in the code page become
                            # U+FFFD instead of raising
                            clear_text.append(
                                byte.decode(encoding, 'replace'))
                    del control_word[:]
                if c == '\\' and new_control:
                    control = True
        elif c == '{':
            depth += 1
        elif c == '}':
            depth -= 1
        elif depth > 2:
            continue
        elif c == '\n' or c == '\r':
            continue
        elif c == '\\':
            control = True
        else:
            clear_text.append(c)
    return u''.join(clear_text)
class FieldDescEntry(object):
    """
    Description of one field (column) of the table being imported.

    ``name``
        The field name as read from the table header.

    ``type``
        The numeric field type code read from the table header.

    ``size``
        The field size in bytes read from the table header.
    """
    def __init__(self, name, type, size):
        self.name = name
        self.type = type
        self.size = size

    def __repr__(self):
        return '%s(%r, %r, %r)' % (self.__class__.__name__, self.name,
            self.type, self.size)
class EasyWorshipSongImport(SongImport):
    """
    The :class:`EasyWorshipSongImport` class provides OpenLP with the
    ability to import EasyWorship song files.

    The songs are read from a Paradox table (the ``.DB`` file) whose memo
    and blob fields point into a companion memo file (the ``.MB`` file).
    """
    # Code page stored in the table header -> encoding used for its text.
    # Only 852 has been observed in a real file; the other mappings are
    # guesses and need example files to confirm them.
    _CODE_PAGE_ENCODINGS = {
        852: u'cp1250',
        737: u'cp1253',
        775: u'cp1257',
        855: u'cp1251',
        857: u'cp1254',
        866: u'cp1251',
        869: u'cp1253',
        862: u'cp1255',
        874: u'cp874',
    }

    # struct format for the fixed-width field types; every other type is
    # unpacked as a raw string of the field's declared size.
    _FIXED_FIELD_FORMATS = {
        3: 'H',     # 16-bit int
        4: 'I',     # 32-bit int
        9: 'B',     # Logical
        0x15: 'Q',  # Timestamp
    }

    def __init__(self, manager, **kwargs):
        """
        Remember the table to import and initialise the base importer.

        ``manager``
            Passed unchanged to :class:`SongImport`.

        ``filename``
            Keyword argument holding the path of the ``.DB`` file.
        """
        self.import_source = kwargs[u'filename']
        SongImport.__init__(self, manager)

    def do_import(self):
        """
        Import every song found in ``self.import_source``.

        Returns ``True`` when all blocks of the table were processed and
        ``False`` when either file is missing, the table is not a usable
        Paradox song table, no encoding could be determined, or the import
        was stopped through ``self.stop_import_flag``.
        """
        # Swap only the extension (keeping its case) so that a '.DB'
        # elsewhere in the path is left alone and '.db' maps to '.mb'
        root, ext = os.path.splitext(self.import_source)
        import_source_mb = root + ('.mb' if ext.islower() else '.MB')
        if not os.path.isfile(self.import_source):
            return False
        if not os.path.isfile(import_source_mb):
            return False
        db_size = os.path.getsize(self.import_source)
        if db_size < 0x800:
            return False
        # try/finally guarantees both files are closed on every exit path,
        # including early returns and exceptions
        db_file = open(self.import_source, 'rb')
        try:
            self.memo_file = open(import_source_mb, 'rb')
            try:
                return self._import_songs(db_file, db_size)
            finally:
                self.memo_file.close()
        finally:
            db_file.close()

    def _import_songs(self, db_file, db_size):
        """
        Parse the table header of the open ``db_file`` and import each
        record. ``self.memo_file`` must already be open. Returns the value
        :meth:`do_import` hands back to its caller.
        """
        # Don't accept files that are clearly not paradox files
        record_size, header_size, block_size, first_block, num_fields \
            = struct.unpack('<hhxb8xh17xh', db_file.read(35))
        if header_size != 0x800 or block_size < 1 or block_size > 4:
            return False
        # Take a stab at how text is encoded
        db_file.seek(106)
        code_page, = struct.unpack('<h', db_file.read(2))
        self.encoding = retrieve_windows_encoding(
            self._CODE_PAGE_ENCODINGS.get(code_page, u'cp1252'))
        if not self.encoding:
            return False
        # There does not appear to be a _reliable_ way of getting the number
        # of songs/records, so let's use file blocks for measuring progress.
        total_blocks = (db_size - header_size) // (block_size * 1024)
        self.import_wizard.progressBar.setMaximum(total_blocks)
        # Read the field description information
        db_file.seek(120)
        field_info = db_file.read(num_fields * 2)
        db_file.seek(4 + (num_fields * 4) + 261, os.SEEK_CUR)
        field_names = db_file.read(header_size - db_file.tell()).split('\0',
            num_fields)
        # The last element is whatever follows the final name
        field_names.pop()
        field_descs = []
        for i, field_name in enumerate(field_names):
            field_type, field_size = struct.unpack_from('BB',
                field_info, i * 2)
            field_descs.append(FieldDescEntry(field_name, field_type,
                field_size))
        self.set_record_struct(field_descs)
        # Pick out the field description indexes we will need
        try:
            field_indexes = (
                self.find_field(u'Title'),
                self.find_field(u'Author'),
                self.find_field(u'Copyright'),
                self.find_field(u'Administrator'),
                self.find_field(u'Words'),
                self.find_field(u'Song Number'),
            )
        except IndexError:
            # This is the wrong table
            return False
        # Loop through each block of the file
        cur_block = first_block
        while cur_block != 0:
            db_file.seek(header_size + ((cur_block - 1) * 1024 * block_size))
            cur_block, rec_count = struct.unpack('<h2xh', db_file.read(6))
            rec_count = (rec_count + record_size) // record_size
            # Loop through each record within the current block
            for i in range(rec_count):
                if self.stop_import_flag:
                    return False
                raw_record = db_file.read(record_size)
                self.fields = self.record_struct.unpack(raw_record)
                self._import_record(field_indexes)
                if self.stop_import_flag:
                    return False
                self.finish()
            if not self.stop_import_flag:
                self.import_wizard.incrementProgressBar(u'')
        return True

    def _import_record(self, field_indexes):
        """
        Copy the record currently held in ``self.fields`` onto this
        importer's song members.

        ``field_indexes``
            Field indexes in the order title, author, copyright,
            administrator, words, song number.
        """
        fi_title, fi_author, fi_copy, fi_admin, fi_words, fi_ccli = \
            field_indexes
        self.set_defaults()
        # Get title and update progress bar message
        title = self.get_field(fi_title)
        if title:
            self.import_wizard.incrementProgressBar(
                unicode(translate('SongsPlugin.ImportWizardForm',
                    'Importing "%s"...')) % title, 0)
            self.title = title
        # Get remaining fields
        copy = self.get_field(fi_copy)
        admin = self.get_field(fi_admin)
        ccli = self.get_field(fi_ccli)
        authors = self.get_field(fi_author)
        words = self.get_field(fi_words)
        # Set the SongImport object members
        if copy:
            self.copyright = copy
        if admin:
            if copy:
                self.copyright += u', '
            self.copyright += \
                unicode(translate('SongsPlugin.ImportWizardForm',
                    'Administered by %s')) % admin
        if ccli:
            self.ccli_number = ccli
        if authors:
            # Split up the authors on the first separator that yields more
            # than one name; a single name falls through unchanged
            for separator in (u'/', u';', u','):
                author_list = authors.split(separator)
                if len(author_list) > 1:
                    break
            for author_name in author_list:
                self.add_author(author_name.strip())
        if words:
            # Format the lyrics: a blank line separates verses
            words = strip_rtf(words, self.encoding)
            for verse in words.split(u'\n\n'):
                self.add_verse(verse.strip(), u'V')

    def find_field(self, field_name):
        """
        Return the index of the first field called ``field_name``.

        Raises ``IndexError`` when the table has no such field.
        """
        return [i for i, x in enumerate(self.field_descs) \
            if x.name == field_name][0]

    def set_record_struct(self, field_descs):
        """
        Build ``self.record_struct``, the big-endian :class:`struct.Struct`
        used to unpack one record, and store ``field_descs`` as
        ``self.field_descs``.

        ``field_descs``
            Sequence of :class:`FieldDescEntry`, in record order.
        """
        # Begin with the byte-order marker, then one format per field
        fsl = ['>']
        for field_desc in field_descs:
            fixed_format = self._FIXED_FIELD_FORMATS.get(field_desc.type)
            if fixed_format is None:
                # Strings, memos, blobs and unknown types are kept as raw
                # bytes of the declared size
                fsl.append('%ds' % field_desc.size)
            else:
                fsl.append(fixed_format)
        self.record_struct = struct.Struct(''.join(fsl))
        self.field_descs = field_descs

    def get_field(self, field_desc_index):
        """
        Return the value of one field of the record in ``self.fields``.

        ``field_desc_index``
            Index of the field, as returned by :meth:`find_field`.

        Returns ``None`` for a blank entry (a string of only NUL bytes, or
        a numeric zero). Otherwise strings are decoded with
        ``self.encoding``, integers and logicals have their top bit
        toggled, memos and blobs are read from ``self.memo_file`` and
        returned undecoded (``u''`` when the memo block cannot be read),
        and any other field type yields ``0``.
        """
        field = self.fields[field_desc_index]
        field_desc = self.field_descs[field_desc_index]
        # Return None in case of 'blank' entries
        if isinstance(field, str):
            if len(field.rstrip('\0')) == 0:
                return None
        elif field == 0:
            return None
        # Format the field depending on the field type
        if field_desc.type == 1:
            # string
            return field.rstrip('\0').decode(self.encoding)
        elif field_desc.type == 3:
            # 16-bit int
            return field ^ 0x8000
        elif field_desc.type == 4:
            # 32-bit int
            return field ^ 0x80000000
        elif field_desc.type == 9:
            # Logical
            return (field ^ 0x80 == 1)
        elif field_desc.type == 0x0c or field_desc.type == 0x0d:
            # Memo or Blob: the last ten bytes of the field start with the
            # memo file offset and the data length
            block_start, blob_size = \
                struct.unpack_from('<II', field, len(field)-10)
            # The low byte selects a sub-block; the rest is the block offset
            sub_block = block_start & 0xff
            block_start &= ~0xff
            self.memo_file.seek(block_start)
            raw_block_type = self.memo_file.read(1)
            if not raw_block_type:
                # The pointer lies beyond the end of the memo file
                return u''
            memo_block_type, = struct.unpack('b', raw_block_type)
            if memo_block_type == 2:
                self.memo_file.seek(8, os.SEEK_CUR)
            elif memo_block_type == 3:
                if sub_block > 63:
                    return u''
                self.memo_file.seek(11 + (5 * sub_block), os.SEEK_CUR)
                raw_sub_block_start = self.memo_file.read(1)
                if not raw_sub_block_start:
                    return u''
                sub_block_start, = struct.unpack('B', raw_sub_block_start)
                # Sub-block positions are counted in 16-byte units
                self.memo_file.seek(block_start + (sub_block_start * 16))
            else:
                return u''
            return self.memo_file.read(blob_size)
        else:
            return 0