fixed line endings

This commit is contained in:
Benny 2011-06-21 07:55:11 +02:00
parent 2939151ff1
commit 31dd4945ba

View File

@ -1,380 +1,379 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vim: autoindent shiftwidth=4 expandtab textwidth=80 tabstop=4 softtabstop=4 # vim: autoindent shiftwidth=4 expandtab textwidth=80 tabstop=4 softtabstop=4
############################################################################### ###############################################################################
# OpenLP - Open Source Lyrics Projection # # OpenLP - Open Source Lyrics Projection #
# --------------------------------------------------------------------------- # # --------------------------------------------------------------------------- #
# Copyright (c) 2008-2011 Raoul Snyman # # Copyright (c) 2008-2011 Raoul Snyman #
# Portions copyright (c) 2008-2011 Tim Bentley, Gerald Britton, Jonathan # # Portions copyright (c) 2008-2011 Tim Bentley, Gerald Britton, Jonathan #
# Corwin, Michael Gorven, Scott Guerrieri, Matthias Hub, Meinert Jordan, # # Corwin, Michael Gorven, Scott Guerrieri, Matthias Hub, Meinert Jordan, #
# Armin Köhler, Joshua Miller, Stevan Pettit, Andreas Preikschat, Mattias # # Armin Köhler, Joshua Miller, Stevan Pettit, Andreas Preikschat, Mattias #
# Põldaru, Christian Richter, Philip Ridout, Simon Scudder, Jeffrey Smith, # # Põldaru, Christian Richter, Philip Ridout, Simon Scudder, Jeffrey Smith, #
# Maikel Stuivenberg, Martin Thompson, Jon Tibble, Frode Woldsund # # Maikel Stuivenberg, Martin Thompson, Jon Tibble, Frode Woldsund #
# --------------------------------------------------------------------------- # # --------------------------------------------------------------------------- #
# This program is free software; you can redistribute it and/or modify it # # This program is free software; you can redistribute it and/or modify it #
# under the terms of the GNU General Public License as published by the Free # # under the terms of the GNU General Public License as published by the Free #
# Software Foundation; version 2 of the License. # # Software Foundation; version 2 of the License. #
# # # #
# This program is distributed in the hope that it will be useful, but WITHOUT # # This program is distributed in the hope that it will be useful, but WITHOUT #
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for #
# more details. # # more details. #
# # # #
# You should have received a copy of the GNU General Public License along # # You should have received a copy of the GNU General Public License along #
# with this program; if not, write to the Free Software Foundation, Inc., 59 # # with this program; if not, write to the Free Software Foundation, Inc., 59 #
# Temple Place, Suite 330, Boston, MA 02111-1307 USA # # Temple Place, Suite 330, Boston, MA 02111-1307 USA #
############################################################################### ###############################################################################
""" """
The :mod:`ewimport` module provides the functionality for importing The :mod:`ewimport` module provides the functionality for importing
EasyWorship song databases into the current installation database. EasyWorship song databases into the current installation database.
""" """
import os import os
import struct import struct
import re import re
from openlp.core.lib import translate from openlp.core.lib import translate
from openlp.core.ui.wizard import WizardStrings from openlp.core.ui.wizard import WizardStrings
from openlp.plugins.songs.lib import VerseType from openlp.plugins.songs.lib import VerseType
from openlp.plugins.songs.lib import retrieve_windows_encoding from openlp.plugins.songs.lib import retrieve_windows_encoding
from songimport import SongImport from songimport import SongImport
def strip_rtf(blob, encoding): def strip_rtf(blob, encoding):
depth = 0 depth = 0
control = False control = False
clear_text = [] clear_text = []
control_word = [] control_word = []
for c in blob: for c in blob:
if control: if control:
# for delimiters, set control to False # for delimiters, set control to False
if c == '{': if c == '{':
if len(control_word) > 0: if len(control_word) > 0:
depth += 1 depth += 1
control = False control = False
elif c == '}': elif c == '}':
if len(control_word) > 0: if len(control_word) > 0:
depth -= 1 depth -= 1
control = False control = False
elif c == '\\': elif c == '\\':
new_control = (len(control_word) > 0) new_control = (len(control_word) > 0)
control = False control = False
elif c.isspace(): elif c.isspace():
control = False control = False
else: else:
control_word.append(c) control_word.append(c)
if len(control_word) == 3 and control_word[0] == '\'': if len(control_word) == 3 and control_word[0] == '\'':
control = False control = False
if not control: if not control:
if len(control_word) == 0: if len(control_word) == 0:
if c == '{' or c == '}' or c == '\\': if c == '{' or c == '}' or c == '\\':
clear_text.append(c) clear_text.append(c)
else: else:
control_str = ''.join(control_word) control_str = ''.join(control_word)
if control_str == 'par' or control_str == 'line': if control_str == 'par' or control_str == 'line':
clear_text.append(u'\n') clear_text.append(u'\n')
elif control_str == 'tab': elif control_str == 'tab':
clear_text.append(u'\t') clear_text.append(u'\t')
# Prefer the encoding specified by the RTF data to that # Prefer the encoding specified by the RTF data to that
# specified by the Paradox table header # specified by the Paradox table header
# West European encoding # West European encoding
elif control_str == 'fcharset0': elif control_str == 'fcharset0':
encoding = u'cp1252' encoding = u'cp1252'
# Greek encoding # Greek encoding
elif control_str == 'fcharset161': elif control_str == 'fcharset161':
encoding = u'cp1253' encoding = u'cp1253'
# Turkish encoding # Turkish encoding
elif control_str == 'fcharset162': elif control_str == 'fcharset162':
encoding = u'cp1254' encoding = u'cp1254'
# Vietnamese encoding # Vietnamese encoding
elif control_str == 'fcharset163': elif control_str == 'fcharset163':
encoding = u'cp1258' encoding = u'cp1258'
# Hebrew encoding # Hebrew encoding
elif control_str == 'fcharset177': elif control_str == 'fcharset177':
encoding = u'cp1255' encoding = u'cp1255'
# Arabic encoding # Arabic encoding
elif control_str == 'fcharset178': elif control_str == 'fcharset178':
encoding = u'cp1256' encoding = u'cp1256'
# Baltic encoding # Baltic encoding
elif control_str == 'fcharset186': elif control_str == 'fcharset186':
encoding = u'cp1257' encoding = u'cp1257'
# Cyrillic encoding # Cyrillic encoding
elif control_str == 'fcharset204': elif control_str == 'fcharset204':
encoding = u'cp1251' encoding = u'cp1251'
# Thai encoding # Thai encoding
elif control_str == 'fcharset222': elif control_str == 'fcharset222':
encoding = u'cp874' encoding = u'cp874'
# Central+East European encoding # Central+East European encoding
elif control_str == 'fcharset238': elif control_str == 'fcharset238':
encoding = u'cp1250' encoding = u'cp1250'
elif control_str[0] == '\'': elif control_str[0] == '\'':
s = chr(int(control_str[1:3], 16)) s = chr(int(control_str[1:3], 16))
clear_text.append(s.decode(encoding)) clear_text.append(s.decode(encoding))
del control_word[:] del control_word[:]
if c == '\\' and new_control: if c == '\\' and new_control:
control = True control = True
elif c == '{': elif c == '{':
depth += 1 depth += 1
elif c == '}': elif c == '}':
depth -= 1 depth -= 1
elif depth > 2: elif depth > 2:
continue continue
elif c == '\n' or c == '\r': elif c == '\n' or c == '\r':
continue continue
elif c == '\\': elif c == '\\':
control = True control = True
else: else:
clear_text.append(c) clear_text.append(c)
return u''.join(clear_text) return u''.join(clear_text)
class FieldDescEntry:
    """
    Plain record describing one field (column) of the table being imported:
    its name, its numeric type code and its size.
    """
    def __init__(self, name, type, size):
        # ``type`` shadows the builtin, but it is part of the public
        # signature and of the attribute names other code reads.
        self.name, self.type, self.size = name, type, size
class EasyWorshipSongImport(SongImport):
    """
    The :class:`EasyWorshipSongImport` class provides OpenLP with the
    ability to import EasyWorship song files.
    """
    # Encoding assumed when the code page in the table header is not listed
    # below.
    _DEFAULT_ENCODING = u'cp1252'
    # Code page found in the table header -> encoding used to decode text.
    _CODE_PAGE_ENCODINGS = {
        852: u'cp1250',
        # The following codepage to actual encoding mappings have not been
        # observed, but merely guessed. Actual example files are needed.
        737: u'cp1253',
        775: u'cp1257',
        855: u'cp1251',
        857: u'cp1254',
        866: u'cp1251',
        869: u'cp1253',
        862: u'cp1255',
        874: u'cp874',
    }
    # struct format codes of the fixed-width field types (16-bit int, 32-bit
    # int, logical, timestamp).  Every other type (string, memo, blob,
    # unknown) is unpacked as a raw byte string of the declared field size.
    _FIXED_FIELD_FORMATS = {3: 'H', 4: 'I', 9: 'B', 0x15: 'Q'}
    # Compiled once instead of once per verse and tag name.
    _NUMBER_REGEX = re.compile(r'[0-9]+')
    _NOTE_REGEX = re.compile(r'\(.*\)')

    def __init__(self, manager, **kwargs):
        """
        Pass ``manager`` and all keyword arguments on to :class:`SongImport`.
        """
        SongImport.__init__(self, manager, **kwargs)

    def do_import(self):
        """
        Import the songs stored in the table ``self.import_source``.

        The memo file is expected next to the table, under the same path
        with ``.DB`` replaced by ``.MB``.  The method returns silently when
        either file is missing, when the table is smaller than 0x800 bytes,
        when its header does not look like a paradox file, when no encoding
        is selected or when the expected song fields are not present.

        Both files are closed again on every exit path, including early
        returns and errors raised while parsing.
        """
        # Open the DB and MB files if they exist
        import_source_mb = self.import_source.replace('.DB', '.MB')
        if not os.path.isfile(self.import_source):
            return
        if not os.path.isfile(import_source_mb):
            return
        db_size = os.path.getsize(self.import_source)
        if db_size < 0x800:
            return
        db_file = open(self.import_source, 'rb')
        try:
            self.memo_file = open(import_source_mb, 'rb')
            try:
                self._import_table(db_file, db_size)
            finally:
                self.memo_file.close()
        finally:
            db_file.close()

    def _import_table(self, db_file, db_size):
        """
        Parse the header of the open table ``db_file`` (``db_size`` bytes
        long) and import every record of every block in its block chain.
        """
        # Don't accept files that are clearly not paradox files
        record_size, header_size, block_size, first_block, num_fields \
            = struct.unpack('<hhxb8xh17xh', db_file.read(35))
        if header_size != 0x800 or block_size < 1 or block_size > 4:
            return
        # Take a stab at how text is encoded
        db_file.seek(106)
        code_page, = struct.unpack('<h', db_file.read(2))
        self.encoding = retrieve_windows_encoding(
            self._CODE_PAGE_ENCODINGS.get(code_page, self._DEFAULT_ENCODING))
        if not self.encoding:
            return
        # There does not appear to be a _reliable_ way of getting the number
        # of songs/records, so let's use file blocks for measuring progress.
        total_blocks = (db_size - header_size) // (block_size * 1024)
        self.import_wizard.progressBar.setMaximum(total_blocks)
        self.set_record_struct(self._read_field_descs(db_file, header_size,
            num_fields))
        # Pick out the field description indexes we will need
        try:
            field_indexes = (
                self.find_field(u'Title'),
                self.find_field(u'Author'),
                self.find_field(u'Copyright'),
                self.find_field(u'Administrator'),
                self.find_field(u'Words'),
                self.find_field(u'Song Number'))
        except IndexError:
            # This is the wrong table
            return
        # Loop through each block of the file
        cur_block = first_block
        while cur_block != 0:
            db_file.seek(header_size + ((cur_block - 1) * 1024 * block_size))
            # The block header names the next block in the chain
            cur_block, rec_count = struct.unpack('<h2xh', db_file.read(6))
            rec_count = (rec_count + record_size) // record_size
            # Loop through each record within the current block
            for _ in range(rec_count):
                if self.stop_import_flag:
                    break
                self._import_record(db_file.read(record_size), field_indexes)
                if self.stop_import_flag:
                    break
                if not self.finish():
                    self.log_error(self.import_source)

    def _read_field_descs(self, db_file, header_size, num_fields):
        """
        Read the field description information from the table header and
        return it as a list of :class:`FieldDescEntry` objects.
        """
        db_file.seek(120)
        # Two bytes per field: type code followed by size
        field_info = db_file.read(num_fields * 2)
        db_file.seek(4 + (num_fields * 4) + 261, os.SEEK_CUR)
        field_names = db_file.read(header_size - db_file.tell()).split('\0',
            num_fields)
        # Whatever follows the last field name is not a name
        field_names.pop()
        field_descs = []
        for i, field_name in enumerate(field_names):
            field_type, field_size = struct.unpack_from('BB',
                field_info, i * 2)
            field_descs.append(FieldDescEntry(field_name, field_type,
                field_size))
        return field_descs

    def _import_record(self, raw_record, field_indexes):
        """
        Unpack ``raw_record`` and copy its content into the song members of
        this importer.

        ``field_indexes`` holds the field description indexes of the title,
        author, copyright, administrator, words and song number fields, in
        that order.
        """
        fi_title, fi_author, fi_copy, fi_admin, fi_words, fi_ccli = \
            field_indexes
        self.fields = self.record_struct.unpack(raw_record)
        self.set_defaults()
        self.title = self.get_field(fi_title)
        # Get remaining fields.
        copy_text = self.get_field(fi_copy)
        admin = self.get_field(fi_admin)
        ccli = self.get_field(fi_ccli)
        authors = self.get_field(fi_author)
        words = self.get_field(fi_words)
        # Set the SongImport object members.
        if copy_text:
            self.copyright = copy_text
        if admin:
            if copy_text:
                self.copyright += u', '
            self.copyright += \
                unicode(translate('SongsPlugin.EasyWorshipSongImport',
                'Administered by %s')) % admin
        if ccli:
            self.ccli_number = ccli
        if authors:
            # Split up the authors, using the first separator that actually
            # splits the string
            for separator in (u'/', u';', u','):
                author_list = authors.split(separator)
                if len(author_list) >= 2:
                    break
            for author_name in author_list:
                self.add_author(author_name.strip())
        if words:
            # Format the lyrics
            self._add_verses(strip_rtf(words, self.encoding))
        if len(self.comments) > 5:
            self.comments += unicode(translate('SongsPlugin.EasyWorshipSongImport',
                '\n[above are Song Tags with notes imported from EasyWorship]'))

    def _add_verses(self, words):
        """
        Split the plain text lyrics ``words`` at blank lines and add each
        part as a verse.

        When the first line of a part starts with a known tag name, that
        line selects the verse type and is left out of the verse text.  A
        number in the tag line is appended to the verse type, and a tag line
        containing a note in parentheses is appended to ``self.comments``.
        """
        # TODO: recognize note-part as well and put into comments-section
        # ew tags: verse, chorus, pre-chorus, bridge, tag, intro, ending, slide
        tag_names = [name.lower() for name in VerseType.Names + ['tag', 'slide']]
        for verse in words.split(u'\n\n'):
            verse_split = verse.strip().split(u'\n', 1)
            verse_type = VerseType.Tags[VerseType.Verse]
            first_line_is_tag = False
            ew_tag = verse_split[0].strip().lower()
            for tag_name in tag_names:
                if not ew_tag.startswith(tag_name):
                    continue
                verse_type = tag_name[0]
                if tag_name == 'tag' or tag_name == 'slide':
                    verse_type = VerseType.Tags[VerseType.Other]
                first_line_is_tag = True
                # tag is followed by number and/or note
                if len(ew_tag) > len(tag_name):
                    match = self._NUMBER_REGEX.search(ew_tag)
                    if match:
                        verse_type += match.group()
                    if self._NOTE_REGEX.search(ew_tag):
                        self.comments += ew_tag + '\n'
                break
            # TODO: hacky: -1 (a part consisting of the tag line only is
            # added with the tag line as its text)
            if first_line_is_tag:
                verse_text = verse_split[-1].strip()
            else:
                verse_text = verse.strip()
            self.add_verse(verse_text, verse_type)

    def find_field(self, field_name):
        """
        Return the index of the first field description named ``field_name``.

        Raises ``IndexError`` when there is no such field; ``do_import``
        relies on this to detect a wrong table.
        """
        for index, field_desc in enumerate(self.field_descs):
            if field_desc.name == field_name:
                return index
        raise IndexError(field_name)

    def set_record_struct(self, field_descs):
        """
        Build the big-endian :class:`struct.Struct` used to unpack one record
        from ``field_descs`` and remember both as ``self.record_struct`` and
        ``self.field_descs``.
        """
        # Begin with the byte order, then add one format code per field
        fsl = ['>']
        for field_desc in field_descs:
            fsl.append(self._FIXED_FIELD_FORMATS.get(field_desc.type,
                '%ds' % field_desc.size))
        self.record_struct = struct.Struct(''.join(fsl))
        self.field_descs = field_descs

    def get_field(self, field_desc_index):
        """
        Return the value of field ``field_desc_index`` of the record last
        unpacked into ``self.fields``.

        Blank entries (only NUL bytes, or the number 0) yield ``None``.
        Strings are decoded with ``self.encoding``, integers and logicals
        are returned with their most significant bit toggled, and memo/blob
        fields are read from ``self.memo_file`` and returned undecoded
        (``u''`` when the memo block cannot be interpreted).  Any other field
        type yields ``0``.
        """
        field = self.fields[field_desc_index]
        field_desc = self.field_descs[field_desc_index]
        # Return None in case of 'blank' entries
        if isinstance(field, str):
            if len(field.rstrip('\0')) == 0:
                return None
        elif field == 0:
            return None
        # Format the field depending on the field type
        if field_desc.type == 1:
            # string
            return field.rstrip('\0').decode(self.encoding)
        elif field_desc.type == 3:
            # 16-bit int
            # NOTE(review): presumably the sign bit is stored inverted -
            # confirm against the file format description
            return field ^ 0x8000
        elif field_desc.type == 4:
            # 32-bit int
            return field ^ 0x80000000
        elif field_desc.type == 9:
            # Logical
            return (field ^ 0x80 == 1)
        elif field_desc.type == 0x0c or field_desc.type == 0x0d:
            # Memo or Blob: the last ten bytes of the field start with the
            # location and the size of the data inside the memo file
            block_start, blob_size = \
                struct.unpack_from('<II', field, len(field)-10)
            # The low byte selects a sub block, the rest is the file offset
            sub_block = block_start & 0xff
            block_start &= ~0xff
            self.memo_file.seek(block_start)
            memo_block_type, = struct.unpack('b', self.memo_file.read(1))
            if memo_block_type == 2:
                # The data follows the remaining 8 bytes of the block header
                self.memo_file.seek(8, os.SEEK_CUR)
            elif memo_block_type == 3:
                if sub_block > 63:
                    return u''
                # Look up where the sub block starts, in units of 16 bytes
                self.memo_file.seek(11 + (5 * sub_block), os.SEEK_CUR)
                sub_block_start, = struct.unpack('B', self.memo_file.read(1))
                self.memo_file.seek(block_start + (sub_block_start * 16))
            else:
                return u''
            return self.memo_file.read(blob_size)
        else:
            return 0