openlp/openlp/plugins/bibles/lib/osis.py

188 lines
8.7 KiB
Python
Raw Normal View History

# -*- coding: utf-8 -*-
# vim: autoindent shiftwidth=4 expandtab textwidth=80 tabstop=4 softtabstop=4
###############################################################################
# OpenLP - Open Source Lyrics Projection #
# --------------------------------------------------------------------------- #
2009-12-31 12:52:01 +00:00
# Copyright (c) 2008-2010 Raoul Snyman #
# Portions copyright (c) 2008-2010 Tim Bentley, Jonathan Corwin, Michael #
2010-07-24 22:10:47 +00:00
# Gorven, Scott Guerrieri, Meinert Jordan, Andreas Preikschat, Christian #
# Richter, Philip Ridout, Maikel Stuivenberg, Martin Thompson, Jon Tibble, #
# Carsten Tinggaard, Frode Woldsund #
# --------------------------------------------------------------------------- #
# This program is free software; you can redistribute it and/or modify it #
# under the terms of the GNU General Public License as published by the Free #
# Software Foundation; version 2 of the License. #
# #
# This program is distributed in the hope that it will be useful, but WITHOUT #
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for #
# more details. #
# #
# You should have received a copy of the GNU General Public License along #
# with this program; if not, write to the Free Software Foundation, Inc., 59 #
# Temple Place, Suite 330, Boston, MA 02111-1307 USA #
###############################################################################
import os
import os.path
import logging
import chardet
2009-07-10 15:41:08 +00:00
import codecs
import re
from PyQt4 import QtCore
2009-10-24 16:40:36 +00:00
from openlp.core.lib import Receiver
from openlp.core.utils import AppLocation
from db import BibleDB
2010-02-27 15:31:23 +00:00
# Module-level logger, named after this module via ``__name__``.
log = logging.getLogger(__name__)
class OSISBible(BibleDB):
    """
    OSIS Bible format importer class.

    Reads an OSIS XML file line by line, picks out every ``<verse>`` element
    with a regular expression, strips the embedded markup from the verse text
    and stores the result through the ``BibleDB`` base class.
    """
    log.info(u'BibleOSISImpl loaded')

    def __init__(self, parent, **kwargs):
        """
        Constructor to create and set up an instance of the OSISBible class.
        This class is used to import Bibles from the OSIS XML format.

        ``parent``
            Passed straight through to ``BibleDB.__init__``.

        ``filename``
            Required keyword argument holding the path of the OSIS file to
            import. A ``KeyError`` is raised when it is missing.
        """
        log.debug(__name__)
        BibleDB.__init__(self, parent, **kwargs)
        if u'filename' not in kwargs:
            raise KeyError(u'You have to supply a file name to import from.')
        self.filename = kwargs[u'filename']
        # The separators inside the osisID are literal dots ("Gen.1.1"), so
        # they are escaped rather than left as "match any character".
        self.verse_regex = re.compile(
            r'<verse osisID="([a-zA-Z0-9 ]*)\.([0-9]*)\.([0-9]*)">'
            r'(.*?)</verse>')
        # Patterns for the markup that is stripped from (or replaced in) the
        # verse text, see _clean_verse_text().
        self.note_regex = re.compile(r'<note(.*?)>(.*?)</note>')
        self.title_regex = re.compile(r'<title(.*?)>(.*?)</title>')
        self.milestone_regex = re.compile(r'<milestone(.*?)/>')
        self.fi_regex = re.compile(r'<FI>(.*?)<Fi>')
        self.rf_regex = re.compile(r'<RF>(.*?)<Rf>')
        self.lb_regex = re.compile(r'<lb(.*?)>')
        self.lg_regex = re.compile(r'<lg(.*?)>')
        self.l_regex = re.compile(r'<l (.*?)>')
        self.w_regex = re.compile(r'<w (.*?)>')
        self.q1_regex = re.compile(r'<q(.*?)level="1"(.*?)>')
        self.q2_regex = re.compile(r'<q(.*?)level="2"(.*?)>')
        self.trans_regex = re.compile(r'<transChange(.*?)>(.*?)</transChange>')
        self.divineName_regex = re.compile(
            r'<divineName(.*?)>(.*?)</divineName>')
        self.spaces_regex = re.compile(r'([ ]{2,})')
        # Maps the first CSV column (the book id used in osisID) to a tuple
        # of the second and third columns, which are later handed to
        # create_book() -- presumably full name and abbreviation; verify
        # against osisbooks.csv.
        self.books = {}
        filepath = os.path.join(
            AppLocation.get_directory(AppLocation.PluginsDir), u'bibles',
            u'resources', u'osisbooks.csv')
        fbibles = None
        try:
            fbibles = open(filepath, u'r')
            for line in fbibles:
                book = line.split(u',')
                if len(book) < 3:
                    # Blank or malformed row: skip it instead of letting an
                    # IndexError escape from the constructor.
                    continue
                self.books[book[0]] = (book[1].strip(), book[2].strip())
        except IOError:
            log.exception(u'OSIS bible import failed')
        finally:
            if fbibles:
                fbibles.close()
        QtCore.QObject.connect(Receiver.get_receiver(),
            QtCore.SIGNAL(u'bibles_stop_import'), self.stop_import)

    def _clean_verse_text(self, verse_text):
        """
        Return ``verse_text`` with the embedded OSIS markup removed.

        All of this rigmarole is needed because the mod2osis tool from the
        Sword library embeds XML in the OSIS but neglects to enclose the
        verse text (with XML) in <![CDATA[ ]]> tags.

        ``verse_text``
            The raw text captured between ``<verse ...>`` and ``</verse>``.
        """
        # The order of the substitutions is significant and must be kept.
        verse_text = self.note_regex.sub(u'', verse_text)
        verse_text = self.title_regex.sub(u'', verse_text)
        verse_text = self.milestone_regex.sub(u'', verse_text)
        verse_text = self.fi_regex.sub(u'', verse_text)
        verse_text = self.rf_regex.sub(u'', verse_text)
        # Line breaks become a space so adjacent words do not run together.
        verse_text = self.lb_regex.sub(u' ', verse_text)
        verse_text = self.lg_regex.sub(u'', verse_text)
        verse_text = self.l_regex.sub(u'', verse_text)
        verse_text = self.w_regex.sub(u'', verse_text)
        # Opening quote tags are replaced by plain quotation marks.
        verse_text = self.q1_regex.sub(u'"', verse_text)
        verse_text = self.q2_regex.sub(u'\'', verse_text)
        verse_text = self.trans_regex.sub(u'', verse_text)
        verse_text = self.divineName_regex.sub(u'', verse_text)
        # Remove the closing tags left behind by the patterns above.
        for tag in (u'</lb>', u'</l>', u'<lg>', u'</lg>', u'</q>',
            u'</div>', u'</w>'):
            verse_text = verse_text.replace(tag, u'')
        # Collapse the runs of spaces the removals leave behind.
        return self.spaces_regex.sub(u' ', verse_text)

    def do_import(self):
        """
        Loads a Bible from file.

        Returns ``True`` when at least one verse was found and no error
        occurred. Returns ``False`` when the file could not be read or
        parsed, when it contained no verses, or when the import was stopped
        through ``stop_import_flag``.
        """
        log.debug(u'Starting OSIS import from "%s"' % self.filename)
        self.wizard.incrementProgressBar(
            u'Detecting encoding (this may take a few minutes)...')
        detect_file = None
        try:
            # Binary mode so chardet sees the raw, untranslated bytes.
            detect_file = open(self.filename, u'rb')
            details = chardet.detect(detect_file.read(1048576))
        except IOError:
            log.exception(u'Failed to detect OSIS file encoding')
            return False
        finally:
            if detect_file:
                detect_file.close()
        # chardet reports an encoding of None when it cannot make a guess;
        # fall back to UTF-8 rather than reading undecoded byte strings.
        encoding = details['encoding'] or u'utf-8'
        osis = None
        success = True
        try:
            osis = codecs.open(self.filename, u'r', encoding)
            last_chapter = 0
            testament = 1
            match_count = 0
            db_book = None
            for file_record in osis:
                if self.stop_import_flag:
                    break
                match = self.verse_regex.search(file_record)
                if not match:
                    continue
                match_count += 1
                book = match.group(1)
                chapter = int(match.group(2))
                verse = int(match.group(3))
                book_name = self.books[book][0]
                if not db_book or db_book.name != book_name:
                    log.debug('New book: "%s"', book_name)
                    # Matthew is the first book of the second testament.
                    if book == u'Matt':
                        testament += 1
                    db_book = self.create_book(
                        unicode(book_name),
                        unicode(self.books[book][1]),
                        testament)
                if last_chapter == 0:
                    # First verse of the file: size the progress bar. A file
                    # starting with Genesis gets the larger maximum.
                    if book == u'Gen':
                        self.wizard.ImportProgressBar.setMaximum(1188)
                    else:
                        self.wizard.ImportProgressBar.setMaximum(260)
                # NOTE(review): only the chapter number is compared, so a
                # one-chapter book followed by chapter 1 of the next book
                # triggers neither a commit nor a progress step -- confirm
                # this is acceptable.
                if last_chapter != chapter:
                    if last_chapter != 0:
                        # Commit the chapter that has just been completed.
                        self.session.commit()
                    self.wizard.incrementProgressBar(
                        u'Importing %s %s...' % (book_name, chapter))
                    last_chapter = chapter
                verse_text = self._clean_verse_text(match.group(4))
                self.create_verse(db_book.id, chapter, verse, verse_text)
                # Keep the user interface responsive during the import.
                Receiver.send_message(u'openlp_process_events')
            self.session.commit()
            self.wizard.incrementProgressBar(u'Finishing import...')
            if match_count == 0:
                success = False
        except (ValueError, IOError, LookupError):
            # ValueError: undecodable text or a non-numeric chapter/verse.
            # LookupError: unknown codec name, or a book id (KeyError) that
            # is missing from the books table.
            log.exception(u'Loading bible from OSIS file failed')
            success = False
        finally:
            if osis:
                osis.close()
        if self.stop_import_flag:
            self.wizard.incrementProgressBar(u'Import canceled!')
            return False
        return success