2009-09-08 19:58:05 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
2013-01-01 16:33:41 +00:00
|
|
|
# vim: autoindent shiftwidth=4 expandtab textwidth=120 tabstop=4 softtabstop=4
|
2009-09-08 19:58:05 +00:00
|
|
|
|
|
|
|
###############################################################################
|
|
|
|
# OpenLP - Open Source Lyrics Projection #
|
|
|
|
# --------------------------------------------------------------------------- #
|
2012-12-29 20:56:56 +00:00
|
|
|
# Copyright (c) 2008-2013 Raoul Snyman #
|
|
|
|
# Portions copyright (c) 2008-2013 Tim Bentley, Gerald Britton, Jonathan #
|
2012-06-22 14:14:53 +00:00
|
|
|
# Corwin, Samuel Findlay, Michael Gorven, Scott Guerrieri, Matthias Hub, #
|
2012-11-11 21:16:14 +00:00
|
|
|
# Meinert Jordan, Armin Köhler, Erik Lundin, Edwin Lunando, Brian T. Meyer,   #
|
2012-10-21 13:16:22 +00:00
|
|
|
# Joshua Miller, Stevan Pettit, Andreas Preikschat, Mattias Põldaru, #
|
|
|
|
# Christian Richter, Philip Ridout, Simon Scudder, Jeffrey Smith, #
|
|
|
|
# Maikel Stuivenberg, Martin Thompson, Jon Tibble, Dave Warnock, #
|
2012-12-01 07:57:54 +00:00
|
|
|
# Frode Woldsund, Martin Zibricky, Patrick Zimmermann #
|
2009-09-08 19:58:05 +00:00
|
|
|
# --------------------------------------------------------------------------- #
|
|
|
|
# This program is free software; you can redistribute it and/or modify it #
|
|
|
|
# under the terms of the GNU General Public License as published by the Free #
|
|
|
|
# Software Foundation; version 2 of the License. #
|
|
|
|
# #
|
|
|
|
# This program is distributed in the hope that it will be useful, but WITHOUT #
|
|
|
|
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
|
|
|
|
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for #
|
|
|
|
# more details. #
|
|
|
|
# #
|
|
|
|
# You should have received a copy of the GNU General Public License along #
|
|
|
|
# with this program; if not, write to the Free Software Foundation, Inc., 59 #
|
|
|
|
# Temple Place, Suite 330, Boston, MA 02111-1307 USA #
|
|
|
|
###############################################################################
|
2009-07-13 20:08:43 +00:00
|
|
|
|
2009-06-04 20:06:32 +00:00
|
|
|
import os
|
2009-02-02 19:54:38 +00:00
|
|
|
import logging
|
2009-07-09 05:15:26 +00:00
|
|
|
import chardet
|
2009-07-10 15:41:08 +00:00
|
|
|
import codecs
|
2009-12-23 21:09:07 +00:00
|
|
|
import re
|
2009-07-13 20:08:43 +00:00
|
|
|
|
2013-02-03 09:07:31 +00:00
|
|
|
from openlp.core.lib import translate
|
2010-03-12 21:55:52 +00:00
|
|
|
from openlp.core.utils import AppLocation
|
2011-03-17 19:40:01 +00:00
|
|
|
from openlp.plugins.bibles.lib.db import BibleDB, BiblesResourcesDB
|
2008-11-19 18:13:22 +00:00
|
|
|
|
2010-02-27 15:31:23 +00:00
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
2013-04-18 17:45:14 +00:00
|
|
|
|
2011-04-27 14:54:27 +00:00
|
|
|
def replacement(match):
    """
    Substitution callback for ``re.sub``: return the text captured by the second group of ``match``, converted
    to upper case.

    ``match``
        A regular expression match object that has at least two groups.
    """
    captured = match.group(2)
    return captured.upper()
|
|
|
|
|
2013-04-18 17:45:14 +00:00
|
|
|
|
2009-12-30 17:29:08 +00:00
|
|
|
class OSISBible(BibleDB):
    """
    `OSIS <http://www.bibletechnologies.net/>`_ Bible format importer class.

    The file is read line by line and parsed with regular expressions: every line that contains a complete
    ``<verse osisID="Book.Chapter.Verse">...</verse>`` element is stored as one verse.
    """
    log.info(u'BibleOSISImpl loaded')

    def __init__(self, parent, **kwargs):
        """
        Initialise the importer and pre-compile the regular expressions used while parsing.

        ``parent``
            Passed through unchanged to ``BibleDB.__init__``.

        ``kwargs``
            Passed through to ``BibleDB.__init__``. Must contain a ``filename`` entry with the path of the OSIS
            file to import (a missing entry raises ``KeyError``).
        """
        log.debug(self.__class__.__name__)
        BibleDB.__init__(self, parent, **kwargs)
        self.filename = kwargs[u'filename']
        self.language_regex = re.compile(r'<language.*>(.*?)</language>')
        # Groups: 1 = book, 2 = chapter, 3 = verse, 4 = verse text. The separators are escaped so that only a
        # literal "." is accepted between the three parts of the osisID.
        self.verse_regex = re.compile(r'<verse osisID="([a-zA-Z0-9 ]*)\.([0-9]*)\.([0-9]*)">(.*?)</verse>')
        # The expressions below match markup embedded in the verse text; see _clean_verse_text().
        self.note_regex = re.compile(r'<note(.*?)>(.*?)</note>')
        self.title_regex = re.compile(r'<title(.*?)>(.*?)</title>')
        self.milestone_regex = re.compile(r'<milestone(.*?)/>')
        self.fi_regex = re.compile(r'<FI>(.*?)<Fi>')
        self.rf_regex = re.compile(r'<RF>(.*?)<Rf>')
        self.lb_regex = re.compile(r'<lb(.*?)>')
        self.lg_regex = re.compile(r'<lg(.*?)>')
        self.l_regex = re.compile(r'<l (.*?)>')
        self.w_regex = re.compile(r'<w (.*?)>')
        self.q_regex = re.compile(r'<q(.*?)>')
        self.q1_regex = re.compile(r'<q(.*?)level="1"(.*?)>')
        self.q2_regex = re.compile(r'<q(.*?)level="2"(.*?)>')
        self.trans_regex = re.compile(r'<transChange(.*?)>(.*?)</transChange>')
        self.divine_name_regex = re.compile(r'<divineName(.*?)>(.*?)</divineName>')
        self.spaces_regex = re.compile(r'([ ]{2,})')

    def _detect_encoding(self):
        """
        Guess the character encoding of the OSIS file and count its lines.

        Returns an ``(encoding, line_count)`` tuple, or ``None`` if the file could not be read. ``encoding`` is
        whatever ``chardet.detect`` reports for the first 1 MiB of the file.
        """
        detect_file = None
        try:
            # Binary mode: the encoding detection has to see the raw, untranslated bytes.
            detect_file = open(self.filename, u'rb')
            details = chardet.detect(detect_file.read(1048576))
            detect_file.seek(0)
            # Count the lines lazily instead of loading the whole file into a list.
            lines_in_file = sum(1 for line in detect_file)
        except IOError:
            log.exception(u'Failed to detect OSIS file encoding')
            return None
        finally:
            if detect_file:
                detect_file.close()
        return details['encoding'], lines_in_file

    def _clean_verse_text(self, verse_text):
        """
        Strip the markup embedded in the text of a verse and return the cleaned text.

        All of this rigmarole is because the mod2osis tool from the Sword library embeds XML in the OSIS but
        neglects to enclose the verse text (with XML) in <![CDATA[ ]]> tags.

        ``verse_text``
            The raw content of a ``<verse>`` element.
        """
        # Elements that are dropped together with their content.
        verse_text = self.note_regex.sub(u'', verse_text)
        verse_text = self.title_regex.sub(u'', verse_text)
        verse_text = self.milestone_regex.sub(u'', verse_text)
        verse_text = self.fi_regex.sub(u'', verse_text)
        verse_text = self.rf_regex.sub(u'', verse_text)
        # Opening tags that are removed or replaced by a space. The order matters: the level-specific quote
        # expressions have to run before the generic <q ...> expression removes every remaining quote tag.
        verse_text = self.lb_regex.sub(u' ', verse_text)
        verse_text = self.lg_regex.sub(u'', verse_text)
        verse_text = self.l_regex.sub(u' ', verse_text)
        verse_text = self.w_regex.sub(u'', verse_text)
        verse_text = self.q1_regex.sub(u'"', verse_text)
        verse_text = self.q2_regex.sub(u'\'', verse_text)
        verse_text = self.q_regex.sub(u'', verse_text)
        # Divine names are kept, but written in upper case.
        verse_text = self.divine_name_regex.sub(replacement, verse_text)
        verse_text = self.trans_regex.sub(u'', verse_text)
        # Left-over tags that the expressions above do not cover.
        for tag in (u'</lb>', u'</l>', u'<lg>', u'</lg>', u'</q>', u'</div>', u'</w>'):
            verse_text = verse_text.replace(tag, u'')
        # Collapse the runs of spaces produced by the substitutions above.
        return self.spaces_regex.sub(u' ', verse_text)

    def do_import(self, bible_name=None):
        """
        Loads a Bible from file.

        ``bible_name``
            Passed to ``self.get_language()`` when no usable ``<language>`` element was found in the file before
            the first verse.

        Returns ``True`` if at least one verse was imported and the import ran to completion, otherwise
        ``False`` (file not readable, language or book not resolvable, no verse found, or import stopped).
        """
        log.debug(u'Starting OSIS import from "%s"' % self.filename)
        db_book = None
        osis = None
        success = True
        last_chapter = 0
        match_count = 0
        self.wizard.increment_progress_bar(translate('BiblesPlugin.OsisImport',
            'Detecting encoding (this may take a few minutes)...'))
        file_info = self._detect_encoding()
        if file_info is None:
            return False
        encoding, lines_in_file = file_info
        # Decide from the number of lines if the bible probably contains only the NT, the OT and NT, or the
        # OT, NT and Apocrypha. book_count is handed to the book lookup, chapter_count is the progress maximum.
        if lines_in_file < 11500:
            book_count = 27
            chapter_count = 260
        elif lines_in_file < 34200:
            book_count = 66
            chapter_count = 1188
        else:
            book_count = 67
            chapter_count = 1336
        try:
            osis = codecs.open(self.filename, u'r', encoding)
            language_id = False
            for file_record in osis:
                if self.stop_import_flag:
                    break
                # Try to find the bible language
                if not language_id:
                    language_match = self.language_regex.search(file_record)
                    if language_match:
                        language = BiblesResourcesDB.get_language(language_match.group(1))
                        if language:
                            language_id = language[u'id']
                            self.save_meta(u'language_id', language_id)
                        continue
                match = self.verse_regex.search(file_record)
                if not match:
                    continue
                # Set meta language_id if not detected till now
                if not language_id:
                    language_id = self.get_language(bible_name)
                    if not language_id:
                        # Not inside an exception handler, so there is no traceback to log.
                        log.error(u'Importing books from "%s" failed' % self.filename)
                        return False
                match_count += 1
                book = unicode(match.group(1))
                chapter = int(match.group(2))
                verse = int(match.group(3))
                verse_text = match.group(4)
                book_ref_id = self.get_book_ref_id_by_name(book, book_count, language_id)
                if not book_ref_id:
                    log.error(u'Importing books from "%s" failed' % self.filename)
                    return False
                book_details = BiblesResourcesDB.get_book_by_id(book_ref_id)
                new_book = not db_book or db_book.name != book_details[u'name']
                if new_book:
                    log.debug(u'New book: "%s"' % book_details[u'name'])
                    db_book = self.create_book(
                        book_details[u'name'],
                        book_ref_id,
                        book_details[u'testament_id'])
                if last_chapter == 0:
                    self.wizard.progress_bar.setMaximum(chapter_count)
                # A new book also starts a new chapter, even if the chapter number is the same as the last
                # chapter of the previous book (e.g. two consecutive books with a single chapter).
                if new_book or last_chapter != chapter:
                    if last_chapter != 0:
                        self.session.commit()
                    self.wizard.increment_progress_bar(translate('BiblesPlugin.OsisImport', 'Importing %s %s...',
                        'Importing <book name> <chapter>...') % (book_details[u'name'], chapter))
                    last_chapter = chapter
                verse_text = self._clean_verse_text(verse_text)
                self.create_verse(db_book.id, chapter, verse, verse_text)
                self.application.process_events()
            self.session.commit()
            if match_count == 0:
                success = False
        except (ValueError, IOError):
            log.exception(u'Loading bible from OSIS file failed')
            success = False
        finally:
            if osis:
                osis.close()
        if self.stop_import_flag:
            return False
        return success
|