forked from openlp/openlp
Trying to regexify and make the OSIS importer more robust.
This commit is contained in:
parent
dc0f7e7f5a
commit
1181152c5a
@ -28,6 +28,7 @@ import os.path
|
|||||||
import logging
|
import logging
|
||||||
import chardet
|
import chardet
|
||||||
import codecs
|
import codecs
|
||||||
|
import re
|
||||||
|
|
||||||
from PyQt4 import QtCore
|
from PyQt4 import QtCore
|
||||||
|
|
||||||
@ -53,7 +54,8 @@ class BibleOSISImpl():
|
|||||||
A reference to a Bible database object.
|
A reference to a Bible database object.
|
||||||
"""
|
"""
|
||||||
log.info(u'BibleOSISImpl Initialising')
|
log.info(u'BibleOSISImpl Initialising')
|
||||||
self.verse_regex = re.compile(r'<verse osisID="([a-zA-Z0-9 ]*).([0-9]*).([0-9]*)">(.*)</verse>')
|
self.verse_regex = re.compile(r'<verse osisID="([a-zA-Z0-9 ]*).([0-9]*).([0-9]*)">(.*?)</verse>')
|
||||||
|
self.note_regex = re.compile(r'<note([a-zA-Z0-9 "=]*)>(.*?)</note>')
|
||||||
self.bibledb = bibledb
|
self.bibledb = bibledb
|
||||||
# books of the bible linked to bibleid {osis , name}
|
# books of the bible linked to bibleid {osis , name}
|
||||||
self.booksOfBible = {}
|
self.booksOfBible = {}
|
||||||
@ -61,7 +63,7 @@ class BibleOSISImpl():
|
|||||||
self.abbrevOfBible = {}
|
self.abbrevOfBible = {}
|
||||||
filepath = os.path.split(os.path.abspath(__file__))[0]
|
filepath = os.path.split(os.path.abspath(__file__))[0]
|
||||||
filepath = os.path.abspath(os.path.join(
|
filepath = os.path.abspath(os.path.join(
|
||||||
filepath, u'..', u'resources',u'osisbooks.csv'))
|
filepath, u'..', u'resources', u'osisbooks.csv'))
|
||||||
fbibles = None
|
fbibles = None
|
||||||
self.loadbible = True
|
self.loadbible = True
|
||||||
try:
|
try:
|
||||||
@ -109,84 +111,102 @@ class BibleOSISImpl():
|
|||||||
osis = None
|
osis = None
|
||||||
try:
|
try:
|
||||||
osis = codecs.open(osisfile_record, u'r', details['encoding'])
|
osis = codecs.open(osisfile_record, u'r', details['encoding'])
|
||||||
book_ptr = None
|
last_chapter = u'0'
|
||||||
count = 0
|
|
||||||
verseText = u'<verse osisID='
|
|
||||||
testament = 1
|
|
||||||
for file_record in osis:
|
for file_record in osis:
|
||||||
# cancel pressed on UI
|
match = self.verse_regex.search(file_record)
|
||||||
if not self.loadbible:
|
if match:
|
||||||
break
|
print 'Found:', match.group(4)
|
||||||
pos = file_record.find(verseText)
|
#if last_chapter != match.group(2):
|
||||||
# we have a verse
|
print match.group(2)
|
||||||
if pos > -1:
|
# dialogobject.incrementProgressBar(
|
||||||
epos = file_record.find(u'>', pos)
|
# u'Importing %s %s...' % \
|
||||||
# Book Reference
|
# (self.books[match.group(1)], match.group(2)))
|
||||||
ref = file_record[pos+15:epos-1]
|
# last_chapter = match.group(2)
|
||||||
# let's find the bible text only
|
verse_text = match.group(4)
|
||||||
# find start of text
|
#verse_text = self.remove_block(
|
||||||
pos = epos + 1
|
#verse_text = self.note_regex.sub(lambda match: u'', verse_text)
|
||||||
# end of text
|
print verse_text
|
||||||
epos = file_record.find(u'</verse>', pos)
|
else:
|
||||||
text = file_record[pos : epos]
|
print 'Not found...', file_record[:10]
|
||||||
#remove tags of extra information
|
# (self.booksOfBible[p[0]], p[1]))
|
||||||
text = self.remove_block(u'<title', u'</title>', text)
|
# book_ptr = None
|
||||||
text = self.remove_block(u'<note', u'</note>', text)
|
# count = 0
|
||||||
text = self.remove_block(
|
# verseText = u'<verse osisID='
|
||||||
u'<divineName', u'</divineName>', text)
|
# testament = 1
|
||||||
text = self.remove_tag(u'<lb', text)
|
# for file_record in osis:
|
||||||
text = self.remove_tag(u'<q', text)
|
# # cancel pressed on UI
|
||||||
text = self.remove_tag(u'<l', text)
|
# if not self.loadbible:
|
||||||
text = self.remove_tag(u'<lg', text)
|
# break
|
||||||
# Strange tags where the end is not the same as the start
|
# pos = file_record.find(verseText)
|
||||||
# They must be in this order as at least one bible has them
|
# # we have a verse
|
||||||
# crossing and the removal does not work.
|
# if pos > -1:
|
||||||
pos = text.find(u'<FI>')
|
# epos = file_record.find(u'>', pos)
|
||||||
while pos > -1:
|
# # Book Reference
|
||||||
epos = text.find(u'<Fi>', pos)
|
# ref = file_record[pos+15:epos-1]
|
||||||
if epos == -1: # TODO
|
# # let's find the bible text only
|
||||||
pos = -1
|
# # find start of text
|
||||||
else:
|
# pos = epos + 1
|
||||||
text = text[:pos] + text[epos + 4: ]
|
# # end of text
|
||||||
pos = text.find(u'<FI>')
|
# epos = file_record.find(u'</verse>', pos)
|
||||||
pos = text.find(u'<RF>')
|
# text = file_record[pos : epos]
|
||||||
while pos > -1:
|
# #remove tags of extra information
|
||||||
epos = text.find(u'<Rf>', pos)
|
# text = self.remove_block(u'<title', u'</title>', text)
|
||||||
text = text[:pos] + text[epos + 4: ]
|
# text = self.remove_block(u'<note', u'</note>', text)
|
||||||
pos = text.find(u'<RF>')
|
# text = self.remove_block(
|
||||||
print ref
|
# u'<divineName', u'</divineName>', text)
|
||||||
continue
|
# text = self.remove_tag(u'<lb', text)
|
||||||
# split up the reference
|
# text = self.remove_tag(u'<q', text)
|
||||||
p = ref.split(u'.', 3)
|
# text = self.remove_tag(u'<l', text)
|
||||||
if book_ptr != p[0]:
|
# text = self.remove_tag(u'<lg', text)
|
||||||
# first time through
|
# # Strange tags where the end is not the same as the start
|
||||||
if book_ptr is None:
|
# # They must be in this order as at least one bible has them
|
||||||
# set the max book size depending
|
# # crossing and the removal does not work.
|
||||||
# on the first book read
|
# pos = text.find(u'<FI>')
|
||||||
if p[0] == u'Gen':
|
# while pos > -1:
|
||||||
dialogobject.ImportProgressBar.setMaximum(1188)
|
# epos = text.find(u'<Fi>', pos)
|
||||||
else:
|
# if epos == -1: # TODO
|
||||||
dialogobject.ImportProgressBar.setMaximum(260)
|
# pos = -1
|
||||||
# First book of NT
|
# else:
|
||||||
if p[0] == u'Matt':
|
# text = text[:pos] + text[epos + 4: ]
|
||||||
testament += 1
|
# pos = text.find(u'<FI>')
|
||||||
dialogobject.incrementProgressBar(u'Importing %s %s...' % \
|
# pos = text.find(u'<RF>')
|
||||||
(self.booksOfBible[p[0]], p[1]))
|
# while pos > -1:
|
||||||
Receiver.send_message(u'process_events')
|
# epos = text.find(u'<Rf>', pos)
|
||||||
self.bibledb.save_verses()
|
# text = text[:pos] + text[epos + 4: ]
|
||||||
book_ptr = p[0]
|
# pos = text.find(u'<RF>')
|
||||||
book = self.bibledb.create_book(
|
# print ref
|
||||||
unicode(self.booksOfBible[p[0]]),
|
# continue
|
||||||
unicode(self.abbrevOfBible[p[0]]),
|
# # split up the reference
|
||||||
testament)
|
# p = ref.split(u'.', 3)
|
||||||
count = 0
|
# if book_ptr != p[0]:
|
||||||
self.bibledb.add_verse(book.id, p[1], p[2], text)
|
# # first time through
|
||||||
#count += 1
|
# if book_ptr is None:
|
||||||
#Every 3 verses repaint the screen
|
# # set the max book size depending
|
||||||
#if count % 3 == 0:
|
# # on the first book read
|
||||||
# Receiver.send_message(u'process_events')
|
# if p[0] == u'Gen':
|
||||||
# count = 0
|
# dialogobject.ImportProgressBar.setMaximum(1188)
|
||||||
#self.bibledb.save_verses()
|
# else:
|
||||||
|
# dialogobject.ImportProgressBar.setMaximum(260)
|
||||||
|
# # First book of NT
|
||||||
|
# if p[0] == u'Matt':
|
||||||
|
# testament += 1
|
||||||
|
# dialogobject.incrementProgressBar(u'Importing %s %s...' % \
|
||||||
|
# (self.booksOfBible[p[0]], p[1]))
|
||||||
|
# Receiver.send_message(u'process_events')
|
||||||
|
# self.bibledb.save_verses()
|
||||||
|
# book_ptr = p[0]
|
||||||
|
# book = self.bibledb.create_book(
|
||||||
|
# unicode(self.booksOfBible[p[0]]),
|
||||||
|
# unicode(self.abbrevOfBible[p[0]]),
|
||||||
|
# testament)
|
||||||
|
# count = 0
|
||||||
|
# self.bibledb.add_verse(book.id, p[1], p[2], text)
|
||||||
|
# #count += 1
|
||||||
|
# #Every 3 verses repaint the screen
|
||||||
|
# #if count % 3 == 0:
|
||||||
|
# # Receiver.send_message(u'process_events')
|
||||||
|
# # count = 0
|
||||||
|
# #self.bibledb.save_verses()
|
||||||
except:
|
except:
|
||||||
log.exception(u'Loading bible from OSIS file failed')
|
log.exception(u'Loading bible from OSIS file failed')
|
||||||
finally:
|
finally:
|
||||||
|
Loading…
Reference in New Issue
Block a user