forked from openlp/openlp
Fix bug #595676 and improve the OSIS importer.
This commit is contained in:
parent
c7939ea03c
commit
aa2d1586ea
@ -62,10 +62,13 @@ class OSISBible(BibleDB):
|
||||
self.fi_regex = re.compile(r'<FI>(.*?)<Fi>')
|
||||
self.rf_regex = re.compile(r'<RF>(.*?)<Rf>')
|
||||
self.lb_regex = re.compile(r'<lb(.*?)>')
|
||||
self.lg_regex = re.compile(r'<lg(.*?)>')
|
||||
self.l_regex = re.compile(r'<l (.*?)>')
|
||||
self.w_regex = re.compile(r'<w (.*?)>')
|
||||
self.q_regex = re.compile(r'<q (.*?)>')
|
||||
self.q1_regex = re.compile(r'<q(.*?)level="1"(.*?)>')
|
||||
self.q2_regex = re.compile(r'<q(.*?)level="2"(.*?)>')
|
||||
self.trans_regex = re.compile(r'<transChange(.*?)>(.*?)</transChange>')
|
||||
self.divineName_regex = re.compile(r'<divineName(.*?)>(.*?)</divineName>')
|
||||
self.spaces_regex = re.compile(r'([ ]{2,})')
|
||||
self.books = {}
|
||||
filepath = os.path.join(
|
||||
@ -96,7 +99,7 @@ class OSISBible(BibleDB):
|
||||
detect_file = None
|
||||
try:
|
||||
detect_file = open(self.filename, u'r')
|
||||
details = chardet.detect(detect_file.read())
|
||||
details = chardet.detect(detect_file.read(1048576))
|
||||
except IOError:
|
||||
log.exception(u'Failed to detect OSIS file encoding')
|
||||
return
|
||||
@ -150,11 +153,14 @@ class OSISBible(BibleDB):
|
||||
verse_text = self.milestone_regex.sub(u'', verse_text)
|
||||
verse_text = self.fi_regex.sub(u'', verse_text)
|
||||
verse_text = self.rf_regex.sub(u'', verse_text)
|
||||
verse_text = self.lb_regex.sub(u'', verse_text)
|
||||
verse_text = self.lb_regex.sub(u' ', verse_text)
|
||||
verse_text = self.lg_regex.sub(u'', verse_text)
|
||||
verse_text = self.l_regex.sub(u'', verse_text)
|
||||
verse_text = self.w_regex.sub(u'', verse_text)
|
||||
verse_text = self.q_regex.sub(u'', verse_text)
|
||||
verse_text = self.q1_regex.sub(u'"', verse_text)
|
||||
verse_text = self.q2_regex.sub(u'\'', verse_text)
|
||||
verse_text = self.trans_regex.sub(u'', verse_text)
|
||||
verse_text = self.divineName_regex.sub(u'', verse_text)
|
||||
verse_text = verse_text.replace(u'</lb>', u'')\
|
||||
.replace(u'</l>', u'').replace(u'<lg>', u'')\
|
||||
.replace(u'</lg>', u'').replace(u'</q>', u'')\
|
||||
|
Loading…
Reference in New Issue
Block a user