From aa2d1586eac70995fc22c709a128a0f50466022e Mon Sep 17 00:00:00 2001 From: Raoul Snyman Date: Thu, 24 Jun 2010 20:35:01 +0200 Subject: [PATCH] Fix bug #595676 and improve the OSIS importer. --- openlp/plugins/bibles/lib/osis.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/openlp/plugins/bibles/lib/osis.py b/openlp/plugins/bibles/lib/osis.py index b4a2a2aa1..5b3324e00 100644 --- a/openlp/plugins/bibles/lib/osis.py +++ b/openlp/plugins/bibles/lib/osis.py @@ -62,10 +62,13 @@ class OSISBible(BibleDB): self.fi_regex = re.compile(r'(.*?)') self.rf_regex = re.compile(r'(.*?)') self.lb_regex = re.compile(r'') + self.lg_regex = re.compile(r'') self.l_regex = re.compile(r'') self.w_regex = re.compile(r'') - self.q_regex = re.compile(r'') + self.q1_regex = re.compile(r'') + self.q2_regex = re.compile(r'') self.trans_regex = re.compile(r'(.*?)') + self.divineName_regex = re.compile(r'(.*?)') self.spaces_regex = re.compile(r'([ ]{2,})') self.books = {} filepath = os.path.join( @@ -96,7 +99,7 @@ class OSISBible(BibleDB): detect_file = None try: detect_file = open(self.filename, u'r') - details = chardet.detect(detect_file.read()) + details = chardet.detect(detect_file.read(1048576)) except IOError: log.exception(u'Failed to detect OSIS file encoding') return @@ -150,11 +153,14 @@ class OSISBible(BibleDB): verse_text = self.milestone_regex.sub(u'', verse_text) verse_text = self.fi_regex.sub(u'', verse_text) verse_text = self.rf_regex.sub(u'', verse_text) - verse_text = self.lb_regex.sub(u'', verse_text) + verse_text = self.lb_regex.sub(u' ', verse_text) + verse_text = self.lg_regex.sub(u'', verse_text) verse_text = self.l_regex.sub(u'', verse_text) verse_text = self.w_regex.sub(u'', verse_text) - verse_text = self.q_regex.sub(u'', verse_text) + verse_text = self.q1_regex.sub(u'"', verse_text) + verse_text = self.q2_regex.sub(u'\'', verse_text) verse_text = self.trans_regex.sub(u'', verse_text) + verse_text = self.divineName_regex.sub(u'', verse_text) verse_text = verse_text.replace(u'', u'')\ .replace(u'', u'').replace(u'', u'')\ .replace(u'', u'').replace(u'', u'')\