forked from openlp/openlp
Fix html ripping bug for bibles
This commit is contained in:
parent
f576e9d83d
commit
6c30e67723
@ -116,6 +116,7 @@ class CWExtract(BibleCommon):
|
||||
## Strip Verse Data from Page and build an array
|
||||
##
|
||||
#log.debug(u'bible data %s', xml_string)
|
||||
#print xml_string
|
||||
i= xml_string.find(u'NavCurrentChapter')
|
||||
xml_string = xml_string[i:len(xml_string)]
|
||||
i= xml_string.find(u'<TABLE')
|
||||
@ -136,11 +137,10 @@ class CWExtract(BibleCommon):
|
||||
i = xml_string.find(u'</I></B>', versePos)
|
||||
#log.debug( versePos, i)
|
||||
verse= xml_string[versePos:i] # Got the Chapter
|
||||
#verse = int(temp)
|
||||
#log.debug( 'Chapter = %s', verse)
|
||||
# move the starting position to the beginning of the text
|
||||
versePos = i + 8
|
||||
# fined the start of the next verse
|
||||
# find the start of the next verse
|
||||
i = xml_string.find(u'<B><I>', versePos)
|
||||
if i == -1:
|
||||
i = xml_string.find(u'</BLOCKQUOTE>',versePos)
|
||||
@ -150,6 +150,7 @@ class CWExtract(BibleCommon):
|
||||
#log.debug( i, versePos)
|
||||
verseText = xml_string[versePos: i]
|
||||
versePos = i
|
||||
#print verseText
|
||||
bible[verse] = self._clean_text(verseText)
|
||||
#bible[verse] = verseText
|
||||
|
||||
|
@ -132,6 +132,11 @@ class BibleCommon(object):
|
||||
end_tag = text.find(u'</sup>')
|
||||
text = text[:start_tag] + text[end_tag + 6:len(text)]
|
||||
start_tag = text.find(u'<sup>')
|
||||
start_tag = text.find(u'<SUP>')
|
||||
while start_tag > -1:
|
||||
end_tag = text.find(u'</SUP>')
|
||||
text = text[:start_tag] + text[end_tag + 6:len(text)]
|
||||
start_tag = text.find(u'<SUP>')
|
||||
# Static Clean ups
|
||||
text = text.replace(u'\n', u'')
|
||||
text = text.replace(u'\r', u'')
|
||||
|
Loading…
Reference in New Issue
Block a user