forked from openlp/openlp
Fix html ripping bug for bibles
This commit is contained in:
parent
f576e9d83d
commit
6c30e67723
@ -116,6 +116,7 @@ class CWExtract(BibleCommon):
|
|||||||
## Strip Verse Data from Page and build an array
|
## Strip Verse Data from Page and build an array
|
||||||
##
|
##
|
||||||
#log.debug(u'bible data %s', xml_string)
|
#log.debug(u'bible data %s', xml_string)
|
||||||
|
#print xml_string
|
||||||
i= xml_string.find(u'NavCurrentChapter')
|
i= xml_string.find(u'NavCurrentChapter')
|
||||||
xml_string = xml_string[i:len(xml_string)]
|
xml_string = xml_string[i:len(xml_string)]
|
||||||
i= xml_string.find(u'<TABLE')
|
i= xml_string.find(u'<TABLE')
|
||||||
@ -136,11 +137,10 @@ class CWExtract(BibleCommon):
|
|||||||
i = xml_string.find(u'</I></B>', versePos)
|
i = xml_string.find(u'</I></B>', versePos)
|
||||||
#log.debug( versePos, i)
|
#log.debug( versePos, i)
|
||||||
verse= xml_string[versePos:i] # Got the Chapter
|
verse= xml_string[versePos:i] # Got the Chapter
|
||||||
#verse = int(temp)
|
|
||||||
#log.debug( 'Chapter = %s', verse)
|
#log.debug( 'Chapter = %s', verse)
|
||||||
# move the starting position to beginning of the text
|
# move the starting position to beginning of the text
|
||||||
versePos = i + 8
|
versePos = i + 8
|
||||||
# fined the start of the next verse
|
# find the start of the next verse
|
||||||
i = xml_string.find(u'<B><I>', versePos)
|
i = xml_string.find(u'<B><I>', versePos)
|
||||||
if i == -1:
|
if i == -1:
|
||||||
i = xml_string.find(u'</BLOCKQUOTE>',versePos)
|
i = xml_string.find(u'</BLOCKQUOTE>',versePos)
|
||||||
@ -150,6 +150,7 @@ class CWExtract(BibleCommon):
|
|||||||
#log.debug( i, versePos)
|
#log.debug( i, versePos)
|
||||||
verseText = xml_string[versePos: i]
|
verseText = xml_string[versePos: i]
|
||||||
versePos = i
|
versePos = i
|
||||||
|
#print verseText
|
||||||
bible[verse] = self._clean_text(verseText)
|
bible[verse] = self._clean_text(verseText)
|
||||||
#bible[verse] = verseText
|
#bible[verse] = verseText
|
||||||
|
|
||||||
|
@ -132,6 +132,11 @@ class BibleCommon(object):
|
|||||||
end_tag = text.find(u'</sup>')
|
end_tag = text.find(u'</sup>')
|
||||||
text = text[:start_tag] + text[end_tag + 6:len(text)]
|
text = text[:start_tag] + text[end_tag + 6:len(text)]
|
||||||
start_tag = text.find(u'<sup>')
|
start_tag = text.find(u'<sup>')
|
||||||
|
start_tag = text.find(u'<SUP>')
|
||||||
|
while start_tag > -1:
|
||||||
|
end_tag = text.find(u'</SUP>')
|
||||||
|
text = text[:start_tag] + text[end_tag + 6:len(text)]
|
||||||
|
start_tag = text.find(u'<SUP>')
|
||||||
# Static Clean ups
|
# Static Clean ups
|
||||||
text = text.replace(u'\n', u'')
|
text = text.replace(u'\n', u'')
|
||||||
text = text.replace(u'\r', u'')
|
text = text.replace(u'\r', u'')
|
||||||
|
Loading…
Reference in New Issue
Block a user