forked from openlp/openlp
Fix up text formatting / stripping from websites
bzr-revno: 88
This commit is contained in:
parent
d81d037f2b
commit
fa9409f874
@ -34,22 +34,23 @@ class BibleCommon:
|
||||
def _cleanText(self, text):
|
||||
"""
|
||||
Clean up text and remove extra characters
|
||||
after being downloaded from the web
|
||||
"""
|
||||
# text = text.replace('\n', '')
|
||||
# text = text.replace('\r', '')
|
||||
# text = text.replace('&nbsp;', '')
|
||||
# text = text.replace('<P>', '')
|
||||
# text = text.replace('"', '')
|
||||
# Remove Headings from the Text
|
||||
i = text.find("<h")
|
||||
while i > -1:
|
||||
j=text.find("</h", i)
|
||||
text = text[ : (i - 1)]+text[(j+4)]
|
||||
i = text.find("<h")
|
||||
|
||||
# Remove Superscript References from the Text
|
||||
x = text.find("<sup>")
|
||||
while x > -1:
|
||||
y = text.find("</sup>")
|
||||
#print x, y
|
||||
#print verseText[:x]
|
||||
#print verseText[y + 6:len(verseText)]
|
||||
text= text[:x] + text[y + 6:len(text)]
|
||||
x = text.find("<sup>")
|
||||
#print "text= " + text
|
||||
|
||||
# Static Clean ups
|
||||
text= text.replace('\n', '')
|
||||
text= text.replace('\r', '')
|
||||
text= text.replace('&nbsp;', '')
|
||||
@ -64,14 +65,11 @@ class BibleCommon:
|
||||
text= text.replace(chr(189), '1/2')
|
||||
text= text.replace("&quot;", '"')
|
||||
text= text.replace("&apos;", "'")
|
||||
x = text.find("<")
|
||||
#print verseText
|
||||
# while x > -1:
|
||||
# y = text.find(">")
|
||||
# #print x , y
|
||||
# #print verseText[:x-1]
|
||||
# #print verseText[y : y-1]
|
||||
# text= text[:x] + text[y+1 : len(text)]
|
||||
# x = text.find("<")
|
||||
i = text.find("<")
|
||||
while i > -1 :
|
||||
j = text.find(">", i)
|
||||
text= text[:i] + text[j+1:]
|
||||
i = text.find("<")
|
||||
|
||||
text= text.replace('>', '')
|
||||
return text.rstrip()
|
||||
|
@ -98,7 +98,7 @@ class BibleHTTPImpl(BibleCommon):
|
||||
versePos = xml_string.find(VerseSearch)
|
||||
#print versePos
|
||||
bible = {}
|
||||
while versePos > 0:
|
||||
while versePos > -1:
|
||||
verseText = "" # clear out string
|
||||
versePos = xml_string.find("</span", versePos)
|
||||
i = xml_string.find(VerseSearch, versePos+1)
|
||||
@ -111,13 +111,13 @@ class BibleHTTPImpl(BibleCommon):
|
||||
i = j
|
||||
verseText = xml_string[versePos + 7 : i ]
|
||||
#print xml_string
|
||||
print "VerseText = " + str(verse) +" "+ verseText
|
||||
#print "VerseText = " + str(verse) +" "+ verseText
|
||||
bible[verse] = self._cleanText(verseText) # store the verse
|
||||
versePos = 0
|
||||
versePos = -1
|
||||
else:
|
||||
i = xml_string[:i].rfind("<span")+1
|
||||
verseText = xml_string[versePos + 7 : i ] # Loose the </span>
|
||||
xml_string = xml_string[i:len(xml_string)] # chop off verse 1
|
||||
verseText = xml_string[versePos + 7 : i - 1 ] # Lose the </span>
|
||||
xml_string = xml_string[i - 1 :len(xml_string)] # chop off verse 1
|
||||
versePos = xml_string.find(VerseSearch) #look for the next verse
|
||||
bible[verse] = self._cleanText(verseText) # store the verse
|
||||
verse += 1
|
||||
|
Loading…
Reference in New Issue
Block a user