forked from openlp/openlp
Fix up text formatting / stripping from websites
bzr-revno: 88
This commit is contained in:
parent
d81d037f2b
commit
fa9409f874
@ -34,22 +34,23 @@ class BibleCommon:
|
|||||||
def _cleanText(self, text):
|
def _cleanText(self, text):
|
||||||
"""
|
"""
|
||||||
Clean up text and remove extra characters
|
Clean up text and remove extra characters
|
||||||
|
after it has been downloaded from the web
|
||||||
"""
|
"""
|
||||||
# text = text.replace('\n', '')
|
# Remove Headings from the Text
|
||||||
# text = text.replace('\r', '')
|
i = text.find("<h")
|
||||||
# text = text.replace(' ', '')
|
while i > -1:
|
||||||
# text = text.replace('<P>', '')
|
j=text.find("</h", i)
|
||||||
# text = text.replace('"', '')
|
text = text[ : (i - 1)]+text[(j+4)]
|
||||||
|
i = text.find("<h")
|
||||||
|
|
||||||
|
# Remove Superscript References (<sup> tags) from the Text
|
||||||
x = text.find("<sup>")
|
x = text.find("<sup>")
|
||||||
while x > -1:
|
while x > -1:
|
||||||
y = text.find("</sup>")
|
y = text.find("</sup>")
|
||||||
#print x, y
|
|
||||||
#print verseText[:x]
|
|
||||||
#print verseText[y + 6:len(verseText)]
|
|
||||||
text= text[:x] + text[y + 6:len(text)]
|
text= text[:x] + text[y + 6:len(text)]
|
||||||
x = text.find("<sup>")
|
x = text.find("<sup>")
|
||||||
#print "text= " + text
|
|
||||||
|
|
||||||
|
# Static Clean ups
|
||||||
text= text.replace('\n', '')
|
text= text.replace('\n', '')
|
||||||
text= text.replace('\r', '')
|
text= text.replace('\r', '')
|
||||||
text= text.replace(' ', '')
|
text= text.replace(' ', '')
|
||||||
@ -64,14 +65,11 @@ class BibleCommon:
|
|||||||
text= text.replace(chr(189), '1/2')
|
text= text.replace(chr(189), '1/2')
|
||||||
text= text.replace(""", '"')
|
text= text.replace(""", '"')
|
||||||
text= text.replace("'", "'")
|
text= text.replace("'", "'")
|
||||||
x = text.find("<")
|
i = text.find("<")
|
||||||
#print verseText
|
while i > -1 :
|
||||||
# while x > -1:
|
j = text.find(">", i)
|
||||||
# y = text.find(">")
|
text= text[:i] + text[j+1:]
|
||||||
# #print x , y
|
i = text.find("<")
|
||||||
# #print verseText[:x-1]
|
|
||||||
# #print verseText[y : y-1]
|
|
||||||
# text= text[:x] + text[y+1 : len(text)]
|
|
||||||
# x = text.find("<")
|
|
||||||
text= text.replace('>', '')
|
text= text.replace('>', '')
|
||||||
return text.rstrip()
|
return text.rstrip()
|
||||||
|
@ -98,7 +98,7 @@ class BibleHTTPImpl(BibleCommon):
|
|||||||
versePos = xml_string.find(VerseSearch)
|
versePos = xml_string.find(VerseSearch)
|
||||||
#print versePos
|
#print versePos
|
||||||
bible = {}
|
bible = {}
|
||||||
while versePos > 0:
|
while versePos > -1:
|
||||||
verseText = "" # clear out string
|
verseText = "" # clear out string
|
||||||
versePos = xml_string.find("</span", versePos)
|
versePos = xml_string.find("</span", versePos)
|
||||||
i = xml_string.find(VerseSearch, versePos+1)
|
i = xml_string.find(VerseSearch, versePos+1)
|
||||||
@ -111,13 +111,13 @@ class BibleHTTPImpl(BibleCommon):
|
|||||||
i = j
|
i = j
|
||||||
verseText = xml_string[versePos + 7 : i ]
|
verseText = xml_string[versePos + 7 : i ]
|
||||||
#print xml_string
|
#print xml_string
|
||||||
print "VerseText = " + str(verse) +" "+ verseText
|
#print "VerseText = " + str(verse) +" "+ verseText
|
||||||
bible[verse] = self._cleanText(verseText) # store the verse
|
bible[verse] = self._cleanText(verseText) # store the verse
|
||||||
versePos = 0
|
versePos = -1
|
||||||
else:
|
else:
|
||||||
i = xml_string[:i].rfind("<span")+1
|
i = xml_string[:i].rfind("<span")+1
|
||||||
verseText = xml_string[versePos + 7 : i ] # Lose the </span>
|
verseText = xml_string[versePos + 7 : i - 1 ] # Lose the </span>
|
||||||
xml_string = xml_string[i:len(xml_string)] # chop off verse 1
|
xml_string = xml_string[i - 1 :len(xml_string)] # chop off verse 1
|
||||||
versePos = xml_string.find(VerseSearch) #look for the next verse
|
versePos = xml_string.find(VerseSearch) #look for the next verse
|
||||||
bible[verse] = self._cleanText(verseText) # store the verse
|
bible[verse] = self._cleanText(verseText) # store the verse
|
||||||
verse += 1
|
verse += 1
|
||||||
|
Loading…
Reference in New Issue
Block a user