From 2a903d6a5b1840764bbb5963d0641996f3b5def6 Mon Sep 17 00:00:00 2001 From: Raoul Snyman Date: Fri, 19 Mar 2010 09:00:41 +0200 Subject: [PATCH] Added some documentation and removed the commented code. --- openlp/plugins/bibles/lib/http.py | 50 +++++-------------------------- 1 file changed, 8 insertions(+), 42 deletions(-) diff --git a/openlp/plugins/bibles/lib/http.py b/openlp/plugins/bibles/lib/http.py index ca75fb16b..55350c093 100644 --- a/openlp/plugins/bibles/lib/http.py +++ b/openlp/plugins/bibles/lib/http.py @@ -146,14 +146,16 @@ class BGExtract(BibleCommon): urlstring = u'http://www.biblegateway.com/passage/?search=%s+%s' \ u'&version=%s' % (bookname, chapter, version) log.debug(u'BibleGateway url = %s' % urlstring) - + # Let's get the page, and then open it in BeautifulSoup, so as to + # attempt to make "easy" work of bad HTML. page = urllib2.urlopen(urlstring) soup = BeautifulSoup(page) verses = soup.find(u'div', u'result-text-style-normal') - verse_number = 0 verse_list = {0: u''} - + # http://www.codinghorror.com/blog/2009/11/parsing-html-the-cthulhu-way.html + # This is a PERFECT example of opening the Cthulu tag! + # O Bible Gateway, why doth ye such horrific HTML produce? for verse in verses: if isinstance(verse, Tag) and verse.name == u'div' and filter(lambda a: a[0] == u'class', verse.attrs)[0][1] == u'footnotes': break @@ -195,46 +197,10 @@ class BGExtract(BibleCommon): continue if isinstance(verse, NavigableString): verse_list[verse_number] = verse_list[verse_number] + verse.replace(u' ', u' ') - + # Delete the "0" element, since we don't need it, it's just there for + # some stupid initial whitespace, courtesy of Bible Gateway. del verse_list[0] - -# xml_string = self._get_web_text(urlstring, self.proxyurl) -# verseSearch = u' -1: -# # clear out string -# verseText = u'' -# versePos = xml_string.find(u'', versePos) + 6 -# i = xml_string.find(verseSearch, versePos + 1) -# # Not sure if this is needed now -# if i == -1: -# i = xml_string.find(u' 0 and j < i: -# i = j -# verseText = xml_string[versePos + 7 : i ] -# # store the verse -# bible[verse] = self._clean_text(verseText) -# versePos = -1 -# else: -# verseText = xml_string[versePos: i] -# start_tag = verseText.find(verseFootnote) -# while start_tag > -1: -# end_tag = verseText.find(u'') -# verseText = verseText[:start_tag] + verseText[end_tag + 6:len(verseText)] -# start_tag = verseText.find(verseFootnote) -# # Chop off verse and start again -# xml_string = xml_string[i:] -# #look for the next verse -# versePos = xml_string.find(verseSearch) -# # store the verse -# bible[verse] = self._clean_text(verseText) -# verse += 1 + # Finally, return the list of verses in a "SearchResults" object. return SearchResults(bookname, chapter, verse_list) class CWExtract(BibleCommon):