diff --git a/openlp/plugins/bibles/lib/http.py b/openlp/plugins/bibles/lib/http.py index b844bbe61..e004be9df 100644 --- a/openlp/plugins/bibles/lib/http.py +++ b/openlp/plugins/bibles/lib/http.py @@ -210,7 +210,8 @@ class BGExtract(object): cleaner = [(re.compile(' |
|\'\+\''), lambda match: '')] soup = get_soup_for_bible_ref( u'http://www.biblegateway.com/passage/?%s' % url_params, - cleaner=cleaner) + pre_parse_regex=r'', pre_parse_substitute='', + cleaner=cleaner) if not soup: return None Receiver.send_message(u'openlp_process_events') @@ -499,7 +500,8 @@ class HTTPBible(BibleDB): """ return HTTPBooks.get_verse_count(book, chapter) -def get_soup_for_bible_ref(reference_url, header=None, cleaner=None): +def get_soup_for_bible_ref(reference_url, header=None, pre_parse_regex=None, + pre_parse_substitute=None, cleaner=None): """ Gets a webpage and returns a parsed and optionally cleaned soup or None. @@ -509,6 +511,13 @@ def get_soup_for_bible_ref(reference_url, header=None, cleaner=None): ``header`` An optional HTTP header to pass to the bible web server. + ``pre_parse_regex`` + A regular expression to run on the webpage. Allows manipulation of the + webpage before passing to BeautifulSoup for parsing. + + ``pre_parse_substitute`` + The text to replace any matches to the regular expression with. + ``cleaner`` An optional regex to use during webpage parsing. """ @@ -518,12 +527,15 @@ def get_soup_for_bible_ref(reference_url, header=None, cleaner=None): if not page: send_error_message(u'download') return None + page_source = page.read() + if pre_parse_regex and pre_parse_substitute is not None: + page_source = re.sub(pre_parse_regex, pre_parse_substitute, page_source) soup = None try: if cleaner: - soup = BeautifulSoup(page, markupMassage=cleaner) + soup = BeautifulSoup(page_source, markupMassage=cleaner) else: - soup = BeautifulSoup(page) + soup = BeautifulSoup(page_source) except HTMLParseError: log.exception(u'BeautifulSoup could not parse the bible page.') if not soup: