# with this program; if not, write to the Free Software Foundation, Inc., 59  #
# Temple Place, Suite 330, Boston, MA 02111-1307 USA                          #
###############################################################################
"""
The :mod:`lib` module contains all the library functionality for the bibles
plugin.
"""
import logging
import re

log = logging.getLogger(__name__)

###############################################################################
# BIBLE_REFERENCE regular expression produces the following match groups:
#
# 0                     The whole string that matched.
# 1 [\w ]+              The book the reference is from.
# 2 [0-9]+              The first (possibly only) chapter in the reference.
# 3 None|[0-9]+         None, or the only verse, or the first verse in a
#                       verse range, or the start verse in a chapter range.
# 4 None|[0-9]+|end     None, or the end verse of the first verse range, or
#                       the end chapter of a chapter range.
# 5 None|[0-9]+         None or the second chapter in multiple (non-ranged)
#                       chapters.
# 6 None|[0-9]+|end     None, the start of the second verse range, or the
#                       end verse of a chapter range.
# 7 None|[0-9]+|end     None or the end of the second verse range.
#
# Separators are written as plain character classes ([:vV] and [,:vV]); a
# "|" inside a character class is a literal pipe, not an alternation.
###############################################################################

BIBLE_REFERENCE = re.compile(
    r'^([\w ]+?) *([0-9]+)'          # Initial book and chapter
    r'(?: *[:vV] *([0-9]+))?'        # Verse for first chapter
    r'(?: *- *([0-9]+|end$))?'       # Range for verses or chapters
    r'(?:(?:,([0-9]+))?'             # Second chapter
    r' *[,:vV] *([0-9]+|end$)'       # More range for verses or chapters
    r'(?: *- *([0-9]+|end$))?)?$',   # End of second verse range
    re.UNICODE)


def check_end(match_group):
    """
    Convert a regular expression match group to a verse/chapter number.

    ``match_group``
        The match group to check; either the text u'end' or a string of
        digits.

    Returns -1 for u'end', otherwise the group converted to an int.
    """
    if match_group == u'end':
        return -1
    return int(match_group)


def _parse_single_range(reference, book, chapter, verse, range_end):
    """
    Handle ``book chapter[:verse]-end`` (one range, no second part).

    Returns a reference list, or None when the range cannot be interpreted.
    """
    if verse is not None:
        # Chapter and verse range, e.g. "John 3:16-18" or "John 3:16-end".
        start = int(verse)
        end = check_end(range_end)
        if end == -1 or end > start:
            return [(book, chapter, start, end)]
        log.debug(u'Ambiguous reference: %s', reference)
        return None
    if range_end == u'end':
        # "John 3-end": the last chapter of the book is not known here.
        log.debug(u'Unsupported reference: %s', reference)
        return None
    # Plain chapter range, e.g. "John 3-5".
    return [(book, x, 1, -1) for x in range(chapter, int(range_end) + 1)]


def _parse_chapter_range(book, chapter, verse, range_end, last_verse):
    """
    Handle a chapter range with verses, e.g. "John 3:16-4:5".

    Returns one tuple per chapter from ``chapter`` to ``range_end``.
    """
    start = 1 if verse is None else int(verse)
    last_chapter = int(range_end)
    end = check_end(last_verse)
    # Compare as integers so that "3" and "03" name the same chapter.
    if last_chapter == chapter:
        return [(book, chapter, start, end)]
    references = [(book, chapter, start, -1)]
    for x in range(chapter + 1, last_chapter + 1):
        references.append((book, x, 1, end if x == last_chapter else -1))
    return references


def _parse_two_part(reference, match):
    """
    Handle every reference in which match group 6 took part: either a
    chapter range with verses or two separate verse references.

    Returns a reference list, or None when the reference is ambiguous.
    """
    book = match.group(1)
    chapter = int(match.group(2))
    verse, range_end, chapter2, second, second_end = match.group(3, 4, 5, 6, 7)
    if second_end is None and chapter2 is None and range_end is not None:
        # "3:16-4:5" (chapter range) and "3:16-18,20" (two verse references)
        # fill the same groups; only the separator in front of group 6 tells
        # them apart, and the expression does not capture it.
        separator = reference[match.end(4):match.start(6)].strip()
        if separator != u',' or verse is None:
            return _parse_chapter_range(
                book, chapter, verse, range_end, second)
    # Two separate verse references.  Without a first verse we cannot tell
    # chapters from verses, and a lone u'end' has no start verse to pair with.
    if verse is None or second == u'end':
        log.debug(u'Ambiguous reference: %s', reference)
        return None
    first_start = int(verse)
    # A missing range end means a single verse, e.g. the "16" in "3:16,18".
    first_end = first_start if range_end is None else int(range_end)
    second_chapter = chapter if chapter2 is None else int(chapter2)
    second_start = int(second)
    if second_end is None:
        second_stop = second_start
    else:
        second_stop = check_end(second_end)
    return [
        (book, chapter, first_start, first_end),
        (book, second_chapter, second_start, second_stop),
    ]


def parse_reference(reference):
    """
    Convert a typed-in Bible reference into a reference list, a list of
    references to be queried from the Bible database files.

    The reference list is a list of tuples, with each tuple structured like
    this::

        (book, chapter, start_verse, end_verse)

    An ``end_verse`` of -1 stands for u'end'; a reference to a whole single
    chapter uses -1 for both verses.

    ``reference``
        The bible reference to parse.

    Returns None when the text is not a valid, unambiguous reference,
    otherwise a reference list.
    """
    reference = reference.strip()
    log.debug('parse_reference("%s")', reference)
    match = BIBLE_REFERENCE.match(reference)
    if not match:
        log.debug(u'Invalid reference: %s', reference)
        return None
    log.debug(u'Matched reference %s', reference)
    book = match.group(1)
    chapter = int(match.group(2))
    verse, range_end = match.group(3, 4)
    if match.group(6) is not None:
        # Chapter range with verses, or two verse references
        return _parse_two_part(reference, match)
    if range_end is not None:
        # Chapter range or chapter and verse range
        return _parse_single_range(reference, book, chapter, verse, range_end)
    if verse is not None:
        # Single chapter and verse
        return [(book, chapter, int(verse), int(verse))]
    # Single chapter
    return [(book, chapter, -1, -1)]


class SearchResults(object):
    """
    Encapsulate a set of search results. This is Bible-type independent.
    """
    def __init__(self, book, chapter, verselist):
        """
        Create the search result object.

        ``book``
            The book of the Bible.

        ``chapter``
            The chapter of the book.

        ``verselist``
            The list of verses for this reading
        """
        self.book = book
        self.chapter = chapter
        self.verselist = verselist

    def has_verselist(self):
        """
        Returns whether or not the verse list contains verses.
        """
        return len(self.verselist) > 0
# -# # -# You should have received a copy of the GNU General Public License along # -# with this program; if not, write to the Free Software Foundation, Inc., 59 # -# Temple Place, Suite 330, Boston, MA 02111-1307 USA # -############################################################################### - -import urllib2 -import logging -import re -import chardet -import htmlentitydefs - -only_verses = re.compile(r'([\w .]+)[ ]+([0-9]+)[ ]*[:|v|V][ ]*([0-9]+)' - r'(?:[ ]*-[ ]*([0-9]+|end))?(?:[ ]*,[ ]*([0-9]+)' - r'(?:[ ]*-[ ]*([0-9]+|end))?)?', - re.UNICODE) -chapter_range = re.compile(r'([\w .]+)[ ]+([0-9]+)[ ]*[:|v|V][ ]*' - r'([0-9]+|end)[ ]*-[ ]*([0-9]+)[ ]*[:|v|V][ ]*([0-9]+|end)', - re.UNICODE) - -log = logging.getLogger(__name__) - -def parse_reference(reference): - """ - This is the über-awesome function that takes a person's typed in string - and converts it to a reference list, a list of references to be queried - from the Bible database files. - - The reference list is a list of tuples, with each tuple structured like - this:: - - (book, chapter, start_verse, end_verse) - """ - reference = reference.strip() - log.debug('parse_reference("%s")', reference) - reference_list = [] - # We start with the most "complicated" match first, so that they are found - # first, and we don't have any "false positives". 
- match = chapter_range.match(reference) - if match: - log.debug('Found a chapter range.') - book = match.group(1) - from_verse = match.group(3) - to_verse = match.group(5) - if int(match.group(2)) == int(match.group(4)): - reference_list.append( - (book, int(match.group(2)), from_verse, to_verse) - ) - else: - if int(match.group(2)) > int(match.group(4)): - from_chapter = int(match.group(4)) - to_chapter = int(match.group(2)) - else: - from_chapter = int(match.group(2)) - to_chapter = int(match.group(4)) - for chapter in xrange(from_chapter, to_chapter + 1): - if chapter == from_chapter: - reference_list.append((book, chapter, from_verse, -1)) - elif chapter == to_chapter: - reference_list.append((book, chapter, 1, to_verse)) - else: - reference_list.append((book, chapter, 1, -1)) - else: - match = only_verses.match(reference) - if match: - log.debug('Found a verse range.') - book = match.group(1) - chapter = match.group(2) - verse = match.group(3) - if match.group(4) is None: - reference_list.append((book, chapter, verse, verse)) - elif match.group(5) is None: - end_verse = match.group(4) - if end_verse == u'end': - end_verse = -1 - reference_list.append((book, chapter, verse, end_verse)) - elif match.group(6) is None: - reference_list.extend([ - (book, chapter, verse, match.group(4)), - (book, chapter, match.group(5), match.group(5)) - ]) - else: - end_verse = match.group(6) - if end_verse == u'end': - end_verse = -1 - reference_list.extend([ - (book, chapter, verse, match.group(4)), - (book, chapter, match.group(5), end_verse) - ]) - else: - log.debug('Didn\'t find anything.') - log.debug(reference_list) - return reference_list - -class SearchResults(object): - """ - Encapsulate a set of search results. This is Bible-type independant. - """ - def __init__(self, book, chapter, verselist): - """ - Create the search result object. - - ``book`` - The book of the Bible. - - ``chapter`` - The chapter of the book. 
- - ``verselist`` - The list of verses for this reading - """ - self.book = book - self.chapter = chapter - self.verselist = verselist - - def has_verselist(self): - """ - Returns whether or not the verse list contains verses. - """ - return len(self.verselist) > 0 - - -class BibleCommon(object): - """ - A common ancestor for bible download sites. - """ - log.info(u'BibleCommon') - - def _get_web_text(self, urlstring, proxyurl): - """ - Get the HTML from the web page. - - ``urlstring`` - The URL of the page to open. - - ``proxyurl`` - The URL of a proxy server used to access the Internet. - """ - log.debug(u'get_web_text %s %s', proxyurl, urlstring) - if proxyurl: - proxy_support = urllib2.ProxyHandler({'http': self.proxyurl}) - http_support = urllib2.HTTPHandler() - opener = urllib2.build_opener(proxy_support, http_support) - urllib2.install_opener(opener) - xml_string = u'' - req = urllib2.Request(urlstring) - #Make us look like an IE Browser on XP to stop blocking by web site - req.add_header(u'User-Agent', - u'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)') - try: - handle = urllib2.urlopen(req) - html = handle.read() - details = chardet.detect(html) - xml_string = unicode(html, details[u'encoding']) - except IOError, e: - if hasattr(e, u'reason'): - log.exception(u'Reason for failure: %s', e.reason) - return xml_string - - def _clean_text(self, text): - """ - Clean up text and remove extra characters after been downloaded from - the Internet. - - ``text`` - The text from the web page that needs to be cleaned up. 
- """ - #return text.rstrip() - # Remove Headings from the Text - start_tag = text.find(u' -1: - end_tag = text.find(u'') - while start_tag > -1: - end_tag = text.find(u'') - text = text[:start_tag] + text[end_tag + 6:len(text)] - start_tag = text.find(u'') - start_tag = text.find(u'') - while start_tag > -1: - end_tag = text.find(u'') - text = text[:start_tag] + text[end_tag + 6:len(text)] - start_tag = text.find(u'') - # Static Clean ups - text = text.replace(u'\n', u'') - text = text.replace(u'\r', u'') - text = text.replace(u' ', u'') - text = text.replace(u'

', u'') - text = text.replace(u'', u'') - text = text.replace(u'', u'') - text = text.replace(u'

', u'') - text = text.replace(u'

', u'') - text = text.replace(u'

', u'') - text = text.replace(u'
', u'') - text = text.replace(u'
', u'') - text = text.replace(u'"', u'\"') - text = text.replace(u''', u'\'') - # Remove some other tags - start_tag = text.find(u'<') - while start_tag > -1: - end_tag = text.find(u'>', start_tag) - text = text[:start_tag] + text[end_tag + 1:] - start_tag = text.find(u'<') - text = text.replace(u'>', u'') - return text.rstrip().lstrip() - - -def unescape(text): - """ - Removes HTML or XML character references and entities from a text string. - Courtesy of Fredrik Lundh, http://effbot.org/zone/re-sub.htm#unescape-html - - @param text The HTML (or XML) source text. - @return The plain text, as a Unicode string, if necessary. - """ - def fixup(markup): - text = markup.group(0) - if text.startswith(u'&#'): - # character reference - try: - if text.startswith(u'&#x'): - return unichr(int(text[3:-1], 16)) - else: - return unichr(int(text[2:-1])) - except ValueError: - pass - else: - # named entity - try: - text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]) - except KeyError: - pass - return text # leave as is - return re.sub(u'&#?\w+;', fixup, text) diff --git a/openlp/plugins/bibles/lib/http.py b/openlp/plugins/bibles/lib/http.py index 971677fde..415a0cde5 100644 --- a/openlp/plugins/bibles/lib/http.py +++ b/openlp/plugins/bibles/lib/http.py @@ -36,7 +36,7 @@ from BeautifulSoup import BeautifulSoup, NavigableString from openlp.core.lib import Receiver from openlp.core.utils import AppLocation -from openlp.plugins.bibles.lib.common import BibleCommon, SearchResults +from openlp.plugins.bibles.lib import SearchResults from openlp.plugins.bibles.lib.db import BibleDB, Book log = logging.getLogger(__name__) @@ -177,7 +177,7 @@ class HTTPBooks(object): return 0 -class BGExtract(BibleCommon): +class BGExtract(object): """ Extract verses from BibleGateway """ @@ -239,7 +239,8 @@ class BGExtract(BibleCommon): found_count += 1 return SearchResults(bookname, chapter, verse_list) -class CWExtract(BibleCommon): + +class CWExtract(object): """ Extract verses from 
CrossWalk/BibleStudyTools """ diff --git a/openlp/plugins/bibles/lib/manager.py b/openlp/plugins/bibles/lib/manager.py index 5c2767d0e..ebca8ca97 100644 --- a/openlp/plugins/bibles/lib/manager.py +++ b/openlp/plugins/bibles/lib/manager.py @@ -30,9 +30,9 @@ from PyQt4 import QtCore from openlp.core.lib import SettingsManager from openlp.core.utils import AppLocation +from openlp.plugins.bibles.lib import parse_reference from openlp.plugins.bibles.lib.db import BibleDB, BibleMeta -from common import parse_reference from opensong import OpenSongBible from osis import OSISBible from csvbible import CSVBible