Bible reference refactor

2010-07-29 15:36:02 +01:00 · 2010-07-29 15:36:02 +01:00 · fc849398a9
parent 53625d08b6
commit fc849398a9
4 changed files with 164 additions and 261 deletions
--- a/openlp/plugins/bibles/lib/init.py
+++ b/openlp/plugins/bibles/lib/init.py
@ -23,8 +23,166 @@
 # with this program; if not, write to the Free Software Foundation, Inc., 59  #
 # Temple Place, Suite 330, Boston, MA 02111-1307 USA                          #
 ###############################################################################
 """
 The :mod:`lib` module contains all the library functionality for the bibles
 plugin.
 """
 import logging
 import re
 log = logging.getLogger(__name__)
 ###############################################################################
 # BIBLE_REFERENCE regular expression produces the following match groups:
 #
 # 0     This is a special group consisting of the whole string that matched.
 # 1     [\w ]+              The book the reference is from.
 # 2     [0-9]+              The first (possibly only) chapter in the reference.
 # 3     None|[0-9]+         None or the only verse or the first verse in a
 #                           verse range or the start verse in a chapter range.
 # 4     None|[0-9]+|end     None or the end verse of the first verse range or
 #                           the end chapter of a chapter range.
 # 5     None|[0-9]+         None or the second chapter in multiple (non-ranged)
 #                           chapters.
 # 6     None|[0-9]+|end     None, the start of the second verse range or the
 #                           end of a chapter range.
 # 7     None|[0-9]+|end     None or the end of the second verse range.
 ###############################################################################
 BIBLE_REFERENCE = re.compile(
    r'^([\w ]+?) *([0-9]+)'          # Initial book and chapter
    r'(?: *[:|v|V] *([0-9]+))?'      # Verse for first chapter
    r'(?: *- *([0-9]+|end$))?'       # Range for verses or chapters
    r'(?:(?:,([0-9]+))?'             # Second chapter
    r' *[,|:|v|V] *([0-9]+|end$)'    # More range for verses or chapters
    r'(?: *- *([0-9]+|end$))?)?$',   # End of second verse range
    re.UNICODE)
 def check_end(match_group):
    """
    Check if a regular expression match group contains the text u'end' or
    should be converted to an int.
    ``match_group``
        The match group to check.
    """
    if match_group == u'end':
        return -1
    else:
        return int(match_group)
 def parse_reference(reference):
    """
    This is the über-awesome function that takes a person's typed in string
    and converts it to a reference list, a list of references to be queried
    from the Bible database files.
    The reference list is a list of tuples, with each tuple structured like
    this::
        (book, chapter, start_verse, end_verse)
    ``reference``
        The bible reference to parse.
    Returns None or a reference list.
    """
    reference = reference.strip()
    log.debug('parse_reference("%s")', reference)
    unified_ref_list = []
    match = BIBLE_REFERENCE.match(reference)
    if match:
        log.debug(u'Matched reference %s' % reference)
        book = match.group(1)
        chapter = int(match.group(2))
        if match.group(7):
            # Two verse ranges
            vr1_start = int(match.group(3))
            vr1_end = int(match.group(4))
            unified_ref_list.append((book, chapter, vr1_start, vr1_end))
            vr2_start = int(match.group(6))
            vr2_end = check_end(match.group(7))
            if match.group(5):
                # One verse range per chapter
                chapter2 = int(match.group(5))
                unified_ref_list.append((book, chapter2, vr2_start, vr2_end))
            else:
                unified_ref_list.append((book, chapter, vr2_start, vr2_end))
        elif match.group(6):
            # Chapter range with verses
            if match.group(3):
                vr1_start = int(match.group(3))
            else:
                vr1_start = 1
            if match.group(2) == match.group(4):
                vr1_end = int(match.group(6))
                unified_ref_list.append((book, chapter, vr1_start, vr1_end))
            else:
                vr1_end = -1
                unified_ref_list.append((book, chapter, vr1_start, vr1_end))
                vr2_end = check_end(match.group(6))
                if int(match.group(4)) > chapter:
                    for x in range(chapter + 1, int(match.group(4)) + 1):
                        if x == int(match.group(4)):
                            unified_ref_list.append((book, x, 1, vr2_end))
                        else:
                            unified_ref_list.append((book, x, 1, -1))
        elif match.group(4):
            # Chapter range or chapter and verse range
            if match.group(3):
                vr1_start = int(match.group(3))
                vr1_end = check_end(match.group(4))
                if vr1_end == -1 or vr1_end > vr1_start:
                    unified_ref_list.append((book, chapter, vr1_start, vr1_end))
                else:
                    log.debug(u'Ambiguous reference: %s' % reference)
                    return None
            elif match.group(4) != u'end':
                for x in range(chapter, int(match.group(4)) + 1):
                    unified_ref_list.append((book, x, 1, -1))
            else:
                log.debug(u'Unsupported reference: %s' % reference)
                return None
        elif match.group(3):
            # Single chapter and verse
            verse = int(match.group(3))
            unified_ref_list.append((book, chapter, verse, verse))
        else:
            # Single chapter
            unified_ref_list.append((book, chapter, -1, -1))
    else:
        log.debug(u'Invalid reference: %s' % reference)
        return None
    return unified_ref_list
 class SearchResults(object):
    """
    Encapsulate a set of search results. This is Bible-type independant.
    """
    def __init__(self, book, chapter, verselist):
        """
        Create the search result object.
        ``book``
            The book of the Bible.
        ``chapter``
            The chapter of the book.
        ``verselist``
            The list of verses for this reading
        """
        self.book = book
        self.chapter = chapter
        self.verselist = verselist
    def has_verselist(self):
        """
        Returns whether or not the verse list contains verses.
        """
        return len(self.verselist) > 0
 from common import BibleCommon
 from manager import BibleManager
 from biblestab import BiblesTab
 from mediaitem import BibleMediaItem
--- a/openlp/plugins/bibles/lib/common.py
+++ b/openlp/plugins/bibles/lib/common.py
@ -1,256 +0,0 @@
 # -*- coding: utf-8 -*-
 # vim: autoindent shiftwidth=4 expandtab textwidth=80 tabstop=4 softtabstop=4
 ###############################################################################
 # OpenLP - Open Source Lyrics Projection                                      #
 # --------------------------------------------------------------------------- #
 # Copyright (c) 2008-2010 Raoul Snyman                                        #
 # Portions copyright (c) 2008-2010 Tim Bentley, Jonathan Corwin, Michael      #
 # Gorven, Scott Guerrieri, Meinert Jordan, Andreas Preikschat, Christian      #
 # Richter, Philip Ridout, Maikel Stuivenberg, Martin Thompson, Jon Tibble,    #
 # Carsten Tinggaard, Frode Woldsund                                           #
 # --------------------------------------------------------------------------- #
 # This program is free software; you can redistribute it and/or modify it     #
 # under the terms of the GNU General Public License as published by the Free  #
 # Software Foundation; version 2 of the License.                              #
 #                                                                             #
 # This program is distributed in the hope that it will be useful, but WITHOUT #
 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or       #
 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for    #
 # more details.                                                               #
 #                                                                             #
 # You should have received a copy of the GNU General Public License along     #
 # with this program; if not, write to the Free Software Foundation, Inc., 59  #
 # Temple Place, Suite 330, Boston, MA 02111-1307 USA                          #
 ###############################################################################
 import urllib2
 import logging
 import re
 import chardet
 import htmlentitydefs
 only_verses = re.compile(r'([\w .]+)[ ]+([0-9]+)[ ]*[:|v|V][ ]*([0-9]+)'
    r'(?:[ ]*-[ ]*([0-9]+|end))?(?:[ ]*,[ ]*([0-9]+)'
    r'(?:[ ]*-[ ]*([0-9]+|end))?)?',
    re.UNICODE)
 chapter_range = re.compile(r'([\w .]+)[ ]+([0-9]+)[ ]*[:|v|V][ ]*'
    r'([0-9]+|end)[ ]*-[ ]*([0-9]+)[ ]*[:|v|V][ ]*([0-9]+|end)',
    re.UNICODE)
 log = logging.getLogger(__name__)
 def parse_reference(reference):
    """
    This is the über-awesome function that takes a person's typed in string
    and converts it to a reference list, a list of references to be queried
    from the Bible database files.
    The reference list is a list of tuples, with each tuple structured like
    this::
        (book, chapter, start_verse, end_verse)
    """
    reference = reference.strip()
    log.debug('parse_reference("%s")', reference)
    reference_list = []
    # We start with the most "complicated" match first, so that they are found
    # first, and we don't have any "false positives".
    match = chapter_range.match(reference)
    if match:
        log.debug('Found a chapter range.')
        book = match.group(1)
        from_verse = match.group(3)
        to_verse = match.group(5)
        if int(match.group(2)) == int(match.group(4)):
            reference_list.append(
                (book, int(match.group(2)), from_verse, to_verse)
            )
        else:
            if int(match.group(2)) > int(match.group(4)):
                from_chapter = int(match.group(4))
                to_chapter = int(match.group(2))
            else:
                from_chapter = int(match.group(2))
                to_chapter = int(match.group(4))
            for chapter in xrange(from_chapter, to_chapter + 1):
                if chapter == from_chapter:
                    reference_list.append((book, chapter, from_verse, -1))
                elif chapter == to_chapter:
                    reference_list.append((book, chapter, 1, to_verse))
                else:
                    reference_list.append((book, chapter, 1, -1))
    else:
        match = only_verses.match(reference)
        if match:
            log.debug('Found a verse range.')
            book = match.group(1)
            chapter = match.group(2)
            verse = match.group(3)
            if match.group(4) is None:
                reference_list.append((book, chapter, verse, verse))
            elif match.group(5) is None:
                end_verse = match.group(4)
                if end_verse == u'end':
                    end_verse = -1
                reference_list.append((book, chapter, verse, end_verse))
            elif match.group(6) is None:
                reference_list.extend([
                    (book, chapter, verse, match.group(4)),
                    (book, chapter, match.group(5), match.group(5))
                ])
            else:
                end_verse = match.group(6)
                if end_verse == u'end':
                    end_verse = -1
                reference_list.extend([
                    (book, chapter, verse, match.group(4)),
                    (book, chapter, match.group(5), end_verse)
                ])
        else:
            log.debug('Didn\'t find anything.')
    log.debug(reference_list)
    return reference_list
 class SearchResults(object):
    """
    Encapsulate a set of search results. This is Bible-type independant.
    """
    def __init__(self, book, chapter, verselist):
        """
        Create the search result object.
        ``book``
            The book of the Bible.
        ``chapter``
            The chapter of the book.
        ``verselist``
            The list of verses for this reading
        """
        self.book = book
        self.chapter = chapter
        self.verselist = verselist
    def has_verselist(self):
        """
        Returns whether or not the verse list contains verses.
        """
        return len(self.verselist) > 0
 class BibleCommon(object):
    """
    A common ancestor for bible download sites.
    """
    log.info(u'BibleCommon')
    def _get_web_text(self, urlstring, proxyurl):
        """
        Get the HTML from the web page.
        ``urlstring``
            The URL of the page to open.
        ``proxyurl``
            The URL of a proxy server used to access the Internet.
        """
        log.debug(u'get_web_text %s %s', proxyurl, urlstring)
        if proxyurl:
            proxy_support = urllib2.ProxyHandler({'http': self.proxyurl})
            http_support = urllib2.HTTPHandler()
            opener = urllib2.build_opener(proxy_support, http_support)
            urllib2.install_opener(opener)
        xml_string = u''
        req = urllib2.Request(urlstring)
        #Make us look like an IE Browser on XP to stop blocking by web site
        req.add_header(u'User-Agent',
            u'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)')
        try:
            handle = urllib2.urlopen(req)
            html = handle.read()
            details = chardet.detect(html)
            xml_string = unicode(html, details[u'encoding'])
        except IOError, e:
            if hasattr(e, u'reason'):
                log.exception(u'Reason for failure: %s', e.reason)
        return xml_string
    def _clean_text(self, text):
        """
        Clean up text and remove extra characters after been downloaded from
        the Internet.
        ``text``
            The text from the web page that needs to be cleaned up.
        """
        #return text.rstrip()
        # Remove Headings from the Text
        start_tag = text.find(u'<h')
        while start_tag > -1:
            end_tag = text.find(u'</h', start_tag)
            text = text[:(start_tag - 1)] + text[(end_tag + 4)]
            start_tag = text.find(u'<h')
        # Remove Support References from the Text
        start_tag = text.find(u'<sup>')
        while start_tag > -1:
            end_tag = text.find(u'</sup>')
            text = text[:start_tag] + text[end_tag + 6:len(text)]
            start_tag = text.find(u'<sup>')
        start_tag = text.find(u'<SUP>')
        while start_tag > -1:
            end_tag = text.find(u'</SUP>')
            text = text[:start_tag] + text[end_tag + 6:len(text)]
            start_tag = text.find(u'<SUP>')
        # Static Clean ups
        text = text.replace(u'\n', u'')
        text = text.replace(u'\r', u'')
        text = text.replace(u'&nbsp;', u'')
        text = text.replace(u'<P>', u'')
        text = text.replace(u'<I>', u'')
        text = text.replace(u'</I>', u'')
        text = text.replace(u'<P />', u'')
        text = text.replace(u'<p />', u'')
        text = text.replace(u'</P>', u'')
        text = text.replace(u'<BR>', u'')
        text = text.replace(u'<BR />', u'')
        text = text.replace(u'&quot;', u'\"')
        text = text.replace(u'&apos;', u'\'')
        # Remove some other tags
        start_tag = text.find(u'<')
        while start_tag > -1:
            end_tag = text.find(u'>', start_tag)
            text = text[:start_tag] + text[end_tag + 1:]
            start_tag = text.find(u'<')
        text = text.replace(u'>', u'')
        return text.rstrip().lstrip()
 def unescape(text):
    """
    Removes HTML or XML character references and entities from a text string.
    Courtesy of Fredrik Lundh, http://effbot.org/zone/re-sub.htm#unescape-html
    @param text The HTML (or XML) source text.
    @return The plain text, as a Unicode string, if necessary.
    """
    def fixup(markup):
        text = markup.group(0)
        if text.startswith(u'&#'):
            # character reference
            try:
                if text.startswith(u'&#x'):
                    return unichr(int(text[3:-1], 16))
                else:
                    return unichr(int(text[2:-1]))
            except ValueError:
                pass
        else:
            # named entity
            try:
                text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
            except KeyError:
                pass
        return text # leave as is
    return re.sub(u'&#?\w+;', fixup, text)
--- a/openlp/plugins/bibles/lib/http.py
+++ b/openlp/plugins/bibles/lib/http.py
@ -36,7 +36,7 @@ from BeautifulSoup import BeautifulSoup, NavigableString
 from openlp.core.lib import Receiver
 from openlp.core.utils import AppLocation
-from openlp.plugins.bibles.lib.common import BibleCommon, SearchResults    
+from openlp.plugins.bibles.lib import SearchResults    
 from openlp.plugins.bibles.lib.db import BibleDB, Book
 log = logging.getLogger(__name__)
@ -177,7 +177,7 @@ class HTTPBooks(object):
        return 0
-class BGExtract(BibleCommon):
+class BGExtract(object):
    """
    Extract verses from BibleGateway
    """
@ -239,7 +239,8 @@ class BGExtract(BibleCommon):
            found_count += 1
        return SearchResults(bookname, chapter, verse_list)
-class CWExtract(BibleCommon):
+
 class CWExtract(object):
    """
    Extract verses from CrossWalk/BibleStudyTools
    """
--- a/openlp/plugins/bibles/lib/manager.py
+++ b/openlp/plugins/bibles/lib/manager.py
@ -30,9 +30,9 @@ from PyQt4 import QtCore
 from openlp.core.lib import SettingsManager
 from openlp.core.utils import AppLocation
 from openlp.plugins.bibles.lib import parse_reference
 from openlp.plugins.bibles.lib.db import BibleDB, BibleMeta
 from common import parse_reference
 from opensong import OpenSongBible
 from osis import OSISBible
 from csvbible import CSVBible