Bible reference refactor

2010-07-29 15:36:02 +01:00 · 2010-07-29 15:36:02 +01:00 · fc849398a9
commit fc849398a9
parent 53625d08b6
4 changed files with 164 additions and 261 deletions
--- a/openlp/plugins/bibles/lib/__init__.py
+++ b/openlp/plugins/bibles/lib/__init__.py
@@ -23,8 +23,166 @@
 # with this program; if not, write to the Free Software Foundation, Inc., 59  #
 # Temple Place, Suite 330, Boston, MA 02111-1307 USA                          #
 ###############################################################################
+"""
+The :mod:`lib` module contains all the library functionality for the bibles
+plugin.
+"""
+import logging
+import re
+
+log = logging.getLogger(__name__)
+
+###############################################################################
+# BIBLE_REFERENCE regular expression produces the following match groups:
+#
+# 0     This is a special group consisting of the whole string that matched.
+# 1     [\w ]+?             The book the reference is from.
+# 2     [0-9]+              The first (possibly only) chapter in the reference.
+# 3     None|[0-9]+         None or the only verse or the first verse in a
+#                           verse range or the start verse in a chapter range.
+# 4     None|[0-9]+|end     None or the end verse of the first verse range or
+#                           the end chapter of a chapter range.
+# 5     None|[0-9]+         None or the second chapter in multiple (non-ranged)
+#                           chapters.
+# 6     None|[0-9]+|end     None, the start of the second verse range or the
+#                           end verse of a chapter range.
+# 7     None|[0-9]+|end     None or the end of the second verse range.
+###############################################################################
+
+BIBLE_REFERENCE = re.compile(
+    r'^([\w ]+?) *([0-9]+)'          # Initial book and chapter
+    r'(?: *[:|v|V] *([0-9]+))?'      # Verse for first chapter
+    r'(?: *- *([0-9]+|end$))?'       # Range for verses or chapters
+    r'(?:(?:,([0-9]+))?'             # Second chapter
+    r' *[,|:|v|V] *([0-9]+|end$)'    # More range for verses or chapters
+    r'(?: *- *([0-9]+|end$))?)?$',   # End of second verse range
+    re.UNICODE)
+
+def check_end(match_group):
+    """
+    Return -1 if a regular expression match group is the text u'end',
+    otherwise return the match group converted to an int.
+
+    ``match_group``
+        The match group to check.
+    """
+    if match_group == u'end':
+        return -1
+    else:
+        return int(match_group)
+
+def parse_reference(reference):
+    """
+    This is the über-awesome function that takes a person's typed in string
+    and converts it to a reference list, a list of references to be queried
+    from the Bible database files.
+
+    The reference list is a list of tuples, with each tuple structured like
+    this::
+        (book, chapter, start_verse, end_verse)
+
+    ``reference``
+        The bible reference to parse.
+
+    Returns None or a reference list.
+    """
+    reference = reference.strip()
+    log.debug('parse_reference("%s")', reference)
+    unified_ref_list = []
+    match = BIBLE_REFERENCE.match(reference)
+    if match:
+        log.debug(u'Matched reference %s' % reference)
+        book = match.group(1)
+        chapter = int(match.group(2))
+        if match.group(7):
+            # Two verse ranges
+            vr1_start = int(match.group(3))
+            vr1_end = int(match.group(4))
+            unified_ref_list.append((book, chapter, vr1_start, vr1_end))
+            vr2_start = int(match.group(6))
+            vr2_end = check_end(match.group(7))
+            if match.group(5):
+                # One verse range per chapter
+                chapter2 = int(match.group(5))
+                unified_ref_list.append((book, chapter2, vr2_start, vr2_end))
+            else:
+                unified_ref_list.append((book, chapter, vr2_start, vr2_end))
+        elif match.group(6):
+            # Chapter range with verses
+            if match.group(3):
+                vr1_start = int(match.group(3))
+            else:
+                vr1_start = 1
+            if match.group(2) == match.group(4):
+                vr1_end = int(match.group(6))
+                unified_ref_list.append((book, chapter, vr1_start, vr1_end))
+            else:
+                vr1_end = -1
+                unified_ref_list.append((book, chapter, vr1_start, vr1_end))
+                vr2_end = check_end(match.group(6))
+                if int(match.group(4)) > chapter:
+                    for x in range(chapter + 1, int(match.group(4)) + 1):
+                        if x == int(match.group(4)):
+                            unified_ref_list.append((book, x, 1, vr2_end))
+                        else:
+                            unified_ref_list.append((book, x, 1, -1))
+        elif match.group(4):
+            # Chapter range or chapter and verse range
+            if match.group(3):
+                vr1_start = int(match.group(3))
+                vr1_end = check_end(match.group(4))
+                if vr1_end == -1 or vr1_end > vr1_start:
+                    unified_ref_list.append((book, chapter, vr1_start, vr1_end))
+                else:
+                    log.debug(u'Ambiguous reference: %s' % reference)
+                    return None
+            elif match.group(4) != u'end':
+                for x in range(chapter, int(match.group(4)) + 1):
+                    unified_ref_list.append((book, x, 1, -1))
+            else:
+                log.debug(u'Unsupported reference: %s' % reference)
+                return None
+        elif match.group(3):
+            # Single chapter and verse
+            verse = int(match.group(3))
+            unified_ref_list.append((book, chapter, verse, verse))
+        else:
+            # Single chapter
+            unified_ref_list.append((book, chapter, -1, -1))
+    else:
+        log.debug(u'Invalid reference: %s' % reference)
+        return None
+    return unified_ref_list
+
+
+class SearchResults(object):
+    """
+    Encapsulate a set of search results. This is Bible-type independent.
+    """
+    def __init__(self, book, chapter, verselist):
+        """
+        Create the search result object.
+
+        ``book``
+            The book of the Bible.
+
+        ``chapter``
+            The chapter of the book.
+
+        ``verselist``
+            The list of verses for this reading
+        """
+        self.book = book
+        self.chapter = chapter
+        self.verselist = verselist
+
+    def has_verselist(self):
+        """
+        Returns whether or not the verse list contains verses.
+        """
+        return len(self.verselist) > 0
+

-from common import BibleCommon
 from manager import BibleManager
 from biblestab import BiblesTab
 from mediaitem import BibleMediaItem
--- a/openlp/plugins/bibles/lib/common.py
+++ b/openlp/plugins/bibles/lib/common.py
@@ -1,256 +0,0 @@
-# -*- coding: utf-8 -*-
-# vim: autoindent shiftwidth=4 expandtab textwidth=80 tabstop=4 softtabstop=4
-
-###############################################################################
-# OpenLP - Open Source Lyrics Projection                                      #
-# --------------------------------------------------------------------------- #
-# Copyright (c) 2008-2010 Raoul Snyman                                        #
-# Portions copyright (c) 2008-2010 Tim Bentley, Jonathan Corwin, Michael      #
-# Gorven, Scott Guerrieri, Meinert Jordan, Andreas Preikschat, Christian      #
-# Richter, Philip Ridout, Maikel Stuivenberg, Martin Thompson, Jon Tibble,    #
-# Carsten Tinggaard, Frode Woldsund                                           #
-# --------------------------------------------------------------------------- #
-# This program is free software; you can redistribute it and/or modify it     #
-# under the terms of the GNU General Public License as published by the Free  #
-# Software Foundation; version 2 of the License.                              #
-#                                                                             #
-# This program is distributed in the hope that it will be useful, but WITHOUT #
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or       #
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for    #
-# more details.                                                               #
-#                                                                             #
-# You should have received a copy of the GNU General Public License along     #
-# with this program; if not, write to the Free Software Foundation, Inc., 59  #
-# Temple Place, Suite 330, Boston, MA 02111-1307 USA                          #
-###############################################################################
-
-import urllib2
-import logging
-import re
-import chardet
-import htmlentitydefs
-
-only_verses = re.compile(r'([\w .]+)[ ]+([0-9]+)[ ]*[:|v|V][ ]*([0-9]+)'
-    r'(?:[ ]*-[ ]*([0-9]+|end))?(?:[ ]*,[ ]*([0-9]+)'
-    r'(?:[ ]*-[ ]*([0-9]+|end))?)?',
-    re.UNICODE)
-chapter_range = re.compile(r'([\w .]+)[ ]+([0-9]+)[ ]*[:|v|V][ ]*'
-    r'([0-9]+|end)[ ]*-[ ]*([0-9]+)[ ]*[:|v|V][ ]*([0-9]+|end)',
-    re.UNICODE)
-
-log = logging.getLogger(__name__)
-
-def parse_reference(reference):
-    """
-    This is the über-awesome function that takes a person's typed in string
-    and converts it to a reference list, a list of references to be queried
-    from the Bible database files.
-
-    The reference list is a list of tuples, with each tuple structured like
-    this::
-
-        (book, chapter, start_verse, end_verse)
-    """
-    reference = reference.strip()
-    log.debug('parse_reference("%s")', reference)
-    reference_list = []
-    # We start with the most "complicated" match first, so that they are found
-    # first, and we don't have any "false positives".
-    match = chapter_range.match(reference)
-    if match:
-        log.debug('Found a chapter range.')
-        book = match.group(1)
-        from_verse = match.group(3)
-        to_verse = match.group(5)
-        if int(match.group(2)) == int(match.group(4)):
-            reference_list.append(
-                (book, int(match.group(2)), from_verse, to_verse)
-            )
-        else:
-            if int(match.group(2)) > int(match.group(4)):
-                from_chapter = int(match.group(4))
-                to_chapter = int(match.group(2))
-            else:
-                from_chapter = int(match.group(2))
-                to_chapter = int(match.group(4))
-            for chapter in xrange(from_chapter, to_chapter + 1):
-                if chapter == from_chapter:
-                    reference_list.append((book, chapter, from_verse, -1))
-                elif chapter == to_chapter:
-                    reference_list.append((book, chapter, 1, to_verse))
-                else:
-                    reference_list.append((book, chapter, 1, -1))
-    else:
-        match = only_verses.match(reference)
-        if match:
-            log.debug('Found a verse range.')
-            book = match.group(1)
-            chapter = match.group(2)
-            verse = match.group(3)
-            if match.group(4) is None:
-                reference_list.append((book, chapter, verse, verse))
-            elif match.group(5) is None:
-                end_verse = match.group(4)
-                if end_verse == u'end':
-                    end_verse = -1
-                reference_list.append((book, chapter, verse, end_verse))
-            elif match.group(6) is None:
-                reference_list.extend([
-                    (book, chapter, verse, match.group(4)),
-                    (book, chapter, match.group(5), match.group(5))
-                ])
-            else:
-                end_verse = match.group(6)
-                if end_verse == u'end':
-                    end_verse = -1
-                reference_list.extend([
-                    (book, chapter, verse, match.group(4)),
-                    (book, chapter, match.group(5), end_verse)
-                ])
-        else:
-            log.debug('Didn\'t find anything.')
-    log.debug(reference_list)
-    return reference_list
-
-class SearchResults(object):
-    """
-    Encapsulate a set of search results. This is Bible-type independant.
-    """
-    def __init__(self, book, chapter, verselist):
-        """
-        Create the search result object.
-
-        ``book``
-            The book of the Bible.
-
-        ``chapter``
-            The chapter of the book.
-
-        ``verselist``
-            The list of verses for this reading
-        """
-        self.book = book
-        self.chapter = chapter
-        self.verselist = verselist
-
-    def has_verselist(self):
-        """
-        Returns whether or not the verse list contains verses.
-        """
-        return len(self.verselist) > 0
-
-
-class BibleCommon(object):
-    """
-    A common ancestor for bible download sites.
-    """
-    log.info(u'BibleCommon')
-
-    def _get_web_text(self, urlstring, proxyurl):
-        """
-        Get the HTML from the web page.
-
-        ``urlstring``
-            The URL of the page to open.
-
-        ``proxyurl``
-            The URL of a proxy server used to access the Internet.
-        """
-        log.debug(u'get_web_text %s %s', proxyurl, urlstring)
-        if proxyurl:
-            proxy_support = urllib2.ProxyHandler({'http': self.proxyurl})
-            http_support = urllib2.HTTPHandler()
-            opener = urllib2.build_opener(proxy_support, http_support)
-            urllib2.install_opener(opener)
-        xml_string = u''
-        req = urllib2.Request(urlstring)
-        #Make us look like an IE Browser on XP to stop blocking by web site
-        req.add_header(u'User-Agent',
-            u'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)')
-        try:
-            handle = urllib2.urlopen(req)
-            html = handle.read()
-            details = chardet.detect(html)
-            xml_string = unicode(html, details[u'encoding'])
-        except IOError, e:
-            if hasattr(e, u'reason'):
-                log.exception(u'Reason for failure: %s', e.reason)
-        return xml_string
-
-    def _clean_text(self, text):
-        """
-        Clean up text and remove extra characters after been downloaded from
-        the Internet.
-
-        ``text``
-            The text from the web page that needs to be cleaned up.
-        """
-        #return text.rstrip()
-        # Remove Headings from the Text
-        start_tag = text.find(u'<h')
-        while start_tag > -1:
-            end_tag = text.find(u'</h', start_tag)
-            text = text[:(start_tag - 1)] + text[(end_tag + 4)]
-            start_tag = text.find(u'<h')
-        # Remove Support References from the Text
-        start_tag = text.find(u'<sup>')
-        while start_tag > -1:
-            end_tag = text.find(u'</sup>')
-            text = text[:start_tag] + text[end_tag + 6:len(text)]
-            start_tag = text.find(u'<sup>')
-        start_tag = text.find(u'<SUP>')
-        while start_tag > -1:
-            end_tag = text.find(u'</SUP>')
-            text = text[:start_tag] + text[end_tag + 6:len(text)]
-            start_tag = text.find(u'<SUP>')
-        # Static Clean ups
-        text = text.replace(u'\n', u'')
-        text = text.replace(u'\r', u'')
-        text = text.replace(u'&nbsp;', u'')
-        text = text.replace(u'<P>', u'')
-        text = text.replace(u'<I>', u'')
-        text = text.replace(u'</I>', u'')
-        text = text.replace(u'<P />', u'')
-        text = text.replace(u'<p />', u'')
-        text = text.replace(u'</P>', u'')
-        text = text.replace(u'<BR>', u'')
-        text = text.replace(u'<BR />', u'')
-        text = text.replace(u'&quot;', u'\"')
-        text = text.replace(u'&apos;', u'\'')
-        # Remove some other tags
-        start_tag = text.find(u'<')
-        while start_tag > -1:
-            end_tag = text.find(u'>', start_tag)
-            text = text[:start_tag] + text[end_tag + 1:]
-            start_tag = text.find(u'<')
-        text = text.replace(u'>', u'')
-        return text.rstrip().lstrip()
-
-
-def unescape(text):
-    """
-    Removes HTML or XML character references and entities from a text string.
-    Courtesy of Fredrik Lundh, http://effbot.org/zone/re-sub.htm#unescape-html
-
-    @param text The HTML (or XML) source text.
-    @return The plain text, as a Unicode string, if necessary.
-    """
-    def fixup(markup):
-        text = markup.group(0)
-        if text.startswith(u'&#'):
-            # character reference
-            try:
-                if text.startswith(u'&#x'):
-                    return unichr(int(text[3:-1], 16))
-                else:
-                    return unichr(int(text[2:-1]))
-            except ValueError:
-                pass
-        else:
-            # named entity
-            try:
-                text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
-            except KeyError:
-                pass
-        return text # leave as is
-    return re.sub(u'&#?\w+;', fixup, text)
--- a/openlp/plugins/bibles/lib/http.py
+++ b/openlp/plugins/bibles/lib/http.py
@@ -36,7 +36,7 @@ from BeautifulSoup import BeautifulSoup, NavigableString

 from openlp.core.lib import Receiver
 from openlp.core.utils import AppLocation
-from openlp.plugins.bibles.lib.common import BibleCommon, SearchResults    
+from openlp.plugins.bibles.lib import SearchResults    
 from openlp.plugins.bibles.lib.db import BibleDB, Book

 log = logging.getLogger(__name__)
@@ -177,7 +177,7 @@ class HTTPBooks(object):
        return 0


-class BGExtract(BibleCommon):
+class BGExtract(object):
    """
    Extract verses from BibleGateway
    """
@@ -239,7 +239,8 @@ class BGExtract(BibleCommon):
            found_count += 1
        return SearchResults(bookname, chapter, verse_list)

-class CWExtract(BibleCommon):
+
+class CWExtract(object):
    """
    Extract verses from CrossWalk/BibleStudyTools
    """
--- a/openlp/plugins/bibles/lib/manager.py
+++ b/openlp/plugins/bibles/lib/manager.py
@@ -30,9 +30,9 @@ from PyQt4 import QtCore

 from openlp.core.lib import SettingsManager
 from openlp.core.utils import AppLocation
+from openlp.plugins.bibles.lib import parse_reference
 from openlp.plugins.bibles.lib.db import BibleDB, BibleMeta

-from common import parse_reference
 from opensong import OpenSongBible
 from osis import OSISBible
 from csvbible import CSVBible