forked from openlp/openlp
Bible reference refactor
This commit is contained in:
parent
53625d08b6
commit
fc849398a9
@ -23,8 +23,166 @@
|
||||
# with this program; if not, write to the Free Software Foundation, Inc., 59 #
|
||||
# Temple Place, Suite 330, Boston, MA 02111-1307 USA #
|
||||
###############################################################################
|
||||
"""
|
||||
The :mod:`lib` module contains all the library functionality for the bibles
|
||||
plugin.
|
||||
"""
|
||||
import logging
|
||||
import re
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
###############################################################################
|
||||
# BIBLE_REFERENCE regular expression produces the following match groups:
|
||||
#
|
||||
# 0 This is a special group consisting of the whole string that matched.
|
||||
# 1 [\w ]+ The book the reference is from.
|
||||
# 2 [0-9]+ The first (possibly only) chapter in the reference.
|
||||
# 3 None|[0-9]+ None or the only verse or the first verse in a
|
||||
# verse range or the start verse in a chapter range.
|
||||
# 4 None|[0-9]+|end None or the end verse of the first verse range or
|
||||
# the end chapter of a chapter range.
|
||||
# 5 None|[0-9]+ None or the second chapter in multiple (non-ranged)
|
||||
# chapters.
|
||||
# 6 None|[0-9]+|end None, the start of the second verse range or the
|
||||
# end of a chapter range.
|
||||
# 7 None|[0-9]+|end None or the end of the second verse range.
|
||||
###############################################################################
|
||||
|
||||
BIBLE_REFERENCE = re.compile(
|
||||
r'^([\w ]+?) *([0-9]+)' # Initial book and chapter
|
||||
r'(?: *[:|v|V] *([0-9]+))?' # Verse for first chapter
|
||||
r'(?: *- *([0-9]+|end$))?' # Range for verses or chapters
|
||||
r'(?:(?:,([0-9]+))?' # Second chapter
|
||||
r' *[,|:|v|V] *([0-9]+|end$)' # More range for verses or chapters
|
||||
r'(?: *- *([0-9]+|end$))?)?$', # End of second verse range
|
||||
re.UNICODE)
|
||||
|
||||
def check_end(match_group):
|
||||
"""
|
||||
Check if a regular expression match group contains the text u'end' or
|
||||
should be converted to an int.
|
||||
|
||||
``match_group``
|
||||
The match group to check.
|
||||
"""
|
||||
if match_group == u'end':
|
||||
return -1
|
||||
else:
|
||||
return int(match_group)
|
||||
|
||||
def parse_reference(reference):
|
||||
"""
|
||||
This is the über-awesome function that takes a person's typed in string
|
||||
and converts it to a reference list, a list of references to be queried
|
||||
from the Bible database files.
|
||||
|
||||
The reference list is a list of tuples, with each tuple structured like
|
||||
this::
|
||||
(book, chapter, start_verse, end_verse)
|
||||
|
||||
``reference``
|
||||
The bible reference to parse.
|
||||
|
||||
Returns None or a reference list.
|
||||
"""
|
||||
reference = reference.strip()
|
||||
log.debug('parse_reference("%s")', reference)
|
||||
unified_ref_list = []
|
||||
match = BIBLE_REFERENCE.match(reference)
|
||||
if match:
|
||||
log.debug(u'Matched reference %s' % reference)
|
||||
book = match.group(1)
|
||||
chapter = int(match.group(2))
|
||||
if match.group(7):
|
||||
# Two verse ranges
|
||||
vr1_start = int(match.group(3))
|
||||
vr1_end = int(match.group(4))
|
||||
unified_ref_list.append((book, chapter, vr1_start, vr1_end))
|
||||
vr2_start = int(match.group(6))
|
||||
vr2_end = check_end(match.group(7))
|
||||
if match.group(5):
|
||||
# One verse range per chapter
|
||||
chapter2 = int(match.group(5))
|
||||
unified_ref_list.append((book, chapter2, vr2_start, vr2_end))
|
||||
else:
|
||||
unified_ref_list.append((book, chapter, vr2_start, vr2_end))
|
||||
elif match.group(6):
|
||||
# Chapter range with verses
|
||||
if match.group(3):
|
||||
vr1_start = int(match.group(3))
|
||||
else:
|
||||
vr1_start = 1
|
||||
if match.group(2) == match.group(4):
|
||||
vr1_end = int(match.group(6))
|
||||
unified_ref_list.append((book, chapter, vr1_start, vr1_end))
|
||||
else:
|
||||
vr1_end = -1
|
||||
unified_ref_list.append((book, chapter, vr1_start, vr1_end))
|
||||
vr2_end = check_end(match.group(6))
|
||||
if int(match.group(4)) > chapter:
|
||||
for x in range(chapter + 1, int(match.group(4)) + 1):
|
||||
if x == int(match.group(4)):
|
||||
unified_ref_list.append((book, x, 1, vr2_end))
|
||||
else:
|
||||
unified_ref_list.append((book, x, 1, -1))
|
||||
elif match.group(4):
|
||||
# Chapter range or chapter and verse range
|
||||
if match.group(3):
|
||||
vr1_start = int(match.group(3))
|
||||
vr1_end = check_end(match.group(4))
|
||||
if vr1_end == -1 or vr1_end > vr1_start:
|
||||
unified_ref_list.append((book, chapter, vr1_start, vr1_end))
|
||||
else:
|
||||
log.debug(u'Ambiguous reference: %s' % reference)
|
||||
return None
|
||||
elif match.group(4) != u'end':
|
||||
for x in range(chapter, int(match.group(4)) + 1):
|
||||
unified_ref_list.append((book, x, 1, -1))
|
||||
else:
|
||||
log.debug(u'Unsupported reference: %s' % reference)
|
||||
return None
|
||||
elif match.group(3):
|
||||
# Single chapter and verse
|
||||
verse = int(match.group(3))
|
||||
unified_ref_list.append((book, chapter, verse, verse))
|
||||
else:
|
||||
# Single chapter
|
||||
unified_ref_list.append((book, chapter, -1, -1))
|
||||
else:
|
||||
log.debug(u'Invalid reference: %s' % reference)
|
||||
return None
|
||||
return unified_ref_list
|
||||
|
||||
|
||||
class SearchResults(object):
|
||||
"""
|
||||
Encapsulate a set of search results. This is Bible-type independant.
|
||||
"""
|
||||
def __init__(self, book, chapter, verselist):
|
||||
"""
|
||||
Create the search result object.
|
||||
|
||||
``book``
|
||||
The book of the Bible.
|
||||
|
||||
``chapter``
|
||||
The chapter of the book.
|
||||
|
||||
``verselist``
|
||||
The list of verses for this reading
|
||||
"""
|
||||
self.book = book
|
||||
self.chapter = chapter
|
||||
self.verselist = verselist
|
||||
|
||||
def has_verselist(self):
|
||||
"""
|
||||
Returns whether or not the verse list contains verses.
|
||||
"""
|
||||
return len(self.verselist) > 0
|
||||
|
||||
|
||||
from common import BibleCommon
|
||||
from manager import BibleManager
|
||||
from biblestab import BiblesTab
|
||||
from mediaitem import BibleMediaItem
|
||||
|
@ -1,256 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# vim: autoindent shiftwidth=4 expandtab textwidth=80 tabstop=4 softtabstop=4
|
||||
|
||||
###############################################################################
|
||||
# OpenLP - Open Source Lyrics Projection #
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Copyright (c) 2008-2010 Raoul Snyman #
|
||||
# Portions copyright (c) 2008-2010 Tim Bentley, Jonathan Corwin, Michael #
|
||||
# Gorven, Scott Guerrieri, Meinert Jordan, Andreas Preikschat, Christian #
|
||||
# Richter, Philip Ridout, Maikel Stuivenberg, Martin Thompson, Jon Tibble, #
|
||||
# Carsten Tinggaard, Frode Woldsund #
|
||||
# --------------------------------------------------------------------------- #
|
||||
# This program is free software; you can redistribute it and/or modify it #
|
||||
# under the terms of the GNU General Public License as published by the Free #
|
||||
# Software Foundation; version 2 of the License. #
|
||||
# #
|
||||
# This program is distributed in the hope that it will be useful, but WITHOUT #
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
|
||||
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for #
|
||||
# more details. #
|
||||
# #
|
||||
# You should have received a copy of the GNU General Public License along #
|
||||
# with this program; if not, write to the Free Software Foundation, Inc., 59 #
|
||||
# Temple Place, Suite 330, Boston, MA 02111-1307 USA #
|
||||
###############################################################################
|
||||
|
||||
import urllib2
|
||||
import logging
|
||||
import re
|
||||
import chardet
|
||||
import htmlentitydefs
|
||||
|
||||
only_verses = re.compile(r'([\w .]+)[ ]+([0-9]+)[ ]*[:|v|V][ ]*([0-9]+)'
|
||||
r'(?:[ ]*-[ ]*([0-9]+|end))?(?:[ ]*,[ ]*([0-9]+)'
|
||||
r'(?:[ ]*-[ ]*([0-9]+|end))?)?',
|
||||
re.UNICODE)
|
||||
chapter_range = re.compile(r'([\w .]+)[ ]+([0-9]+)[ ]*[:|v|V][ ]*'
|
||||
r'([0-9]+|end)[ ]*-[ ]*([0-9]+)[ ]*[:|v|V][ ]*([0-9]+|end)',
|
||||
re.UNICODE)
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
def parse_reference(reference):
|
||||
"""
|
||||
This is the über-awesome function that takes a person's typed in string
|
||||
and converts it to a reference list, a list of references to be queried
|
||||
from the Bible database files.
|
||||
|
||||
The reference list is a list of tuples, with each tuple structured like
|
||||
this::
|
||||
|
||||
(book, chapter, start_verse, end_verse)
|
||||
"""
|
||||
reference = reference.strip()
|
||||
log.debug('parse_reference("%s")', reference)
|
||||
reference_list = []
|
||||
# We start with the most "complicated" match first, so that they are found
|
||||
# first, and we don't have any "false positives".
|
||||
match = chapter_range.match(reference)
|
||||
if match:
|
||||
log.debug('Found a chapter range.')
|
||||
book = match.group(1)
|
||||
from_verse = match.group(3)
|
||||
to_verse = match.group(5)
|
||||
if int(match.group(2)) == int(match.group(4)):
|
||||
reference_list.append(
|
||||
(book, int(match.group(2)), from_verse, to_verse)
|
||||
)
|
||||
else:
|
||||
if int(match.group(2)) > int(match.group(4)):
|
||||
from_chapter = int(match.group(4))
|
||||
to_chapter = int(match.group(2))
|
||||
else:
|
||||
from_chapter = int(match.group(2))
|
||||
to_chapter = int(match.group(4))
|
||||
for chapter in xrange(from_chapter, to_chapter + 1):
|
||||
if chapter == from_chapter:
|
||||
reference_list.append((book, chapter, from_verse, -1))
|
||||
elif chapter == to_chapter:
|
||||
reference_list.append((book, chapter, 1, to_verse))
|
||||
else:
|
||||
reference_list.append((book, chapter, 1, -1))
|
||||
else:
|
||||
match = only_verses.match(reference)
|
||||
if match:
|
||||
log.debug('Found a verse range.')
|
||||
book = match.group(1)
|
||||
chapter = match.group(2)
|
||||
verse = match.group(3)
|
||||
if match.group(4) is None:
|
||||
reference_list.append((book, chapter, verse, verse))
|
||||
elif match.group(5) is None:
|
||||
end_verse = match.group(4)
|
||||
if end_verse == u'end':
|
||||
end_verse = -1
|
||||
reference_list.append((book, chapter, verse, end_verse))
|
||||
elif match.group(6) is None:
|
||||
reference_list.extend([
|
||||
(book, chapter, verse, match.group(4)),
|
||||
(book, chapter, match.group(5), match.group(5))
|
||||
])
|
||||
else:
|
||||
end_verse = match.group(6)
|
||||
if end_verse == u'end':
|
||||
end_verse = -1
|
||||
reference_list.extend([
|
||||
(book, chapter, verse, match.group(4)),
|
||||
(book, chapter, match.group(5), end_verse)
|
||||
])
|
||||
else:
|
||||
log.debug('Didn\'t find anything.')
|
||||
log.debug(reference_list)
|
||||
return reference_list
|
||||
|
||||
class SearchResults(object):
|
||||
"""
|
||||
Encapsulate a set of search results. This is Bible-type independant.
|
||||
"""
|
||||
def __init__(self, book, chapter, verselist):
|
||||
"""
|
||||
Create the search result object.
|
||||
|
||||
``book``
|
||||
The book of the Bible.
|
||||
|
||||
``chapter``
|
||||
The chapter of the book.
|
||||
|
||||
``verselist``
|
||||
The list of verses for this reading
|
||||
"""
|
||||
self.book = book
|
||||
self.chapter = chapter
|
||||
self.verselist = verselist
|
||||
|
||||
def has_verselist(self):
|
||||
"""
|
||||
Returns whether or not the verse list contains verses.
|
||||
"""
|
||||
return len(self.verselist) > 0
|
||||
|
||||
|
||||
class BibleCommon(object):
|
||||
"""
|
||||
A common ancestor for bible download sites.
|
||||
"""
|
||||
log.info(u'BibleCommon')
|
||||
|
||||
def _get_web_text(self, urlstring, proxyurl):
|
||||
"""
|
||||
Get the HTML from the web page.
|
||||
|
||||
``urlstring``
|
||||
The URL of the page to open.
|
||||
|
||||
``proxyurl``
|
||||
The URL of a proxy server used to access the Internet.
|
||||
"""
|
||||
log.debug(u'get_web_text %s %s', proxyurl, urlstring)
|
||||
if proxyurl:
|
||||
proxy_support = urllib2.ProxyHandler({'http': self.proxyurl})
|
||||
http_support = urllib2.HTTPHandler()
|
||||
opener = urllib2.build_opener(proxy_support, http_support)
|
||||
urllib2.install_opener(opener)
|
||||
xml_string = u''
|
||||
req = urllib2.Request(urlstring)
|
||||
#Make us look like an IE Browser on XP to stop blocking by web site
|
||||
req.add_header(u'User-Agent',
|
||||
u'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)')
|
||||
try:
|
||||
handle = urllib2.urlopen(req)
|
||||
html = handle.read()
|
||||
details = chardet.detect(html)
|
||||
xml_string = unicode(html, details[u'encoding'])
|
||||
except IOError, e:
|
||||
if hasattr(e, u'reason'):
|
||||
log.exception(u'Reason for failure: %s', e.reason)
|
||||
return xml_string
|
||||
|
||||
def _clean_text(self, text):
|
||||
"""
|
||||
Clean up text and remove extra characters after been downloaded from
|
||||
the Internet.
|
||||
|
||||
``text``
|
||||
The text from the web page that needs to be cleaned up.
|
||||
"""
|
||||
#return text.rstrip()
|
||||
# Remove Headings from the Text
|
||||
start_tag = text.find(u'<h')
|
||||
while start_tag > -1:
|
||||
end_tag = text.find(u'</h', start_tag)
|
||||
text = text[:(start_tag - 1)] + text[(end_tag + 4)]
|
||||
start_tag = text.find(u'<h')
|
||||
# Remove Support References from the Text
|
||||
start_tag = text.find(u'<sup>')
|
||||
while start_tag > -1:
|
||||
end_tag = text.find(u'</sup>')
|
||||
text = text[:start_tag] + text[end_tag + 6:len(text)]
|
||||
start_tag = text.find(u'<sup>')
|
||||
start_tag = text.find(u'<SUP>')
|
||||
while start_tag > -1:
|
||||
end_tag = text.find(u'</SUP>')
|
||||
text = text[:start_tag] + text[end_tag + 6:len(text)]
|
||||
start_tag = text.find(u'<SUP>')
|
||||
# Static Clean ups
|
||||
text = text.replace(u'\n', u'')
|
||||
text = text.replace(u'\r', u'')
|
||||
text = text.replace(u' ', u'')
|
||||
text = text.replace(u'<P>', u'')
|
||||
text = text.replace(u'<I>', u'')
|
||||
text = text.replace(u'</I>', u'')
|
||||
text = text.replace(u'<P />', u'')
|
||||
text = text.replace(u'<p />', u'')
|
||||
text = text.replace(u'</P>', u'')
|
||||
text = text.replace(u'<BR>', u'')
|
||||
text = text.replace(u'<BR />', u'')
|
||||
text = text.replace(u'"', u'\"')
|
||||
text = text.replace(u''', u'\'')
|
||||
# Remove some other tags
|
||||
start_tag = text.find(u'<')
|
||||
while start_tag > -1:
|
||||
end_tag = text.find(u'>', start_tag)
|
||||
text = text[:start_tag] + text[end_tag + 1:]
|
||||
start_tag = text.find(u'<')
|
||||
text = text.replace(u'>', u'')
|
||||
return text.rstrip().lstrip()
|
||||
|
||||
|
||||
def unescape(text):
|
||||
"""
|
||||
Removes HTML or XML character references and entities from a text string.
|
||||
Courtesy of Fredrik Lundh, http://effbot.org/zone/re-sub.htm#unescape-html
|
||||
|
||||
@param text The HTML (or XML) source text.
|
||||
@return The plain text, as a Unicode string, if necessary.
|
||||
"""
|
||||
def fixup(markup):
|
||||
text = markup.group(0)
|
||||
if text.startswith(u'&#'):
|
||||
# character reference
|
||||
try:
|
||||
if text.startswith(u'&#x'):
|
||||
return unichr(int(text[3:-1], 16))
|
||||
else:
|
||||
return unichr(int(text[2:-1]))
|
||||
except ValueError:
|
||||
pass
|
||||
else:
|
||||
# named entity
|
||||
try:
|
||||
text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
|
||||
except KeyError:
|
||||
pass
|
||||
return text # leave as is
|
||||
return re.sub(u'&#?\w+;', fixup, text)
|
@ -36,7 +36,7 @@ from BeautifulSoup import BeautifulSoup, NavigableString
|
||||
|
||||
from openlp.core.lib import Receiver
|
||||
from openlp.core.utils import AppLocation
|
||||
from openlp.plugins.bibles.lib.common import BibleCommon, SearchResults
|
||||
from openlp.plugins.bibles.lib import SearchResults
|
||||
from openlp.plugins.bibles.lib.db import BibleDB, Book
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
@ -177,7 +177,7 @@ class HTTPBooks(object):
|
||||
return 0
|
||||
|
||||
|
||||
class BGExtract(BibleCommon):
|
||||
class BGExtract(object):
|
||||
"""
|
||||
Extract verses from BibleGateway
|
||||
"""
|
||||
@ -239,7 +239,8 @@ class BGExtract(BibleCommon):
|
||||
found_count += 1
|
||||
return SearchResults(bookname, chapter, verse_list)
|
||||
|
||||
class CWExtract(BibleCommon):
|
||||
|
||||
class CWExtract(object):
|
||||
"""
|
||||
Extract verses from CrossWalk/BibleStudyTools
|
||||
"""
|
||||
|
@ -30,9 +30,9 @@ from PyQt4 import QtCore
|
||||
|
||||
from openlp.core.lib import SettingsManager
|
||||
from openlp.core.utils import AppLocation
|
||||
from openlp.plugins.bibles.lib import parse_reference
|
||||
from openlp.plugins.bibles.lib.db import BibleDB, BibleMeta
|
||||
|
||||
from common import parse_reference
|
||||
from opensong import OpenSongBible
|
||||
from osis import OSISBible
|
||||
from csvbible import CSVBible
|
||||
|
Loading…
Reference in New Issue
Block a user