More fixes and add initial load of BibleGateway.

Some problems with stripping HTML tags!

bzr-revno: 85
This commit is contained in:
Tim Bentley 2008-11-09 20:11:31 +00:00
parent d2d9593ee2
commit 2f1cc4f02a
4 changed files with 137 additions and 52 deletions

View File

@ -28,6 +28,7 @@ from sqlalchemy.orm import sessionmaker, mapper
mypath=os.path.split(os.path.abspath(__file__))[0]
sys.path.insert(0,(os.path.join(mypath, '..', '..', '..')))
from openlp.plugins.biblemanager.BibleCommon import BibleCommon
from openlp.utils import ConfigHelper
import logging
@ -111,7 +112,7 @@ mapper(ONTestament, testament_table)
mapper(Book, book_table)
mapper(Verse, verse_table)
class BibleDBImpl:
class BibleDBImpl(BibleCommon):
global log
log=logging.getLogger("BibleDBMgr")
log.info("BibleDB manager loaded")
@ -137,6 +138,7 @@ class BibleDBImpl:
self.Session.configure(bind=self.db)
def createTables(self):
log.debug( "createTables")
if os.path.exists(self.biblefile): # delete bible file and set it up again
os.remove(self.biblefile)
meta_table.create()
@ -144,6 +146,7 @@ class BibleDBImpl:
book_table.create()
verse_table.create()
self.loadMeta("dbversion", "0.1")
self._loadTestaments
def createChapter(self, bookname, chap, textlist):
log.debug( "createChapter %s,%s,%s", bookname, chap, textlist)
@ -165,6 +168,7 @@ class BibleDBImpl:
bookmeta = Book(int(5), bookname, bookabbrev)
session.add(bookmeta)
session.commit()
self._loadTestaments()
def loadMeta(self, key, value):
metadata.bind.echo = False
@ -183,12 +187,15 @@ class BibleDBImpl:
self.db.execute(s, k=key)
def _loadTestaments(self):
    """
    Add the Old Testament, New Testament and Apocrypha rows as
    ONTestament objects and commit them with a single commit.
    """
    # NOTE(review): echo is switched off before the rows are written,
    # presumably to keep the generated SQL out of the output - confirm.
    metadata.bind.echo = False
    log.debug("loadTestaments")
    session = self.Session()
    for name in ("Old Testament", "New Testament", "Apocrypha"):
        session.add(ONTestament(name=name))
    session.commit()
@ -255,10 +262,4 @@ class BibleDBImpl:
log.debug( self.db.execute(s).fetchall())
log.debug( "...............................Verses ")
s = text (""" select * FROM verse """)
log.debug( self.db.execute(s).fetchall())
def _cleanText(self, text):
text = text.replace('\n', '')
text = text.replace('\r', '')
text = text.replace('"', '')
return text
log.debug( self.db.execute(s).fetchall())

View File

@ -20,6 +20,10 @@ import os, os.path
import sys
import urllib2
mypath=os.path.split(os.path.abspath(__file__))[0]
sys.path.insert(0,(os.path.join(mypath, '..', '..', '..')))
from openlp.plugins.biblemanager.BibleCommon import BibleCommon
import logging
logging.basicConfig(level=logging.DEBUG,
format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
@ -27,7 +31,7 @@ logging.basicConfig(level=logging.DEBUG,
filename='plugins.log',
filemode='w')
class BibleHTTPImpl:
class BibleHTTPImpl(BibleCommon):
global log
log=logging.getLogger("BibleHTTPMgr")
log.info("BibleHTTP manager loaded")
@ -40,14 +44,96 @@ class BibleHTTPImpl:
Init confirms the bible exists and stores the database path.
"""
bible = {}
biblesoure = ""
biblesource = ""
def setBibleSource(self, biblesource):
    """
    Record the source the bible text is coming from.

    biblesource - name of the source; getBibleChapter compares it
                  against 'Crosswalk' to pick a handler
    """
    log.debug("setBibleSource %s", biblesource)
    self.biblesource = biblesource
def getBibleChapter(self, version, bookid, bookname, chapter):
    """
    Receive the request and call the relevant handler method.

    A biblesource of 'Crosswalk' is served by getBibleCWChapter; any
    other source is served by getBibleBGChapter.

    version  - bible version, passed through to the handler
    bookid   - id of the book, passed through to the handler
    bookname - name of the book, passed through to the handler
    chapter  - chapter number, passed through to the handler

    Returns whatever the handler returns. If the BibleGateway handler
    raises, the error is logged and None is returned.
    """
    log.debug( "getBibleChapter %s,%s,%s,%s", version, bookid, bookname, chapter)
    log.debug("biblesource = %s", self.biblesource)
    if self.biblesource == 'Crosswalk':
        return self.getBibleCWChapter(version, bookid, bookname, chapter)
    try:
        return self.getBibleBGChapter(version, bookid, bookname, chapter)
    except Exception:
        # Best effort: report the failure but do not propagate it.
        # Catching Exception (not a bare except) lets KeyboardInterrupt
        # and SystemExit through.
        log.error("Error thrown = %s", sys.exc_info()[1])
        return None
def getBibleBGChapter(self, version, bookid, bookname, chapter):
    """
    Access and decode bibles via the BibleGateway website.

    version  - the version of the bible, like 31 for New International
               Version (currently overridden, see the note below)
    bookid   - book id for the book of the bible, eg 1 for Genesis
    bookname - not used
    chapter  - chapter number

    Returns a dict mapping verse number (starting at 1) to the cleaned
    verse text. The dict is empty when the page could not be fetched or
    no verse markers were found in it.
    """
    # NOTE(review): the version argument is ignored and 49 is always
    # requested. The caller in BibleManager passes the bible name here,
    # not a BibleGateway id, so this is left in place - TODO map names
    # to BibleGateway version ids and drop the override.
    version = 49
    log.debug( "getBibleBGChapter %s,%s,%s,%s", version, bookid, bookname, chapter)
    # The chapter parameter needs its '=' or the value is not sent as
    # a key/value pair.
    urlstring = ("http://www.biblegateway.com/passage/?book_id=" + str(bookid)
                 + "&chapter=" + str(chapter) + "&version=" + str(version))
    log.debug( "Url String %s", urlstring)
    req = urllib2.Request(urlstring)
    req.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)')
    try:
        handle = urllib2.urlopen(req)
        try:
            xml_string = handle.read()
        finally:
            handle.close()  # always release the connection
    except IOError:
        # sys.exc_info() is used, as elsewhere in this file, to get at
        # the exception object.
        e = sys.exc_info()[1]
        log.error( 'Reason : ')
        log.error( getattr(e, 'reason', e))
        return {}
    return self._parseBGVerses(xml_string)

def _parseBGVerses(self, xml_string):
    """
    Extract the verses from the text of a BibleGateway passage page.

    Scanning starts at the first "result-text-style-normal". Each verse
    is the text between the "</span>" that closes a class="sup" marker
    and the "<span" that opens the next marker. The last verse ends at
    the next "</div", or at an earlier "<strong" if there is one.

    Returns a dict of verse number -> text passed through _cleanText.
    """
    VerseSearch = 'class="sup">'
    bible = {}
    start = xml_string.find("result-text-style-normal")
    if start == -1:
        return bible  # not a passage page (or nothing was fetched)
    xml_string = xml_string[start:]
    verse = 1
    versePos = xml_string.find(VerseSearch)
    while versePos > 0:
        versePos = xml_string.find("</span", versePos)
        if versePos == -1:
            break  # marker is never closed: malformed page, stop here
        nextMarker = xml_string.find(VerseSearch, versePos + 1)
        if nextMarker == -1:
            # Last verse on the page.
            end = xml_string.find("</div", versePos + 1)
            strongPos = xml_string.find("<strong", versePos + 1)
            if strongPos > 0 and strongPos < end:
                end = strongPos
            verseText = xml_string[versePos + 7:end]  # 7 skips "</span>"
            log.debug("VerseText = %s %s", verse, verseText)
            bible[verse] = self._cleanText(verseText)  # store the verse
            break
        spanStart = xml_string[:nextMarker].rfind("<span")
        if spanStart < versePos:
            # No "<span" opens the next marker, so the text cannot be
            # chopped forward; stop rather than loop forever.
            break
        # End the verse before the "<" of the next marker's span.
        verseText = xml_string[versePos + 7:spanStart]
        # Chop just past that "<" so the next marker is found at an
        # index greater than 0, which the loop condition relies on.
        xml_string = xml_string[spanStart + 1:]
        versePos = xml_string.find(VerseSearch)  # look for the next verse
        bible[verse] = self._cleanText(verseText)  # store the verse
        verse += 1
    return bible
def getBibleCWChapter(self, version, bookid, bookname, chapter):
"""
Access and decode bibles via the Crosswaly website
Version - the version of the bible like niv for New International version
bookid - not used
bookname - text name of in english eg 'gen' for Genesis
chapter - chapter number
"""
log.debug( "getBibleCWChapter %s,%s,%s,%s", version, bookid, bookname, chapter)
urlstring = "http://bible.crosswalk.com/OnlineStudyBible/bible.cgi?word="+bookname+"+"+str(chapter)+"&version="+version
log.debug( urlstring)
log.debug( "Url String %s", urlstring)
xml_string = ""
req = urllib2.Request(urlstring)
req.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)')
@ -70,8 +156,8 @@ class BibleHTTPImpl:
versePos = xml_string.find("<BLOCKQUOTE>")
#log.debug( versePos)
bible = {}
cleanbible = {}
while versePos > 0:
verseText = "" # clear out string
versePos = xml_string.find("<B><I>", versePos) + 6
i = xml_string.find("</I></B>", versePos)
#log.debug( versePos, i)
@ -88,16 +174,9 @@ class BibleHTTPImpl:
#log.debug( i, versePos)
verseText = xml_string[versePos: i]
versePos = i
bible[verse] = self._cleanVerse(verseText)
bible[verse] = self._cleanText(verseText)
#bible[verse] = verseText
#log.debug( bible)
return bible
def _cleanVerse(self, text):
text = text.replace('\n', '')
text = text.replace('\r', '')
text = text.replace('&nbsp;', '')
text = text.replace('<P>', '')
text = text.replace('"', '')
return text.rstrip()

View File

@ -46,14 +46,14 @@ class BibleManager(Plugin):
Init confirms the bible exists and stores the database path.
"""
log.debug( "Bible Initialising")
self.bibleDBCache = {}
self.bibleHTTPCache = {}
self.booksOfBible = {}
self.listOfBooks = []
self.booksChapters = {}
self.CWids = {}
self.verses = {}
self.verseData = {}
self.bibleDBCache = {} # dict of bible database classes
self.bibleHTTPCache = {} # dict of bible http readers
self.booksOfBible = {} # books of the bible linked to bibleid
self.listOfBooks = [] # list of books for display
self.booksChapters = {} # number of chapters for each bible id
self.BGids = {} # BibleGateway IDs id for bibleid
#self.verses = {} # number of verses for each book
self.verseData = {} # number of versers in each chapter by bookid
self.biblePath = ConfigHelper.getBiblePath()
#log.debug( self.biblePath )
files = os.listdir(self.biblePath)
@ -62,10 +62,10 @@ class BibleManager(Plugin):
for f in files:
b = f.split('.')[0]
self.bibleDBCache[b] = BibleDBImpl(b)
biblesource = self.bibleDBCache[b].getMeta("WEB") # look to see if lazy load bible and get create getter.
biblesource = self.bibleDBCache[b].getMeta("WEB") # look to see if lazy load bible exists and get create getter.
if biblesource:
nhttp = BibleHTTPImpl()
nhttp.setBibleSource(biblesource)
nhttp.setBibleSource(biblesource) # tell The Server where to get the verses from.
self.bibleHTTPCache[b] = nhttp
#
#Load in memory objects
@ -76,7 +76,7 @@ class BibleManager(Plugin):
for line in fbibledata:
p = line.split(",")
self.booksChapters[p[0]]=p[1]
self.CWids[p[0]]=p[2].replace('\n', '')
self.BGids[p[0]]=p[2].replace('\n', '')
v = p[3].replace('\n', '')
self.verseData[p[0]] = v
#log.debug( "\nbooks of the Bible", self.booksOfBible)
@ -93,6 +93,7 @@ class BibleManager(Plugin):
Return a list of bibles from a given URL.
The selected Bible can then be registered and LazyLoaded into a database
"""
log.debug( "registerHTTPBible %s,%s,%s,%s,%s,%s", biblename, biblesource, proxy, proxyport, proxyid, proxypass)
if self._isNewBible(biblename):
nbible = BibleDBImpl(biblename) # Create new Bible
nbible.createTables() # Create Database
@ -116,11 +117,12 @@ class BibleManager(Plugin):
self.bibleDBCache[biblename] = nbible
def loadBible(self,biblename):
log.debug( "loadBible %s", biblename)
"""
Downloads all the books of the bible
and loads it into the database
"""
log.debug( "loadBible %s", biblename)
bookabbrev = ""
for bookname in self.listOfBooks:
cptrs = self.booksChapters[ self.booksOfBible[bookname]]
log.debug( "book and chapter %s %s", bookname , self.booksChapters[ self.booksOfBible[bookname]] )
@ -128,9 +130,10 @@ class BibleManager(Plugin):
c = self.bibleDBCache[biblename].getBibleChapter(bookname, chptr) # check to see if book/chapter exists
log.debug( "got chapter %s", c)
if not c:
bookid = self.booksOfBible[bookname] # convert to id ie Genesis --> 1 Revelation --> 73
log.debug( "missing %s,%s", bookname, chptr)
self._loadBook(biblename,bookname)
self._loadChapter(biblename, bookname, chptr)
self._loadBook(biblename,bookid, bookname, bookabbrev)
self._loadChapter(biblename,bookid, bookname, chptr)
def getBibles(self):
"""
@ -148,20 +151,20 @@ class BibleManager(Plugin):
return self.listOfBooks
def getBookChapterCount(self, book):
    """
    Returns the number of chapters for a given book.

    book - name of the book, used as the key into booksOfBible

    The value is returned exactly as stored in booksChapters.
    A KeyError is raised if the book or its id is not known.
    """
    # The log call follows the docstring so the string above really is
    # the method's docstring, and the call is made once only.
    log.debug( "getBookChapterCount %s", book)
    bookid = self.booksOfBible[book]
    return self.booksChapters[bookid]
def getBookVerseCount(self, book, chapter):
log.debug( "getBookVerseCount %s,%s", book, chapter)
"""
Returns all the number of verses for a given
book and chapter
"""
log.debug( "getBookVerseCount %s,%s", book, chapter)
bookid = self.booksOfBible[book]
v = self.verseData[bookid].split(":")
log.debug( v)
@ -172,13 +175,13 @@ class BibleManager(Plugin):
Returns a list of verses for a given Book, Chapter and ranges of verses.
If the end verse(everse) is less then the start verse(sverse)
then only one verse is returned
Bible - Which bible
Book - full text description so needs to be converted.
bible - Which bible to use.
bookname - full text description so needs to be converted.
Rest can be guessed at !
"""
#log.debug( self.bibleDBCache)
#log.debug( self.bibleHTTPCache)
log.debug( "getchapter %s,%s,%s,%s,%s", bible, bookname, chapter, sverse, everse)
log.debug( "getVerseText %s,%s,%s,%s,%s", bible, bookname, chapter, sverse, everse)
bookid = self.booksOfBible[bookname] # convert to id ie Genesis --> 1 Revelation --> 73
# SORT OUT BOOKNAME BOOK ID.
# NAME COMES IN TO ID AND BACK TO NAME ?
@ -205,11 +208,10 @@ class BibleManager(Plugin):
def _loadChapter(self, bible, bookid, bookname, chapter):
    """
    Fetch one chapter through the bible's HTTP reader and hand the
    result to the bible's database object.

    bible    - name of the bible; key into bibleHTTPCache and bibleDBCache
    bookid   - id of the book, passed to the HTTP reader
    bookname - name of the book, passed to the reader and to createChapter
    chapter  - chapter number

    Returns nothing. Any error (for example the bible not being in the
    HTTP cache) is logged and otherwise ignored.
    """
    log.debug( "loadChapter %s,%s,%s,%s", bible, bookid,bookname, chapter)
    try:
        chaptlist = self.bibleHTTPCache[bible].getBibleChapter(bible, bookid, bookname, chapter)
        self.bibleDBCache[bible].createChapter(bookname, chapter, chaptlist)
    except Exception:
        # Best effort: report and carry on. Catching Exception (not a
        # bare except) lets KeyboardInterrupt and SystemExit through.
        log.error("Error thrown %s", sys.exc_info()[1])
def _isNewBible(self, name):
"""

View File

@ -64,7 +64,7 @@ class TestBibleManager:
# Register a bible from files
log.debug( "\n.......testRegisterBibleHTTP")
self.bm.registerHTTPBible("asv","Crosswalk", "", "", "", "")
#self.bm.registerBible("NIV", "ge", 1)
self.bm.registerHTTPBible("nasb","Biblegateway", "", "", "", "")
b = self.bm.getBibles()
for b1 in b:
log.debug( b1)
@ -99,15 +99,18 @@ class TestBibleManager:
def testGetVerseText(self):
# Calls getVerseText on the bible manager for several bibles and logs
# each result; nothing is asserted.
# NOTE(review): most lookups below appear both live and commented out.
# This looks like the old and new versions of the test shown together -
# confirm which set is meant to run.
log.debug( "\n.......testGetVerseText")
c = self.bm.getVerseText("TheMessage",'Genesis',1,2,1)
log.debug( c )
c = self.bm.getVerseText('NIV','Genesis',1,1,2)
log.debug( c )
#c = self.bm.getVerseText("TheMessage",'Genesis',1,2,1)
#log.debug( c )
#c = self.bm.getVerseText('NIV','Genesis',1,1,2)
#log.debug( c )
c = self.bm.getVerseText('asv','Revelation',1,1,2)
log.debug( c )
c = self.bm.getVerseText('asv','Revelation',1,5,9)
log.debug( c )
#c = self.bm.getVerseText('asv','Revelation',1,5,9)
#log.debug( c )
c = self.bm.getVerseText('nasb','Revelation',1,5,9)
log.debug( c )
def testLoadBible(self):
log.debug( "\n.......testLoadBible")
#self.bm.loadBible('asv')
self.bm.loadBible('nasb')