More fixes and add initial load of BibleGateway.

Some problems with stripping HTML tags!

bzr-revno: 85
This commit is contained in:
Tim Bentley 2008-11-09 20:11:31 +00:00
parent d2d9593ee2
commit 2f1cc4f02a
4 changed files with 137 additions and 52 deletions

View File

@ -28,6 +28,7 @@ from sqlalchemy.orm import sessionmaker, mapper
mypath=os.path.split(os.path.abspath(__file__))[0] mypath=os.path.split(os.path.abspath(__file__))[0]
sys.path.insert(0,(os.path.join(mypath, '..', '..', '..'))) sys.path.insert(0,(os.path.join(mypath, '..', '..', '..')))
from openlp.plugins.biblemanager.BibleCommon import BibleCommon
from openlp.utils import ConfigHelper from openlp.utils import ConfigHelper
import logging import logging
@ -111,7 +112,7 @@ mapper(ONTestament, testament_table)
mapper(Book, book_table) mapper(Book, book_table)
mapper(Verse, verse_table) mapper(Verse, verse_table)
class BibleDBImpl: class BibleDBImpl(BibleCommon):
global log global log
log=logging.getLogger("BibleDBMgr") log=logging.getLogger("BibleDBMgr")
log.info("BibleDB manager loaded") log.info("BibleDB manager loaded")
@ -137,6 +138,7 @@ class BibleDBImpl:
self.Session.configure(bind=self.db) self.Session.configure(bind=self.db)
def createTables(self): def createTables(self):
log.debug( "createTables")
if os.path.exists(self.biblefile): # delete bible file and set it up again if os.path.exists(self.biblefile): # delete bible file and set it up again
os.remove(self.biblefile) os.remove(self.biblefile)
meta_table.create() meta_table.create()
@ -144,6 +146,7 @@ class BibleDBImpl:
book_table.create() book_table.create()
verse_table.create() verse_table.create()
self.loadMeta("dbversion", "0.1") self.loadMeta("dbversion", "0.1")
self._loadTestaments
def createChapter(self, bookname, chap, textlist): def createChapter(self, bookname, chap, textlist):
log.debug( "createChapter %s,%s,%s", bookname, chap, textlist) log.debug( "createChapter %s,%s,%s", bookname, chap, textlist)
@ -165,6 +168,7 @@ class BibleDBImpl:
bookmeta = Book(int(5), bookname, bookabbrev) bookmeta = Book(int(5), bookname, bookabbrev)
session.add(bookmeta) session.add(bookmeta)
session.commit() session.commit()
self._loadTestaments()
def loadMeta(self, key, value): def loadMeta(self, key, value):
metadata.bind.echo = False metadata.bind.echo = False
@ -183,12 +187,15 @@ class BibleDBImpl:
self.db.execute(s, k=key) self.db.execute(s, k=key)
def _loadTestaments(self): def _loadTestaments(self):
log.debug("loadTestaments")
metadata.bind.echo = False metadata.bind.echo = False
session = self.Session() session = self.Session()
testmeta = ONTestament(name="Old Testament") testmeta = ONTestament(name="Old Testament")
session.add(testmeta) session.add(testmeta)
testmeta = ONTestament(name="New Testament") testmeta = ONTestament(name="New Testament")
session.add(testmeta) session.add(testmeta)
testmeta = ONTestament(name="Apocrypha")
session.add(testmeta)
session.commit() session.commit()
@ -256,9 +263,3 @@ class BibleDBImpl:
log.debug( "...............................Verses ") log.debug( "...............................Verses ")
s = text (""" select * FROM verse """) s = text (""" select * FROM verse """)
log.debug( self.db.execute(s).fetchall()) log.debug( self.db.execute(s).fetchall())
def _cleanText(self, text):
text = text.replace('\n', '')
text = text.replace('\r', '')
text = text.replace('"', '')
return text

View File

@ -20,6 +20,10 @@ import os, os.path
import sys import sys
import urllib2 import urllib2
mypath=os.path.split(os.path.abspath(__file__))[0]
sys.path.insert(0,(os.path.join(mypath, '..', '..', '..')))
from openlp.plugins.biblemanager.BibleCommon import BibleCommon
import logging import logging
logging.basicConfig(level=logging.DEBUG, logging.basicConfig(level=logging.DEBUG,
format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s', format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
@ -27,7 +31,7 @@ logging.basicConfig(level=logging.DEBUG,
filename='plugins.log', filename='plugins.log',
filemode='w') filemode='w')
class BibleHTTPImpl: class BibleHTTPImpl(BibleCommon):
global log global log
log=logging.getLogger("BibleHTTPMgr") log=logging.getLogger("BibleHTTPMgr")
log.info("BibleHTTP manager loaded") log.info("BibleHTTP manager loaded")
@ -40,14 +44,96 @@ class BibleHTTPImpl:
Init confirms the bible exists and stores the database path. Init confirms the bible exists and stores the database path.
""" """
bible = {} bible = {}
biblesoure = "" biblesource = ""
def setBibleSource(self,biblesource): def setBibleSource(self,biblesource):
"""
Set the source of where the bible text is coming from
"""
log.debug("setBibleSource %s", biblesource)
self.biblesource = biblesource self.biblesource = biblesource
def getBibleChapter(self, version, bookid, bookname, chapter):
    """
    Receive the request and call the relevant handler method.

    version  - bible version identifier, passed through to the handler
    bookid   - numeric id of the book, passed through to the handler
    bookname - name of the book, passed through to the handler
    chapter  - chapter number

    Dispatches on self.biblesource: 'Crosswalk' goes to getBibleCWChapter,
    anything else goes to getBibleBGChapter.

    Returns whatever the handler returns. If the BibleGateway handler
    raises, the error is logged and None is returned.
    """
    log.debug("getBibleChapter %s,%s,%s,%s", version, bookid, bookname, chapter)
    log.debug("biblesource = %s", self.biblesource)
    if self.biblesource == 'Crosswalk':
        # Errors from the Crosswalk handler propagate to the caller.
        return self.getBibleCWChapter(version, bookid, bookname, chapter)
    try:
        return self.getBibleBGChapter(version, bookid, bookname, chapter)
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit are not swallowed here.
        log.error("Error thrown = %s", sys.exc_info()[1])
        return None
def getBibleBGChapter(self, version, bookid, bookname, chapter):
    """
    Access and decode bibles via the BibleGateway website.

    version  - the version of the bible, like 31 for New International version
    bookid   - book id for the book of the bible, e.g. 1 for Genesis
    bookname - not used
    chapter  - chapter number

    Returns a dict mapping verse number (starting at 1) to the cleaned
    verse text. The dict is empty when the page could not be fetched or
    no verse markers were found in it.
    """
    # NOTE(review): the requested version is overridden with a fixed id, so
    # every request fetches version 49. Presumably temporary while callers
    # still pass the bible name rather than a BibleGateway id - confirm.
    version = 49
    log.debug("getBibleBGChapter %s,%s,%s,%s", version, bookid, bookname, chapter)
    # Each query parameter needs its own '='; the chapter one was missing it.
    urlstring = ("http://www.biblegateway.com/passage/?book_id=" + str(bookid) +
                 "&chapter=" + str(chapter) + "&version=" + str(version))
    log.debug("Url String %s", urlstring)

    xml_string = ""
    req = urllib2.Request(urlstring)
    req.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)')
    try:
        handle = urllib2.urlopen(req)
        try:
            xml_string = handle.read()
        finally:
            handle.close()
    except IOError:
        # Log every failure, including errors that carry no 'reason'
        # attribute; parsing then continues on the empty string.
        err = sys.exc_info()[1]
        log.error("Reason : %s", getattr(err, 'reason', err))

    VerseSearch = 'class="sup">'  # marker that follows each verse number
    verse = 1
    # Drop everything before the passage container. If the container is
    # missing, find() returns -1 and only the last character is kept, so
    # no verse marker is found below.
    i = xml_string.find("result-text-style-normal")
    xml_string = xml_string[i:]
    versePos = xml_string.find(VerseSearch)
    bible = {}
    while versePos > 0:
        # Move to the end of the verse-number span; the text follows "</span>".
        versePos = xml_string.find("</span", versePos)
        i = xml_string.find(VerseSearch, versePos + 1)
        if i == -1:
            # Last verse: the text runs to the closing div, or to a
            # "<strong" tag if one comes first.
            i = xml_string.find("</div", versePos + 1)
            j = xml_string.find("<strong", versePos + 1)
            if j > 0 and j < i:
                i = j
            verseText = xml_string[versePos + 7:i]
            log.debug("VerseText = %s %s", verse, verseText)
            bible[verse] = self._cleanText(verseText)  # store the verse
            break
        # Another verse follows: the text ends where its "<span" tag opens.
        spanStart = xml_string[:i].rfind("<span")
        if spanStart <= versePos:
            # No opening tag between this verse and the next marker, so
            # the string cannot be advanced; stop instead of looping forever.
            log.error("Unexpected markup after verse %s, stopping", verse)
            break
        # End the slice at the tag so its "<" is not included in the text.
        verseText = xml_string[versePos + 7:spanStart]
        xml_string = xml_string[spanStart + 1:]  # chop off the stored verse
        versePos = xml_string.find(VerseSearch)  # look for the next verse
        bible[verse] = self._cleanText(verseText)  # store the verse
        verse += 1
    return bible
def getBibleCWChapter(self, version, bookid, bookname, chapter):
"""
Access and decode bibles via the Crosswalk website
Version - the version of the bible like niv for New International version
bookid - not used
bookname - text name of in english eg 'gen' for Genesis
chapter - chapter number
"""
log.debug( "getBibleCWChapter %s,%s,%s,%s", version, bookid, bookname, chapter)
urlstring = "http://bible.crosswalk.com/OnlineStudyBible/bible.cgi?word="+bookname+"+"+str(chapter)+"&version="+version urlstring = "http://bible.crosswalk.com/OnlineStudyBible/bible.cgi?word="+bookname+"+"+str(chapter)+"&version="+version
log.debug( urlstring) log.debug( "Url String %s", urlstring)
xml_string = "" xml_string = ""
req = urllib2.Request(urlstring) req = urllib2.Request(urlstring)
req.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)') req.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)')
@ -70,8 +156,8 @@ class BibleHTTPImpl:
versePos = xml_string.find("<BLOCKQUOTE>") versePos = xml_string.find("<BLOCKQUOTE>")
#log.debug( versePos) #log.debug( versePos)
bible = {} bible = {}
cleanbible = {}
while versePos > 0: while versePos > 0:
verseText = "" # clear out string
versePos = xml_string.find("<B><I>", versePos) + 6 versePos = xml_string.find("<B><I>", versePos) + 6
i = xml_string.find("</I></B>", versePos) i = xml_string.find("</I></B>", versePos)
#log.debug( versePos, i) #log.debug( versePos, i)
@ -88,16 +174,9 @@ class BibleHTTPImpl:
#log.debug( i, versePos) #log.debug( i, versePos)
verseText = xml_string[versePos: i] verseText = xml_string[versePos: i]
versePos = i versePos = i
bible[verse] = self._cleanVerse(verseText) bible[verse] = self._cleanText(verseText)
#bible[verse] = verseText
#log.debug( bible) #log.debug( bible)
return bible return bible
def _cleanVerse(self, text):
text = text.replace('\n', '')
text = text.replace('\r', '')
text = text.replace('&nbsp;', '')
text = text.replace('<P>', '')
text = text.replace('"', '')
return text.rstrip()

View File

@ -46,14 +46,14 @@ class BibleManager(Plugin):
Init confirms the bible exists and stores the database path. Init confirms the bible exists and stores the database path.
""" """
log.debug( "Bible Initialising") log.debug( "Bible Initialising")
self.bibleDBCache = {} self.bibleDBCache = {} # dict of bible database classes
self.bibleHTTPCache = {} self.bibleHTTPCache = {} # dict of bible http readers
self.booksOfBible = {} self.booksOfBible = {} # books of the bible linked to bibleid
self.listOfBooks = [] self.listOfBooks = [] # list of books for display
self.booksChapters = {} self.booksChapters = {} # number of chapters for each bible id
self.CWids = {} self.BGids = {} # BibleGateway IDs id for bibleid
self.verses = {} #self.verses = {} # number of verses for each book
self.verseData = {} self.verseData = {} # number of versers in each chapter by bookid
self.biblePath = ConfigHelper.getBiblePath() self.biblePath = ConfigHelper.getBiblePath()
#log.debug( self.biblePath ) #log.debug( self.biblePath )
files = os.listdir(self.biblePath) files = os.listdir(self.biblePath)
@ -62,10 +62,10 @@ class BibleManager(Plugin):
for f in files: for f in files:
b = f.split('.')[0] b = f.split('.')[0]
self.bibleDBCache[b] = BibleDBImpl(b) self.bibleDBCache[b] = BibleDBImpl(b)
biblesource = self.bibleDBCache[b].getMeta("WEB") # look to see if lazy load bible and get create getter. biblesource = self.bibleDBCache[b].getMeta("WEB") # look to see if lazy load bible exists and get create getter.
if biblesource: if biblesource:
nhttp = BibleHTTPImpl() nhttp = BibleHTTPImpl()
nhttp.setBibleSource(biblesource) nhttp.setBibleSource(biblesource) # tell The Server where to get the verses from.
self.bibleHTTPCache[b] = nhttp self.bibleHTTPCache[b] = nhttp
# #
#Load in memory objects #Load in memory objects
@ -76,7 +76,7 @@ class BibleManager(Plugin):
for line in fbibledata: for line in fbibledata:
p = line.split(",") p = line.split(",")
self.booksChapters[p[0]]=p[1] self.booksChapters[p[0]]=p[1]
self.CWids[p[0]]=p[2].replace('\n', '') self.BGids[p[0]]=p[2].replace('\n', '')
v = p[3].replace('\n', '') v = p[3].replace('\n', '')
self.verseData[p[0]] = v self.verseData[p[0]] = v
#log.debug( "\nbooks of the Bible", self.booksOfBible) #log.debug( "\nbooks of the Bible", self.booksOfBible)
@ -93,6 +93,7 @@ class BibleManager(Plugin):
Return a list of bibles from a given URL. Return a list of bibles from a given URL.
The selected Bible can then be registered and LazyLoaded into a database The selected Bible can then be registered and LazyLoaded into a database
""" """
log.debug( "registerHTTPBible %s,%s,%s,%s,%s,%s", biblename, biblesource, proxy, proxyport, proxyid, proxypass)
if self._isNewBible(biblename): if self._isNewBible(biblename):
nbible = BibleDBImpl(biblename) # Create new Bible nbible = BibleDBImpl(biblename) # Create new Bible
nbible.createTables() # Create Database nbible.createTables() # Create Database
@ -116,11 +117,12 @@ class BibleManager(Plugin):
self.bibleDBCache[biblename] = nbible self.bibleDBCache[biblename] = nbible
def loadBible(self,biblename): def loadBible(self,biblename):
log.debug( "loadBible %s", biblename)
""" """
Downloads all the books of the bible Downloads all the books of the bible
and loads it into the database and loads it into the database
""" """
log.debug( "loadBible %s", biblename)
bookabbrev = ""
for bookname in self.listOfBooks: for bookname in self.listOfBooks:
cptrs = self.booksChapters[ self.booksOfBible[bookname]] cptrs = self.booksChapters[ self.booksOfBible[bookname]]
log.debug( "book and chapter %s %s", bookname , self.booksChapters[ self.booksOfBible[bookname]] ) log.debug( "book and chapter %s %s", bookname , self.booksChapters[ self.booksOfBible[bookname]] )
@ -128,9 +130,10 @@ class BibleManager(Plugin):
c = self.bibleDBCache[biblename].getBibleChapter(bookname, chptr) # check to see if book/chapter exists c = self.bibleDBCache[biblename].getBibleChapter(bookname, chptr) # check to see if book/chapter exists
log.debug( "got chapter %s", c) log.debug( "got chapter %s", c)
if not c: if not c:
bookid = self.booksOfBible[bookname] # convert to id ie Genesis --> 1 Revelation --> 73
log.debug( "missing %s,%s", bookname, chptr) log.debug( "missing %s,%s", bookname, chptr)
self._loadBook(biblename,bookname) self._loadBook(biblename,bookid, bookname, bookabbrev)
self._loadChapter(biblename, bookname, chptr) self._loadChapter(biblename,bookid, bookname, chptr)
def getBibles(self): def getBibles(self):
""" """
@ -148,20 +151,20 @@ class BibleManager(Plugin):
return self.listOfBooks return self.listOfBooks
def getBookChapterCount(self, book): def getBookChapterCount(self, book):
log.debug( "getBookChapterCount %s", book)
""" """
Returns all the number of Chapters for a given Returns all the number of Chapters for a given
book book
""" """
log.debug( "getBookChapterCount %s", book)
bookid = self.booksOfBible[book] bookid = self.booksOfBible[book]
return self.booksChapters[bookid] return self.booksChapters[bookid]
def getBookVerseCount(self, book, chapter): def getBookVerseCount(self, book, chapter):
log.debug( "getBookVerseCount %s,%s", book, chapter)
""" """
Returns all the number of verses for a given Returns all the number of verses for a given
book and chapter book and chapter
""" """
log.debug( "getBookVerseCount %s,%s", book, chapter)
bookid = self.booksOfBible[book] bookid = self.booksOfBible[book]
v = self.verseData[bookid].split(":") v = self.verseData[bookid].split(":")
log.debug( v) log.debug( v)
@ -172,13 +175,13 @@ class BibleManager(Plugin):
Returns a list of verses for a given Book, Chapter and ranges of verses. Returns a list of verses for a given Book, Chapter and ranges of verses.
If the end verse(everse) is less then the start verse(sverse) If the end verse(everse) is less then the start verse(sverse)
then only one verse is returned then only one verse is returned
Bible - Which bible bible - Which bible to use.
Book - full text description so needs to be converted. bookname - full text description so needs to be converted.
Rest can be guessed at ! Rest can be guessed at !
""" """
#log.debug( self.bibleDBCache) #log.debug( self.bibleDBCache)
#log.debug( self.bibleHTTPCache) #log.debug( self.bibleHTTPCache)
log.debug( "getchapter %s,%s,%s,%s,%s", bible, bookname, chapter, sverse, everse) log.debug( "getVerseText %s,%s,%s,%s,%s", bible, bookname, chapter, sverse, everse)
bookid = self.booksOfBible[bookname] # convert to id ie Genesis --> 1 Revelation --> 73 bookid = self.booksOfBible[bookname] # convert to id ie Genesis --> 1 Revelation --> 73
# SORT OUT BOOKNAME BOOK ID. # SORT OUT BOOKNAME BOOK ID.
# NAME COMES IN TO ID AND BACK TO NAME ? # NAME COMES IN TO ID AND BACK TO NAME ?
@ -205,11 +208,10 @@ class BibleManager(Plugin):
def _loadChapter(self, bible, bookid,bookname, chapter): def _loadChapter(self, bible, bookid,bookname, chapter):
log.debug( "loadChapter %s,%s,%s,%s", bible, bookid,bookname, chapter) log.debug( "loadChapter %s,%s,%s,%s", bible, bookid,bookname, chapter)
try : try :
print self.bibleHTTPCache[bible]
chaptlist = self.bibleHTTPCache[bible].getBibleChapter(bible, bookid,bookname, chapter) chaptlist = self.bibleHTTPCache[bible].getBibleChapter(bible, bookid,bookname, chapter)
self.bibleDBCache[bible].createChapter(bookname, chapter, chaptlist) self.bibleDBCache[bible].createChapter(bookname, chapter, chaptlist)
except : except :
log.error("Bible %s not found in Http cache", bible) log.error("Errow thrown %s", sys.exc_info()[1])
def _isNewBible(self, name): def _isNewBible(self, name):
""" """

View File

@ -64,7 +64,7 @@ class TestBibleManager:
# Register a bible from files # Register a bible from files
log.debug( "\n.......testRegisterBibleHTTP") log.debug( "\n.......testRegisterBibleHTTP")
self.bm.registerHTTPBible("asv","Crosswalk", "", "", "", "") self.bm.registerHTTPBible("asv","Crosswalk", "", "", "", "")
#self.bm.registerBible("NIV", "ge", 1) self.bm.registerHTTPBible("nasb","Biblegateway", "", "", "", "")
b = self.bm.getBibles() b = self.bm.getBibles()
for b1 in b: for b1 in b:
log.debug( b1) log.debug( b1)
@ -99,15 +99,18 @@ class TestBibleManager:
def testGetVerseText(self): def testGetVerseText(self):
log.debug( "\n.......testGetVerseText") log.debug( "\n.......testGetVerseText")
c = self.bm.getVerseText("TheMessage",'Genesis',1,2,1) #c = self.bm.getVerseText("TheMessage",'Genesis',1,2,1)
log.debug( c ) #log.debug( c )
c = self.bm.getVerseText('NIV','Genesis',1,1,2) #c = self.bm.getVerseText('NIV','Genesis',1,1,2)
log.debug( c ) #log.debug( c )
c = self.bm.getVerseText('asv','Revelation',1,1,2) c = self.bm.getVerseText('asv','Revelation',1,1,2)
log.debug( c ) log.debug( c )
c = self.bm.getVerseText('asv','Revelation',1,5,9) #c = self.bm.getVerseText('asv','Revelation',1,5,9)
#log.debug( c )
c = self.bm.getVerseText('nasb','Revelation',1,5,9)
log.debug( c ) log.debug( c )
def testLoadBible(self): def testLoadBible(self):
log.debug( "\n.......testLoadBible") log.debug( "\n.......testLoadBible")
#self.bm.loadBible('asv') #self.bm.loadBible('asv')
self.bm.loadBible('nasb')