Convert html entities into letters.

This commit is contained in:
Raoul Snyman 2010-03-21 16:38:30 +02:00
commit 253c396d7c
5 changed files with 54 additions and 11 deletions

View File

@ -226,6 +226,7 @@ class MainDisplay(DisplayWidget):
``frame`` ``frame``
Image frame to be rendered Image frame to be rendered
""" """
log.debug(u'frameView %d' % (self.displayBlank))
if not self.displayBlank: if not self.displayBlank:
if transition: if transition:
if self.frame is not None: if self.frame is not None:
@ -248,14 +249,22 @@ class MainDisplay(DisplayWidget):
if not self.isVisible(): if not self.isVisible():
self.setVisible(True) self.setVisible(True)
self.showFullScreen() self.showFullScreen()
else:
self.waitingFrame = frame
self.waitingFrameTrans = transition
def blankDisplay(self, blanked=True): def blankDisplay(self, blanked=True):
log.debug(u'Blank main Display %d' % blanked)
if blanked: if blanked:
self.displayBlank = True self.displayBlank = True
self.display_text.setPixmap(QtGui.QPixmap.fromImage(self.blankFrame)) self.display_text.setPixmap(QtGui.QPixmap.fromImage(self.blankFrame))
self.waitingFrame = None
self.waitingFrameTrans = False
else: else:
self.displayBlank = False self.displayBlank = False
if self.display_frame: if self.waitingFrame:
self.frameView(self.waitingFrame, self.waitingFrameTrans)
elif self.display_frame:
self.frameView(self.display_frame) self.frameView(self.display_frame)
def onMediaQueue(self, message): def onMediaQueue(self, message):

View File

@ -524,6 +524,7 @@ class SlideController(QtGui.QWidget):
""" """
Handle the blank screen button Handle the blank screen button
""" """
log.debug(u'onBlankDisplay %d' % force)
if force: if force:
self.blankButton.setChecked(True) self.blankButton.setChecked(True)
self.blankScreen(self.blankButton.isChecked()) self.blankScreen(self.blankButton.isChecked())
@ -542,6 +543,8 @@ class SlideController(QtGui.QWidget):
Receiver.send_message(u'%s_unblank'% self.serviceItem.name.lower()) Receiver.send_message(u'%s_unblank'% self.serviceItem.name.lower())
else: else:
self.parent.mainDisplay.blankDisplay(blanked) self.parent.mainDisplay.blankDisplay(blanked)
else:
self.parent.mainDisplay.blankDisplay(blanked)
def onSlideSelected(self): def onSlideSelected(self):
""" """

View File

@ -341,17 +341,17 @@ class ImportWizardForm(QtGui.QWizard, Ui_BibleImportWizard):
download_location = self.field(u'web_location').toInt()[0] download_location = self.field(u'web_location').toInt()[0]
if download_location == DownloadLocation.Crosswalk: if download_location == DownloadLocation.Crosswalk:
bible = self.web_bible_list[DownloadLocation.Crosswalk][ bible = self.web_bible_list[DownloadLocation.Crosswalk][
unicode(self.BibleComboBox.currentText())] unicode(self.BibleComboBox.currentText(), u'utf8')]
elif download_location == DownloadLocation.BibleGateway: elif download_location == DownloadLocation.BibleGateway:
bible = self.web_bible_list[DownloadLocation.BibleGateway][ bible = self.web_bible_list[DownloadLocation.BibleGateway][
unicode(self.BibleComboBox.currentText())] self.BibleComboBox.currentText()]
importer = self.manager.import_bible(BibleFormat.WebDownload, importer = self.manager.import_bible(BibleFormat.WebDownload,
name=unicode(self.field(u'license_version').toString()), name=unicode(self.field(u'license_version').toString(), u'utf8'),
download_source=unicode(DownloadLocation.get_name(download_location)), download_source=unicode(DownloadLocation.get_name(download_location)),
download_name=unicode(bible), download_name=unicode(bible, u'utf8'),
proxy_server=unicode(self.field(u'proxy_server').toString()), proxy_server=unicode(self.field(u'proxy_server').toString(), u'utf8'),
proxy_username=unicode(self.field(u'proxy_username').toString()), proxy_username=unicode(self.field(u'proxy_username').toString(), u'utf8'),
proxy_password=unicode(self.field(u'proxy_password').toString()) proxy_password=unicode(self.field(u'proxy_password').toString(), u'utf8')
) )
success = importer.do_import() success = importer.do_import()
if success: if success:

View File

@ -27,6 +27,7 @@ import urllib2
import logging import logging
import re import re
import chardet import chardet
import htmlentitydefs
only_verses = re.compile(r'([\w .]+)[ ]+([0-9]+)[ ]*[:|v|V][ ]*([0-9]+)' only_verses = re.compile(r'([\w .]+)[ ]+([0-9]+)[ ]*[:|v|V][ ]*([0-9]+)'
r'(?:[ ]*-[ ]*([0-9]+|end))?(?:[ ]*,[ ]*([0-9]+)(?:[ ]*-[ ]*([0-9]+|end))?)?', r'(?:[ ]*-[ ]*([0-9]+|end))?(?:[ ]*,[ ]*([0-9]+)(?:[ ]*-[ ]*([0-9]+|end))?)?',
@ -115,7 +116,6 @@ def parse_reference(reference):
log.debug(reference_list) log.debug(reference_list)
return reference_list return reference_list
class SearchResults(object): class SearchResults(object):
""" """
Encapsulate a set of search results. This is Bible-type independant. Encapsulate a set of search results. This is Bible-type independant.
@ -247,3 +247,33 @@ class BibleCommon(object):
start_tag = text.find(u'<') start_tag = text.find(u'<')
text = text.replace(u'>', u'') text = text.replace(u'>', u'')
return text.rstrip().lstrip() return text.rstrip().lstrip()
def unescape(text):
"""
Removes HTML or XML character references and entities from a text string.
Courtesy of Fredrik Lundh, http://effbot.org/zone/re-sub.htm#unescape-html
@param text The HTML (or XML) source text.
@return The plain text, as a Unicode string, if necessary.
"""
def fixup(m):
text = m.group(0)
if text[:2] == u'&#':
# character reference
try:
if text[:3] == u'&#x':
return unichr(int(text[3:-1], 16))
else:
return unichr(int(text[2:-1]))
except ValueError:
pass
else:
# named entity
try:
text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
except KeyError:
pass
return text # leave as is
return re.sub(u'&#?\w+;', fixup, text)

View File

@ -32,7 +32,7 @@ from BeautifulSoup import BeautifulSoup, Tag, NavigableString
from openlp.core.lib import Receiver from openlp.core.lib import Receiver
from openlp.core.utils import AppLocation from openlp.core.utils import AppLocation
from common import BibleCommon, SearchResults from common import BibleCommon, SearchResults, unescape
from db import BibleDB from db import BibleDB
from openlp.plugins.bibles.lib.models import Book from openlp.plugins.bibles.lib.models import Book
@ -196,7 +196,8 @@ class BGExtract(BibleCommon):
verse_list[verse_number] = u'' verse_list[verse_number] = u''
continue continue
if isinstance(verse, NavigableString): if isinstance(verse, NavigableString):
verse_list[verse_number] = verse_list[verse_number] + verse.replace(u'&nbsp;', u' ') verse_list[verse_number] = verse_list[verse_number] + \
unescape(unicode(verse, u'utf-8').replace(u'&nbsp;', u' '))
# Delete the "0" element, since we don't need it, it's just there for # Delete the "0" element, since we don't need it, it's just there for
# some stupid initial whitespace, courtesy of Bible Gateway. # some stupid initial whitespace, courtesy of Bible Gateway.
del verse_list[0] del verse_list[0]