Convert html entities into letters.

This commit is contained in:
Raoul Snyman 2010-03-21 16:38:30 +02:00
commit 253c396d7c
5 changed files with 54 additions and 11 deletions

View File

@ -226,6 +226,7 @@ class MainDisplay(DisplayWidget):
``frame``
Image frame to be rendered
"""
log.debug(u'frameView %d' % (self.displayBlank))
if not self.displayBlank:
if transition:
if self.frame is not None:
@ -248,14 +249,22 @@ class MainDisplay(DisplayWidget):
if not self.isVisible():
self.setVisible(True)
self.showFullScreen()
else:
self.waitingFrame = frame
self.waitingFrameTrans = transition
def blankDisplay(self, blanked=True):
log.debug(u'Blank main Display %d' % blanked)
if blanked:
self.displayBlank = True
self.display_text.setPixmap(QtGui.QPixmap.fromImage(self.blankFrame))
self.waitingFrame = None
self.waitingFrameTrans = False
else:
self.displayBlank = False
if self.display_frame:
if self.waitingFrame:
self.frameView(self.waitingFrame, self.waitingFrameTrans)
elif self.display_frame:
self.frameView(self.display_frame)
def onMediaQueue(self, message):

View File

@ -524,6 +524,7 @@ class SlideController(QtGui.QWidget):
"""
Handle the blank screen button
"""
log.debug(u'onBlankDisplay %d' % force)
if force:
self.blankButton.setChecked(True)
self.blankScreen(self.blankButton.isChecked())
@ -542,6 +543,8 @@ class SlideController(QtGui.QWidget):
Receiver.send_message(u'%s_unblank'% self.serviceItem.name.lower())
else:
self.parent.mainDisplay.blankDisplay(blanked)
else:
self.parent.mainDisplay.blankDisplay(blanked)
def onSlideSelected(self):
"""

View File

@ -341,17 +341,17 @@ class ImportWizardForm(QtGui.QWizard, Ui_BibleImportWizard):
download_location = self.field(u'web_location').toInt()[0]
if download_location == DownloadLocation.Crosswalk:
bible = self.web_bible_list[DownloadLocation.Crosswalk][
unicode(self.BibleComboBox.currentText())]
unicode(self.BibleComboBox.currentText(), u'utf8')]
elif download_location == DownloadLocation.BibleGateway:
bible = self.web_bible_list[DownloadLocation.BibleGateway][
unicode(self.BibleComboBox.currentText())]
self.BibleComboBox.currentText()]
importer = self.manager.import_bible(BibleFormat.WebDownload,
name=unicode(self.field(u'license_version').toString()),
name=unicode(self.field(u'license_version').toString(), u'utf8'),
download_source=unicode(DownloadLocation.get_name(download_location)),
download_name=unicode(bible),
proxy_server=unicode(self.field(u'proxy_server').toString()),
proxy_username=unicode(self.field(u'proxy_username').toString()),
proxy_password=unicode(self.field(u'proxy_password').toString())
download_name=unicode(bible, u'utf8'),
proxy_server=unicode(self.field(u'proxy_server').toString(), u'utf8'),
proxy_username=unicode(self.field(u'proxy_username').toString(), u'utf8'),
proxy_password=unicode(self.field(u'proxy_password').toString(), u'utf8')
)
success = importer.do_import()
if success:

View File

@ -27,6 +27,7 @@ import urllib2
import logging
import re
import chardet
import htmlentitydefs
only_verses = re.compile(r'([\w .]+)[ ]+([0-9]+)[ ]*[:|v|V][ ]*([0-9]+)'
r'(?:[ ]*-[ ]*([0-9]+|end))?(?:[ ]*,[ ]*([0-9]+)(?:[ ]*-[ ]*([0-9]+|end))?)?',
@ -115,7 +116,6 @@ def parse_reference(reference):
log.debug(reference_list)
return reference_list
class SearchResults(object):
"""
Encapsulate a set of search results. This is Bible-type independant.
@ -247,3 +247,33 @@ class BibleCommon(object):
start_tag = text.find(u'<')
text = text.replace(u'>', u'')
return text.rstrip().lstrip()
def unescape(text):
"""
Removes HTML or XML character references and entities from a text string.
Courtesy of Fredrik Lundh, http://effbot.org/zone/re-sub.htm#unescape-html
@param text The HTML (or XML) source text.
@return The plain text, as a Unicode string, if necessary.
"""
def fixup(m):
text = m.group(0)
if text[:2] == u'&#':
# character reference
try:
if text[:3] == u'&#x':
return unichr(int(text[3:-1], 16))
else:
return unichr(int(text[2:-1]))
except ValueError:
pass
else:
# named entity
try:
text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
except KeyError:
pass
return text # leave as is
return re.sub(u'&#?\w+;', fixup, text)

View File

@ -32,7 +32,7 @@ from BeautifulSoup import BeautifulSoup, Tag, NavigableString
from openlp.core.lib import Receiver
from openlp.core.utils import AppLocation
from common import BibleCommon, SearchResults
from common import BibleCommon, SearchResults, unescape
from db import BibleDB
from openlp.plugins.bibles.lib.models import Book
@ -196,7 +196,8 @@ class BGExtract(BibleCommon):
verse_list[verse_number] = u''
continue
if isinstance(verse, NavigableString):
verse_list[verse_number] = verse_list[verse_number] + verse.replace(u'&nbsp;', u' ')
verse_list[verse_number] = verse_list[verse_number] + \
unescape(unicode(verse, u'utf-8').replace(u'&nbsp;', u' '))
# Delete the "0" element, since we don't need it, it's just there for
# some stupid initial whitespace, courtesy of Bible Gateway.
del verse_list[0]