forked from openlp/openlp
Bug fixes for the Bible imports.
bzr-revno: 751
This commit is contained in:
commit
82a6bc791f
@ -341,17 +341,17 @@ class ImportWizardForm(QtGui.QWizard, Ui_BibleImportWizard):
|
|||||||
download_location = self.field(u'web_location').toInt()[0]
|
download_location = self.field(u'web_location').toInt()[0]
|
||||||
if download_location == DownloadLocation.Crosswalk:
|
if download_location == DownloadLocation.Crosswalk:
|
||||||
bible = self.web_bible_list[DownloadLocation.Crosswalk][
|
bible = self.web_bible_list[DownloadLocation.Crosswalk][
|
||||||
unicode(self.BibleComboBox.currentText())]
|
unicode(self.BibleComboBox.currentText(), u'utf8')]
|
||||||
elif download_location == DownloadLocation.BibleGateway:
|
elif download_location == DownloadLocation.BibleGateway:
|
||||||
bible = self.web_bible_list[DownloadLocation.BibleGateway][
|
bible = self.web_bible_list[DownloadLocation.BibleGateway][
|
||||||
unicode(self.BibleComboBox.currentText())]
|
self.BibleComboBox.currentText()]
|
||||||
importer = self.manager.import_bible(BibleFormat.WebDownload,
|
importer = self.manager.import_bible(BibleFormat.WebDownload,
|
||||||
name=unicode(self.field(u'license_version').toString()),
|
name=unicode(self.field(u'license_version').toString(), u'utf8'),
|
||||||
download_source=unicode(DownloadLocation.get_name(download_location)),
|
download_source=unicode(DownloadLocation.get_name(download_location)),
|
||||||
download_name=unicode(bible),
|
download_name=unicode(bible, u'utf8'),
|
||||||
proxy_server=unicode(self.field(u'proxy_server').toString()),
|
proxy_server=unicode(self.field(u'proxy_server').toString(), u'utf8'),
|
||||||
proxy_username=unicode(self.field(u'proxy_username').toString()),
|
proxy_username=unicode(self.field(u'proxy_username').toString(), u'utf8'),
|
||||||
proxy_password=unicode(self.field(u'proxy_password').toString())
|
proxy_password=unicode(self.field(u'proxy_password').toString(), u'utf8')
|
||||||
)
|
)
|
||||||
success = importer.do_import()
|
success = importer.do_import()
|
||||||
if success:
|
if success:
|
||||||
|
@ -27,6 +27,7 @@ import urllib2
|
|||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
import chardet
|
import chardet
|
||||||
|
import htmlentitydefs
|
||||||
|
|
||||||
only_verses = re.compile(r'([\w .]+)[ ]+([0-9]+)[ ]*[:|v|V][ ]*([0-9]+)'
|
only_verses = re.compile(r'([\w .]+)[ ]+([0-9]+)[ ]*[:|v|V][ ]*([0-9]+)'
|
||||||
r'(?:[ ]*-[ ]*([0-9]+|end))?(?:[ ]*,[ ]*([0-9]+)(?:[ ]*-[ ]*([0-9]+|end))?)?',
|
r'(?:[ ]*-[ ]*([0-9]+|end))?(?:[ ]*,[ ]*([0-9]+)(?:[ ]*-[ ]*([0-9]+|end))?)?',
|
||||||
@ -115,7 +116,6 @@ def parse_reference(reference):
|
|||||||
log.debug(reference_list)
|
log.debug(reference_list)
|
||||||
return reference_list
|
return reference_list
|
||||||
|
|
||||||
|
|
||||||
class SearchResults(object):
|
class SearchResults(object):
|
||||||
"""
|
"""
|
||||||
Encapsulate a set of search results. This is Bible-type independant.
|
Encapsulate a set of search results. This is Bible-type independant.
|
||||||
@ -247,3 +247,33 @@ class BibleCommon(object):
|
|||||||
start_tag = text.find(u'<')
|
start_tag = text.find(u'<')
|
||||||
text = text.replace(u'>', u'')
|
text = text.replace(u'>', u'')
|
||||||
return text.rstrip().lstrip()
|
return text.rstrip().lstrip()
|
||||||
|
|
||||||
|
|
||||||
|
def unescape(text):
    """
    Removes HTML or XML character references and entities from a text string.
    Courtesy of Fredrik Lundh, http://effbot.org/zone/re-sub.htm#unescape-html

    Numeric references (``&#65;``, ``&#x41;``, ``&#X41;``) and named entities
    (``&amp;``) are replaced by the character they denote. Anything that
    cannot be resolved (unknown entity name, malformed or out-of-range
    number) is left in the text exactly as it was found.

    @param text The HTML (or XML) source text.
    @return The plain text, as a Unicode string, if necessary.
    """
    # Resolve the entity table and the "code point -> character" builtin
    # locally so the function is self-contained and works on both Python 2
    # (htmlentitydefs / unichr) and Python 3 (html.entities / chr).
    try:
        from htmlentitydefs import name2codepoint
    except ImportError:
        from html.entities import name2codepoint
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    def fixup(m):
        text = m.group(0)
        if text[:2] == u'&#':
            # character reference
            try:
                # The hex marker may be written as "x" or "X".
                if text[:3].lower() == u'&#x':
                    return to_char(int(text[3:-1], 16))
                return to_char(int(text[2:-1]))
            except (ValueError, OverflowError):
                # ValueError: not a number, or code point out of range.
                # OverflowError: number too large for the builtin to accept.
                pass
        else:
            # named entity
            try:
                return to_char(name2codepoint[text[1:-1]])
            except KeyError:
                pass
        return text  # leave as is

    # Same pattern as before, with the backslash escaped explicitly so the
    # literal does not rely on "\w" being an unrecognised string escape.
    return re.sub(u'&#?\\w+;', fixup, text)
|
||||||
|
|
||||||
|
@ -32,7 +32,7 @@ from BeautifulSoup import BeautifulSoup, Tag, NavigableString
|
|||||||
|
|
||||||
from openlp.core.lib import Receiver
|
from openlp.core.lib import Receiver
|
||||||
from openlp.core.utils import AppLocation
|
from openlp.core.utils import AppLocation
|
||||||
from common import BibleCommon, SearchResults
|
from common import BibleCommon, SearchResults, unescape
|
||||||
from db import BibleDB
|
from db import BibleDB
|
||||||
from openlp.plugins.bibles.lib.models import Book
|
from openlp.plugins.bibles.lib.models import Book
|
||||||
|
|
||||||
@ -196,7 +196,8 @@ class BGExtract(BibleCommon):
|
|||||||
verse_list[verse_number] = u''
|
verse_list[verse_number] = u''
|
||||||
continue
|
continue
|
||||||
if isinstance(verse, NavigableString):
|
if isinstance(verse, NavigableString):
|
||||||
verse_list[verse_number] = verse_list[verse_number] + verse.replace(u' ', u' ')
|
verse_list[verse_number] = verse_list[verse_number] + \
|
||||||
|
unescape(unicode(verse, u'utf-8').replace(u' ', u' '))
|
||||||
# Delete the "0" element, since we don't need it, it's just there for
|
# Delete the "0" element, since we don't need it, it's just there for
|
||||||
# some stupid initial whitespace, courtesy of Bible Gateway.
|
# some stupid initial whitespace, courtesy of Bible Gateway.
|
||||||
del verse_list[0]
|
del verse_list[0]
|
||||||
|
Loading…
Reference in New Issue
Block a user