forked from openlp/openlp
r1341
This commit is contained in:
commit
60520315a7
|
@ -35,7 +35,7 @@ import socket
|
||||||
import urllib
|
import urllib
|
||||||
from HTMLParser import HTMLParseError
|
from HTMLParser import HTMLParseError
|
||||||
|
|
||||||
from BeautifulSoup import BeautifulSoup, NavigableString
|
from BeautifulSoup import BeautifulSoup, NavigableString, Tag
|
||||||
|
|
||||||
from openlp.core.lib import Receiver, translate
|
from openlp.core.lib import Receiver, translate
|
||||||
from openlp.core.lib.ui import critical_error_message_box
|
from openlp.core.lib.ui import critical_error_message_box
|
||||||
|
@ -221,21 +221,14 @@ class BGExtract(object):
|
||||||
crossrefs = soup.findAll(u'sup', u'xref')
|
crossrefs = soup.findAll(u'sup', u'xref')
|
||||||
if crossrefs:
|
if crossrefs:
|
||||||
[crossref.extract() for crossref in crossrefs]
|
[crossref.extract() for crossref in crossrefs]
|
||||||
|
headings = soup.findAll(u'h5')
|
||||||
|
if headings:
|
||||||
|
[heading.extract() for heading in headings]
|
||||||
cleanup = [(re.compile('\s+'), lambda match: ' ')]
|
cleanup = [(re.compile('\s+'), lambda match: ' ')]
|
||||||
verses = BeautifulSoup(str(soup), markupMassage=cleanup)
|
verses = BeautifulSoup(str(soup), markupMassage=cleanup)
|
||||||
content = verses.find(u'div', u'result-text-style-normal')
|
|
||||||
if not content:
|
|
||||||
content = verses.find(u'div', u'result-text-style-rtl-serif')
|
|
||||||
if not content:
|
|
||||||
log.debug(u'No content found in the BibleGateway response.')
|
|
||||||
send_error_message(u'parse')
|
|
||||||
return None
|
|
||||||
verse_count = len(verses.findAll(u'sup', u'versenum'))
|
|
||||||
found_count = 0
|
|
||||||
verse_list = {}
|
verse_list = {}
|
||||||
while found_count < verse_count:
|
for verse in verses(u'sup', u'versenum'):
|
||||||
content = content.findNext(u'sup', u'versenum')
|
raw_verse_num = verse.next
|
||||||
raw_verse_num = content.next
|
|
||||||
clean_verse_num = 0
|
clean_verse_num = 0
|
||||||
# Not all verses exist in all translations and may or may not be
|
# Not all verses exist in all translations and may or may not be
|
||||||
# represented by a verse number. If they are not fine, if they are
|
# represented by a verse number. If they are not fine, if they are
|
||||||
|
@ -248,9 +241,22 @@ class BGExtract(object):
|
||||||
log.exception(u'Illegal verse number in %s %s %s:%s',
|
log.exception(u'Illegal verse number in %s %s %s:%s',
|
||||||
version, bookname, chapter, unicode(raw_verse_num))
|
version, bookname, chapter, unicode(raw_verse_num))
|
||||||
if clean_verse_num:
|
if clean_verse_num:
|
||||||
raw_verse_text = raw_verse_num.next
|
verse_text = raw_verse_num.next
|
||||||
verse_list[clean_verse_num] = unicode(raw_verse_text)
|
part = raw_verse_num.next.next
|
||||||
found_count += 1
|
while not (isinstance(part, Tag) and part.attrMap and
|
||||||
|
part.attrMap[u'class'] == u'versenum'):
|
||||||
|
# While we are still in the same verse grab all the text.
|
||||||
|
if isinstance(part, NavigableString):
|
||||||
|
verse_text = verse_text + part
|
||||||
|
if isinstance(part.next, Tag) and part.next.name == u'div':
|
||||||
|
# Run out of verses so stop.
|
||||||
|
break
|
||||||
|
part = part.next
|
||||||
|
verse_list[clean_verse_num] = unicode(verse_text)
|
||||||
|
if not verse_list:
|
||||||
|
log.debug(u'No content found in the BibleGateway response.')
|
||||||
|
send_error_message(u'parse')
|
||||||
|
return None
|
||||||
return SearchResults(bookname, chapter, verse_list)
|
return SearchResults(bookname, chapter, verse_list)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -314,14 +314,15 @@ class FoilPresenter(object):
|
||||||
i = 1
|
i = 1
|
||||||
else:
|
else:
|
||||||
i = 1
|
i = 1
|
||||||
|
author_temp = []
|
||||||
for author in strings:
|
for author in strings:
|
||||||
temp = re.split(u',(?=\D{2})|(?<=\D),|\/(?=\D{3,})|(?<=\D);',
|
temp = re.split(u',(?=\D{2})|(?<=\D),|\/(?=\D{3,})|(?<=\D);',
|
||||||
author)
|
author)
|
||||||
for tempx in temp:
|
for tempx in temp:
|
||||||
author_temp.append(tempx)
|
author_temp.append(tempx)
|
||||||
for author in author_temp:
|
for author in author_temp:
|
||||||
regex = u'^[\/,;\-\s]+|[\/,;\-\s]+$|'\
|
regex = u'^[\/,;\-\s\.]+|[\/,;\-\s\.]+$|'\
|
||||||
'\s*[0-9]{4}\s*[\-\/]?\s*([0-9]{4})?[\/,;\-\s]*$'
|
'\s*[0-9]{4}\s*[\-\/]?\s*([0-9]{4})?[\/,;\-\s\.]*$'
|
||||||
author = re.compile(regex).sub(u'', author)
|
author = re.compile(regex).sub(u'', author)
|
||||||
author = re.compile(
|
author = re.compile(
|
||||||
u'[0-9]{1,2}\.\s?J(ahr)?h\.|um\s*$|vor\s*$').sub(u'',
|
u'[0-9]{1,2}\.\s?J(ahr)?h\.|um\s*$|vor\s*$').sub(u'',
|
||||||
|
|
Loading…
Reference in New Issue