Fixed bug #907164: Exclude footnotes div and the comment after it.

Fixes: https://launchpad.net/bugs/907164
This commit is contained in:
Raoul Snyman 2011-12-21 07:22:49 +02:00
parent 5d13873ce1
commit 4e94986aa8

View File

@ -92,6 +92,15 @@ class BGExtract(object):
if headings: if headings:
for heading in headings: for heading in headings:
heading.extract() heading.extract()
chapter_notes = soup.findAll('div', 'footnotes')
if chapter_notes:
log.debug('Found chapter notes')
for note in chapter_notes:
note.extract()
note_comments = soup.findAll(text=u'end of footnotes')
if note_comments:
for comment in note_comments:
comment.extract()
cleanup = [(re.compile('\s+'), lambda match: ' ')] cleanup = [(re.compile('\s+'), lambda match: ' ')]
verses = BeautifulSoup(str(soup), markupMassage=cleanup) verses = BeautifulSoup(str(soup), markupMassage=cleanup)
verse_list = {} verse_list = {}