Fixes for Python 3.5.

This commit is contained in:
Tomas Groth 2015-10-17 23:07:28 +01:00
parent 50ceb568e5
commit 568904a6cf
2 changed files with 8 additions and 8 deletions

View File

@@ -27,7 +27,6 @@ import re
import socket import socket
import urllib.parse import urllib.parse
import urllib.error import urllib.error
from html.parser import HTMLParseError
from bs4 import BeautifulSoup, NavigableString, Tag from bs4 import BeautifulSoup, NavigableString, Tag
@@ -290,7 +289,7 @@ class BGExtract(RegistryProperties):
page_source = str(page_source, 'cp1251') page_source = str(page_source, 'cp1251')
try: try:
soup = BeautifulSoup(page_source) soup = BeautifulSoup(page_source)
except HTMLParseError: except Exception:
log.error('BeautifulSoup could not parse the Bible page.') log.error('BeautifulSoup could not parse the Bible page.')
send_error_message('parse') send_error_message('parse')
return None return None
@@ -762,7 +761,7 @@ def get_soup_for_bible_ref(reference_url, header=None, pre_parse_regex=None, pre
try: try:
soup = BeautifulSoup(page_source) soup = BeautifulSoup(page_source)
CLEANER_REGEX.sub('', str(soup)) CLEANER_REGEX.sub('', str(soup))
except HTMLParseError: except Exception:
log.exception('BeautifulSoup could not parse the bible page.') log.exception('BeautifulSoup could not parse the bible page.')
if not soup: if not soup:
send_error_message('parse') send_error_message('parse')

View File

@@ -27,6 +27,7 @@ from http.cookiejar import CookieJar
from urllib.parse import urlencode from urllib.parse import urlencode
from urllib.request import HTTPCookieProcessor, URLError, build_opener from urllib.request import HTTPCookieProcessor, URLError, build_opener
from html.parser import HTMLParser from html.parser import HTMLParser
from html import unescape
from bs4 import BeautifulSoup, NavigableString from bs4 import BeautifulSoup, NavigableString
@@ -130,8 +131,8 @@ class SongSelectImport(object):
break break
for result in search_results: for result in search_results:
song = { song = {
'title': self.html_parser.unescape(result.find('h3').string), 'title': unescape(result.find('h3').string),
'authors': [self.html_parser.unescape(author.string) for author in result.find_all('li')], 'authors': [unescape(author.string) for author in result.find_all('li')],
'link': BASE_URL + result.find('a')['href'] 'link': BASE_URL + result.find('a')['href']
} }
if callback: if callback:
@@ -167,7 +168,7 @@ class SongSelectImport(object):
if callback: if callback:
callback() callback()
song['copyright'] = '/'.join([li.string for li in song_page.find('ul', 'copyright').find_all('li')]) song['copyright'] = '/'.join([li.string for li in song_page.find('ul', 'copyright').find_all('li')])
song['copyright'] = self.html_parser.unescape(song['copyright']) song['copyright'] = unescape(song['copyright'])
song['ccli_number'] = song_page.find('ul', 'info').find('li').string.split(':')[1].strip() song['ccli_number'] = song_page.find('ul', 'info').find('li').string.split(':')[1].strip()
song['verses'] = [] song['verses'] = []
verses = lyrics_page.find('section', 'lyrics').find_all('p') verses = lyrics_page.find('section', 'lyrics').find_all('p')
@@ -180,9 +181,9 @@ class SongSelectImport(object):
else: else:
verse['lyrics'] += '\n' verse['lyrics'] += '\n'
verse['lyrics'] = verse['lyrics'].strip(' \n\r\t') verse['lyrics'] = verse['lyrics'].strip(' \n\r\t')
song['verses'].append(self.html_parser.unescape(verse)) song['verses'].append(unescape(verse))
for counter, author in enumerate(song['authors']): for counter, author in enumerate(song['authors']):
song['authors'][counter] = self.html_parser.unescape(author) song['authors'][counter] = unescape(author)
return song return song
def save_song(self, song): def save_song(self, song):