Fixed bug #803031: If UTF-8 decoding fails, try the standard Windows codepage.

This commit is contained in:
Raoul Snyman 2011-07-08 07:57:39 +02:00
parent d02ee451f2
commit ce1abaf332

View File

@ -147,7 +147,10 @@ class BGExtract(object):
send_error_message(u'download') send_error_message(u'download')
return None return None
page_source = page.read() page_source = page.read()
page_source = unicode(page_source, 'utf8') try:
page_source = unicode(page_source, u'utf8')
except UnicodeDecodeError:
page_source = unicode(page_source, u'cp1251')
page_source_temp = re.search(u'<table .*?class="infotable".*?>.*?'\ page_source_temp = re.search(u'<table .*?class="infotable".*?>.*?'\
u'</table>', page_source, re.DOTALL) u'</table>', page_source, re.DOTALL)
if page_source_temp: if page_source_temp: