# openlp/core/common/httputils.py
# -*- coding: utf-8 -*-
# vim: autoindent shiftwidth=4 expandtab textwidth=120 tabstop=4 softtabstop=4
###############################################################################
# OpenLP - Open Source Lyrics Projection #
# --------------------------------------------------------------------------- #
# Copyright (c) 2008-2017 OpenLP Developers                                   #
# --------------------------------------------------------------------------- #
# This program is free software; you can redistribute it and/or modify it #
# under the terms of the GNU General Public License as published by the Free #
# Software Foundation; version 2 of the License. #
# #
# This program is distributed in the hope that it will be useful, but WITHOUT #
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for #
# more details. #
# #
# You should have received a copy of the GNU General Public License along #
# with this program; if not, write to the Free Software Foundation, Inc., 59 #
# Temple Place, Suite 330, Boston, MA 02111-1307 USA #
###############################################################################
"""
The :mod:`openlp.core.utils` module provides the utility libraries for OpenLP.
"""
import hashlib
import logging
import sys
import time
from random import randint

import requests

from openlp.core.common import trace_error_handler
from openlp.core.common.registry import Registry
# Logger for this module (the '.__init__' suffix looks historical — TODO confirm).
log = logging.getLogger(__name__ + '.__init__')
# Pools of real browser user-agent strings keyed by sys.platform value.
# get_user_agent() picks one at random so HTTP requests look like they come
# from a normal desktop browser rather than a script.
USER_AGENTS = {
    'win32': [
        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.71 Safari/537.36'
    ],
    'darwin': [
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.31 (KHTML, like Gecko) '
        'Chrome/26.0.1410.43 Safari/537.31',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/536.11 (KHTML, like Gecko) '
        'Chrome/20.0.1132.57 Safari/536.11',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/536.11 (KHTML, like Gecko) '
        'Chrome/20.0.1132.47 Safari/536.11',
    ],
    'linux2': [
        'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.22 (KHTML, like Gecko) Ubuntu Chromium/25.0.1364.160 '
        'Chrome/25.0.1364.160 Safari/537.22',
        'Mozilla/5.0 (X11; CrOS armv7l 2913.260.0) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.99 '
        'Safari/537.11',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.27 (KHTML, like Gecko) Chrome/26.0.1389.0 Safari/537.27'
    ],
    'default': [
        'Mozilla/5.0 (X11; NetBSD amd64; rv:18.0) Gecko/20130120 Firefox/18.0'
    ]
}
# Seconds to wait on a single HTTP request before giving up.
CONNECTION_TIMEOUT = 30
# Number of attempts before an HTTP operation is abandoned.
CONNECTION_RETRIES = 2
def get_user_agent():
    """
    Return a user agent customised for the platform the user is on.

    Picks a random entry from the pool for ``sys.platform``, falling back to
    the generic 'default' pool when the platform has no dedicated list.
    """
    pool = USER_AGENTS.get(sys.platform) or USER_AGENTS['default']
    return pool[randint(0, len(pool) - 1)]
def get_web_page(url, headers=None, update_openlp=False, proxies=None):
    """
    Attempt to download the web page at url and return its body text, or None.

    :param url: The URL to be downloaded.
    :param headers: An optional dict of HTTP headers to pass in the request to the web server.
    :param update_openlp: Tells OpenLP to update itself if the page is successfully downloaded.
        Defaults to False.
    :param proxies: An optional mapping of scheme to proxy URL, passed straight to requests.
    :return: The decoded body of the response, or None if the URL or the body is empty.
    :raises ConnectionError: If the server cannot be reached after CONNECTION_RETRIES attempts.
    """
    if not url:
        return None
    # Work on a copy so the caller's dict is not mutated when we inject a User-Agent.
    headers = dict(headers) if headers else {}
    if 'user-agent' not in [key.lower() for key in headers.keys()]:
        headers['User-Agent'] = get_user_agent()
    log.debug('Downloading URL = %s' % url)
    retries = 0
    while retries < CONNECTION_RETRIES:
        try:
            response = requests.get(url, headers=headers, proxies=proxies, timeout=float(CONNECTION_TIMEOUT))
            log.debug('Downloaded page {url}'.format(url=response.url))
            break
        except OSError:
            # For now, catch OSError. All requests errors inherit from OSError
            log.exception('Unable to connect to {url}'.format(url=url))
            response = None
            # Count the failed attempt BEFORE comparing; with the old post-check
            # (compare, then increment) retries never reached CONNECTION_RETRIES
            # inside the loop, so the ConnectionError below was unreachable and
            # exhausted retries silently fell through to return None.
            retries += 1
            if retries >= CONNECTION_RETRIES:
                raise ConnectionError('Unable to connect to {url}, see log for details'.format(url=url))
        except Exception:
            # Don't know what's happening, so reraise the original
            log.exception('Unknown error when trying to connect to {url}'.format(url=url))
            raise
    if update_openlp:
        Registry().get('application').process_events()
    if not response or not response.text:
        log.error('{url} could not be downloaded'.format(url=url))
        return None
    return response.text
def get_url_file_size(url):
    """
    Get the size of a file.

    :param url: The URL of the file we want to download.
    :return: The Content-Length reported by the server, as an int.
    :raises ConnectionError: When the server cannot be reached after the retries are used up.
    """
    failed_attempts = 0
    while True:
        try:
            head = requests.head(url, timeout=float(CONNECTION_TIMEOUT), allow_redirects=True)
        except OSError:
            if failed_attempts > CONNECTION_RETRIES:
                raise ConnectionError('Unable to download {url}'.format(url=url))
            failed_attempts += 1
            time.sleep(0.1)
        else:
            return int(head.headers['Content-Length'])
def url_get_file(callback, url, file_path, sha256=None):
    """
    Download a file given a URL. The file is retrieved in chunks, giving the ability to cancel the download at any
    point. Returns False on download error.

    :param callback: the class which needs to be updated (must expose ``was_cancelled`` and
        ``_download_progress(block_count, block_size)``)
    :param url: URL to download
    :param file_path: Destination file (a pathlib.Path-like object)
    :param sha256: The check sum value to be checked against the download value
    :return: True on success or cancellation, False on download or checksum failure.
    """
    block_count = 0
    block_size = 4096
    retries = 0
    log.debug('url_get_file: %s', url)
    while retries < CONNECTION_RETRIES:
        try:
            with file_path.open('wb') as saved_file:
                response = requests.get(url, timeout=float(CONNECTION_TIMEOUT), stream=True)
                try:
                    if sha256:
                        hasher = hashlib.sha256()
                    # Download until finished or canceled.
                    for chunk in response.iter_content(chunk_size=block_size):
                        if callback.was_cancelled:
                            break
                        saved_file.write(chunk)
                        if sha256:
                            hasher.update(chunk)
                        block_count += 1
                        callback._download_progress(block_count, block_size)
                finally:
                    # Release the streaming connection even if iteration raised.
                    response.close()
            if sha256 and hasher.hexdigest() != sha256:
                log.error('sha256 sums did not match for file %s, got %s, expected %s', file_path, hasher.hexdigest(),
                          sha256)
                if file_path.exists():
                    file_path.unlink()
                return False
            break
        except OSError:
            trace_error_handler(log)
            # Count this failure first: with the old post-increment check the
            # while condition (retries < CONNECTION_RETRIES) exited before the
            # cleanup branch could ever run, so the function fell through and
            # returned True on a failed download, leaving a partial file behind.
            retries += 1
            if retries >= CONNECTION_RETRIES:
                if file_path.exists():
                    file_path.unlink()
                return False
            time.sleep(0.1)
    if callback.was_cancelled and file_path.exists():
        file_path.unlink()
    return True
__all__ = ['get_web_page']