# -*- coding: utf-8 -*-
# vim: autoindent shiftwidth=4 expandtab textwidth=120 tabstop=4 softtabstop=4
##########################################################################
# OpenLP - Open Source Lyrics Projection                                 #
# ---------------------------------------------------------------------- #
# Copyright (c) 2008-2019 OpenLP Developers                              #
# ---------------------------------------------------------------------- #
# This program is free software: you can redistribute it and/or modify   #
# it under the terms of the GNU General Public License as published by   #
# the Free Software Foundation, either version 3 of the License, or      #
# (at your option) any later version.                                    #
#                                                                        #
# This program is distributed in the hope that it will be useful,        #
# but WITHOUT ANY WARRANTY; without even the implied warranty of         #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
# GNU General Public License for more details.                           #
#                                                                        #
# You should have received a copy of the GNU General Public License      #
# along with this program.  If not, see <https://www.gnu.org/licenses/>. #
##########################################################################
"""
The :mod:`openlp.core.common.httputils` module provides utility methods for downloading web pages and files.
"""
import hashlib
import logging
import sys
import time
from random import randint
from tempfile import gettempdir

import requests
from PyQt5 import QtCore

from openlp.core.common import trace_error_handler
from openlp.core.common.path import Path
from openlp.core.common.registry import Registry
from openlp.core.common.settings import ProxyMode, Settings
from openlp.core.threading import ThreadWorker


log = logging.getLogger(__name__ + '.__init__')
USER_AGENTS = {
    'win32': [
        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.71 Safari/537.36'
    ],
    'darwin': [
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.31 (KHTML, like Gecko) '
        'Chrome/26.0.1410.43 Safari/537.31',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/536.11 (KHTML, like Gecko) '
        'Chrome/20.0.1132.57 Safari/536.11',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/536.11 (KHTML, like Gecko) '
        'Chrome/20.0.1132.47 Safari/536.11',
    ],
    'linux2': [
        'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.22 (KHTML, like Gecko) Ubuntu Chromium/25.0.1364.160 '
        'Chrome/25.0.1364.160 Safari/537.22',
        'Mozilla/5.0 (X11; CrOS armv7l 2913.260.0) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.99 '
        'Safari/537.11',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.27 (KHTML, like Gecko) Chrome/26.0.1389.0 Safari/537.27'
    ],
    'default': [
        'Mozilla/5.0 (X11; NetBSD amd64; rv:18.0) Gecko/20130120 Firefox/18.0'
    ]
}
CONNECTION_TIMEOUT = 30
CONNECTION_RETRIES = 2


def get_proxy_settings(mode=None):
    """
    Create a dictionary containing the proxy settings.

    :param ProxyMode | None mode: Specify the source of the proxy settings
    :return: A dict using the format expected by the requests library.
    :rtype: dict | None
    """
    settings = Settings()
    if mode is None:
        mode = settings.value('advanced/proxy mode')
    if mode == ProxyMode.NO_PROXY:
        return {'http': None, 'https': None}
    elif mode == ProxyMode.SYSTEM_PROXY:
        # The requests library defaults to using the proxy settings in the environment variables
        return None
    elif mode == ProxyMode.MANUAL_PROXY:
        http_addr = settings.value('advanced/proxy http')
        https_addr = settings.value('advanced/proxy https')
        username = settings.value('advanced/proxy username')
        password = settings.value('advanced/proxy password')
        basic_auth = ''
        if username:
            basic_auth = '{username}:{password}@'.format(username=username, password=password)
        http_value = None
        https_value = None
        if http_addr:
            http_value = 'http://{basic_auth}{http_addr}'.format(basic_auth=basic_auth, http_addr=http_addr)
        if https_addr:
            https_value = 'https://{basic_auth}{https_addr}'.format(basic_auth=basic_auth, https_addr=https_addr)
        return {'http': http_value, 'https': https_value}
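# A minimal sketch of how the dict returned by get_proxy_settings() above is consumed
# (illustrative only, never executed by this module); it assumes a manual proxy has been
# configured in the advanced settings:
#
#     proxies = get_proxy_settings(mode=ProxyMode.MANUAL_PROXY)
#     response = requests.get('https://openlp.org/', proxies=proxies)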


def get_user_agent():
    """
    Return a user agent customised for the platform the user is on.
    """
    browser_list = USER_AGENTS.get(sys.platform, None)
    if not browser_list:
        browser_list = USER_AGENTS['default']
    random_index = randint(0, len(browser_list) - 1)
    return browser_list[random_index]
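# Illustrative only: get_user_agent() supplies the User-Agent header that get_web_page()
# below falls back to when the caller does not provide one, e.g.:
#
#     headers = {'User-Agent': get_user_agent()}
#     page = get_web_page('https://openlp.org/', headers=headers)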


def get_web_page(url, headers=None, update_openlp=False, proxy=None):
    """
    Attempt to download the web page at url and return it.

    :param url: The URL to be downloaded.
    :param dict | None headers: Optional HTTP headers to pass in the request to the web server.
    :param update_openlp: Tells OpenLP to update itself if the page is successfully downloaded. Defaults to False.
    :param dict | ProxyMode | None proxy: ProxyMode enum or a dictionary containing the proxy servers, with their
        types as the key e.g. {'http': 'http://proxyserver:port', 'https': 'https://proxyserver:port'}
    :return: The text of the page, or None if the URL was empty or the response had no content.
    :raises ConnectionError: If the connection still fails after all retries.
    """
    if not url:
        return None
    if not headers:
        headers = {}
    if 'user-agent' not in [key.lower() for key in headers.keys()]:
        headers['User-Agent'] = get_user_agent()
    if not isinstance(proxy, dict):
        proxy = get_proxy_settings(mode=proxy)
    log.debug('Downloading URL = %s', url)
    retries = 0
    while retries < CONNECTION_RETRIES:
        try:
            response = requests.get(url, headers=headers, proxies=proxy, timeout=float(CONNECTION_TIMEOUT))
            log.debug('Downloaded page {url}'.format(url=response.url))
            break
        except OSError:
            # For now, catch OSError. All requests errors inherit from OSError
            log.exception('Unable to connect to {url}'.format(url=url))
            response = None
            # Increment before the check so the ConnectionError is actually reachable once
            # the last retry has failed
            retries += 1
            if retries >= CONNECTION_RETRIES:
                raise ConnectionError('Unable to connect to {url}, see log for details'.format(url=url))
        except:  # noqa
            # Don't know what's happening, so reraise the original
            log.exception('Unknown error when trying to connect to {url}'.format(url=url))
            raise
    if update_openlp:
        Registry().get('application').process_events()
    if not response or not response.text:
        log.error('{url} could not be downloaded'.format(url=url))
        return None
    return response.text
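# Hypothetical caller of get_web_page() above (a sketch, not part of the module); since
# the function can raise ConnectionError once its retries are exhausted, callers
# typically guard against it:
#
#     try:
#         page = get_web_page('https://openlp.org/', update_openlp=True)
#     except ConnectionError:
#         page = None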


def get_url_file_size(url, proxy=None):
    """
    Get the size of a file at a URL, using an HTTP HEAD request.

    :param url: The URL of the file we want to download.
    :param dict | ProxyMode | None proxy: ProxyMode enum or a dictionary containing the proxy servers, with their
        types as the key e.g. {'http': 'http://proxyserver:port', 'https': 'https://proxyserver:port'}
    :return: The size of the file in bytes, as reported by the Content-Length header.
    """
    retries = 0
    if not isinstance(proxy, dict):
        proxy = get_proxy_settings(mode=proxy)
    while True:
        try:
            response = requests.head(url, proxies=proxy, timeout=float(CONNECTION_TIMEOUT), allow_redirects=True)
            return int(response.headers['Content-Length'])
        except OSError:
            if retries > CONNECTION_RETRIES:
                raise ConnectionError('Unable to download {url}'.format(url=url))
            else:
                retries += 1
                time.sleep(0.1)
                continue
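# Illustrative use of get_url_file_size() above (not executed here), e.g. to size a
# progress bar before starting a download; the URL is a placeholder:
#
#     total_bytes = get_url_file_size('https://example.com/some/file.zip')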


def download_file(update_object, url, file_path, sha256=None, proxy=None):
    """
    Download a file given a URL. The file is retrieved in chunks, giving the ability to cancel the download at any
    point. Returns False on download error.

    :param update_object: The object which needs to be updated
    :param url: URL to download
    :param file_path: Destination file
    :param sha256: The SHA-256 checksum that the downloaded file is verified against
    :param dict | ProxyMode | None proxy: ProxyMode enum or a dictionary containing the proxy servers, with their
        types as the key e.g. {'http': 'http://proxyserver:port', 'https': 'https://proxyserver:port'}
    """
    block_count = 0
    block_size = 4096
    retries = 0
    if not isinstance(proxy, dict):
        proxy = get_proxy_settings(mode=proxy)
    log.debug('url_get_file: %s', url)
    while retries < CONNECTION_RETRIES:
        try:
            with file_path.open('wb') as saved_file:
                response = requests.get(url, proxies=proxy, timeout=float(CONNECTION_TIMEOUT), stream=True)
                if sha256:
                    hasher = hashlib.sha256()
                # Download until finished or canceled.
                for chunk in response.iter_content(chunk_size=block_size):
                    if hasattr(update_object, 'was_cancelled') and update_object.was_cancelled:
                        break
                    saved_file.write(chunk)
                    if sha256:
                        hasher.update(chunk)
                    block_count += 1
                    if hasattr(update_object, 'update_progress'):
                        update_object.update_progress(block_count, block_size)
                response.close()
            if sha256 and hasher.hexdigest() != sha256:
                log.error('sha256 sums did not match for file %s, got %s, expected %s', file_path,
                          hasher.hexdigest(), sha256)
                if file_path.exists():
                    file_path.unlink()
                return False
            break
        except OSError:
            trace_error_handler(log)
            # Increment before the check, otherwise the loop condition ends the loop first
            # and a failed download would fall through to return True
            retries += 1
            if retries >= CONNECTION_RETRIES:
                if file_path.exists():
                    file_path.unlink()
                return False
            time.sleep(0.1)
    if hasattr(update_object, 'was_cancelled') and update_object.was_cancelled and file_path.exists():
        file_path.unlink()
    return True
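# Sketch of a download_file() call (illustrative only). The update_object argument is
# duck-typed: only an optional `was_cancelled` attribute and an optional
# `update_progress(block_count, block_size)` method are looked up, so a hypothetical
# stand-in like this would work:
#
#     class DummyProgress:
#         was_cancelled = False
#
#         def update_progress(self, block_count, block_size):
#             print('{} bytes received'.format(block_count * block_size))
#
#     dest = Path(gettempdir()) / 'file.zip'
#     success = download_file(DummyProgress(), 'https://example.com/file.zip', dest)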


class DownloadWorker(ThreadWorker):
    """
    This worker allows a file to be downloaded in a thread
    """
    download_failed = QtCore.pyqtSignal()
    download_succeeded = QtCore.pyqtSignal(Path)

    def __init__(self, base_url, file_name):
        """
        Set up the worker object
        """
        self._base_url = base_url
        self._file_name = file_name
        self.was_cancelled = False
        super().__init__()

    def start(self):
        """
        Download the url to the temporary directory
        """
        if self.was_cancelled:
            self.quit.emit()
            return
        # Build the URL first so the exception handler below can always log it
        url = '{url}{name}'.format(url=self._base_url, name=self._file_name)
        try:
            dest_path = Path(gettempdir()) / 'openlp' / self._file_name
            is_success = download_file(self, url, dest_path)
            if is_success and not self.was_cancelled:
                self.download_succeeded.emit(dest_path)
            else:
                self.download_failed.emit()
        except Exception:
            log.exception('Unable to download %s', url)
            self.download_failed.emit()
        finally:
            self.quit.emit()

    @QtCore.pyqtSlot()
    def cancel_download(self):
        """
        A slot to allow the download to be cancelled from outside of the thread
        """
        self.was_cancelled = True
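# Hypothetical wiring of DownloadWorker (illustrative only); it assumes the run_thread()
# helper from openlp.core.threading, which is how OpenLP normally starts a ThreadWorker:
#
#     worker = DownloadWorker('https://example.com/files/', 'file.zip')
#     worker.download_succeeded.connect(on_download_succeeded)
#     worker.download_failed.connect(on_download_failed)
#     run_thread(worker, 'download-worker')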