# openlp/core/common/httputils.py
# -*- coding: utf-8 -*-
##########################################################################
# OpenLP - Open Source Lyrics Projection #
# ---------------------------------------------------------------------- #
# Copyright (c) 2008-2022 OpenLP Developers #
# ---------------------------------------------------------------------- #
# This program is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or #
# (at your option) any later version. #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with this program. If not, see <https://www.gnu.org/licenses/>. #
##########################################################################
"""
The :mod:`openlp.core.common.httputils` module provides the utility methods for downloading stuff.
"""
import hashlib
import logging
import sys
import time
from pathlib import Path
from random import randint
from tempfile import gettempdir
import requests
from PyQt5 import QtCore
from openlp.core.common import trace_error_handler
from openlp.core.common.registry import Registry
from openlp.core.common.settings import ProxyMode
from openlp.core.threading import ThreadWorker
log = logging.getLogger(__name__ + '.__init__')
# Pools of realistic browser user-agent strings, keyed by sys.platform value.
USER_AGENTS = {
    'win32': [
        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.71 Safari/537.36'
    ],
    'darwin': [
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.31 (KHTML, like Gecko) '
        'Chrome/26.0.1410.43 Safari/537.31',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/536.11 (KHTML, like Gecko) '
        'Chrome/20.0.1132.57 Safari/536.11',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/536.11 (KHTML, like Gecko) '
        'Chrome/20.0.1132.47 Safari/536.11',
    ],
    'linux': [
        'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.22 (KHTML, like Gecko) Ubuntu Chromium/25.0.1364.160 '
        'Chrome/25.0.1364.160 Safari/537.22',
        'Mozilla/5.0 (X11; CrOS armv7l 2913.260.0) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.99 '
        'Safari/537.11',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.27 (KHTML, like Gecko) Chrome/26.0.1389.0 Safari/537.27'
    ],
    'default': [
        'Mozilla/5.0 (X11; NetBSD amd64; rv:18.0) Gecko/20130120 Firefox/18.0'
    ]
}
# BUGFIX: sys.platform reports 'linux' on Python 3 (it was 'linux2' on Python 2),
# so the table was never hit on Linux and fell back to the default list.
# Keep the legacy key as an alias for backwards compatibility.
USER_AGENTS['linux2'] = USER_AGENTS['linux']
# Seconds to wait for a connection before giving up on a single attempt.
CONNECTION_TIMEOUT = 30
# Number of times a failed connection is attempted before reporting an error.
CONNECTION_RETRIES = 2
def get_proxy_settings(mode=None):
    """
    Build the proxy configuration in the format the requests library expects.

    :param ProxyMode | None mode: Specify the source of the proxy settings
    :return: A dict using the format expected by the requests library.
    :rtype: dict | None
    """
    settings = Registry().get('settings')
    if mode is None:
        mode = settings.value('advanced/proxy mode')
    if mode == ProxyMode.NO_PROXY:
        # Explicitly disable proxying for both schemes
        return {'http': None, 'https': None}
    if mode == ProxyMode.SYSTEM_PROXY:
        # The requests library defaults to using the proxy settings in the environment variables
        return None
    if mode == ProxyMode.MANUAL_PROXY:
        http_addr = settings.value('advanced/proxy http')
        https_addr = settings.value('advanced/proxy https')
        username = settings.value('advanced/proxy username')
        password = settings.value('advanced/proxy password')
        # Prepend "user:password@" credentials only when a username is configured
        basic_auth = '{username}:{password}@'.format(username=username, password=password) if username else ''
        proxies = {'http': None, 'https': None}
        if http_addr:
            proxies['http'] = 'http://{basic_auth}{http_addr}'.format(basic_auth=basic_auth, http_addr=http_addr)
        if https_addr:
            proxies['https'] = 'https://{basic_auth}{https_addr}'.format(basic_auth=basic_auth, https_addr=https_addr)
        return proxies
def get_random_user_agent():
    """
    Return a random user agent customised for the platform the user is on.

    :return str: A browser user-agent string appropriate for this platform.
    """
    platform = sys.platform
    if platform.startswith('linux'):
        # BUGFIX: Python 3 reports 'linux' where Python 2 reported 'linux2';
        # the USER_AGENTS table is keyed on the old value, so normalise here
        # instead of silently falling back to the default list.
        platform = 'linux2'
    browser_list = USER_AGENTS.get(platform)
    if not browser_list:
        browser_list = USER_AGENTS['default']
    return browser_list[randint(0, len(browser_list) - 1)]
def get_openlp_user_agent():
    """
    Return the OpenLP user agent, built from the running application's version.
    """
    app_version = Registry().get('application-qt').applicationVersion()
    return 'OpenLP/' + app_version
def get_web_page(url, headers=None, update_openlp=False, proxy=None):
    """
    Attempts to download the webpage at url and returns that page or None.

    :param url: The URL to be downloaded.
    :param dict | None headers: An optional HTTP header to pass in the request to the web server.
    :param update_openlp: Tells OpenLP to update itself if the page is successfully downloaded. Defaults to False.
    :param dict | ProxyMode | None proxy: ProxyMode enum or a dictionary containing the proxy servers, with their
        types as the key e.g. {'http': 'http://proxyserver:port', 'https': 'https://proxyserver:port'}
    :return: The text of the downloaded page, or None if there was no usable response.
    :raises ConnectionError: When every connection attempt failed.
    """
    if not url:
        return None
    if not headers:
        headers = {}
    # Only inject a User-Agent when the caller has not supplied one (case-insensitive check)
    if 'user-agent' not in [key.lower() for key in headers.keys()]:
        headers['User-Agent'] = get_random_user_agent()
    if not isinstance(proxy, dict):
        proxy = get_proxy_settings(mode=proxy)
    log.debug('Downloading URL = %s' % url)
    response = None
    retries = 0
    while retries < CONNECTION_RETRIES:
        try:
            response = requests.get(url, headers=headers, proxies=proxy, timeout=float(CONNECTION_TIMEOUT))
            log.debug('Downloaded page {url}'.format(url=response.url))
            break
        except OSError:
            # For now, catch OSError. All requests errors inherit from OSError
            log.exception('Unable to connect to {url}'.format(url=url))
            response = None
            retries += 1
            # BUGFIX: increment before checking. The old code checked first, so the
            # condition could never be true inside the loop and the ConnectionError
            # was dead code — exhausted retries fell through silently.
            if retries >= CONNECTION_RETRIES:
                raise ConnectionError('Unable to connect to {url}, see log for details'.format(url=url))
        except:  # noqa
            # Don't know what's happening, so reraise the original
            log.exception('Unknown error when trying to connect to {url}'.format(url=url))
            raise
    if update_openlp:
        Registry().get('application').process_events()
    if not response or not response.text:
        log.error('{url} could not be downloaded'.format(url=url))
        return None
    return response.text
def get_url_file_size(url, proxy=None):
    """
    Get the size of a file.

    :param url: The URL of the file we want to download.
    :param dict | ProxyMode | None proxy: ProxyMode enum or a dictionary containing the proxy servers, with their
        types as the key e.g. {'http': 'http://proxyserver:port', 'https': 'https://proxyserver:port'}
    """
    if not isinstance(proxy, dict):
        proxy = get_proxy_settings(mode=proxy)
    attempts = 0
    while True:
        try:
            # A HEAD request is enough to read the Content-Length header
            response = requests.head(url, proxies=proxy, timeout=float(CONNECTION_TIMEOUT), allow_redirects=True)
            return int(response.headers['Content-Length'])
        except OSError:
            if attempts > CONNECTION_RETRIES:
                raise ConnectionError('Unable to download {url}'.format(url=url))
            attempts += 1
            # Brief pause before trying again
            time.sleep(0.1)
def download_file(update_object, url, file_path, sha256=None, proxy=None):
    """
    Download a file given a URL. The file is retrieved in chunks, giving the ability to cancel the download at any
    point. Returns False on download error.

    :param update_object: the object which needs to be updated
    :param url: URL to download
    :param file_path: Destination file
    :param sha256: The check sum value to be checked against the download value
    :param dict | ProxyMode | None proxy: ProxyMode enum or a dictionary containing the proxy servers, with their
        types as the key e.g. {'http': 'http://proxyserver:port', 'https': 'https://proxyserver:port'}
    :return: True when the download completed (or was cancelled), False on download or checksum failure.
    """
    block_count = 0
    block_size = 4096
    retries = 0
    if not isinstance(proxy, dict):
        proxy = get_proxy_settings(mode=proxy)
    log.debug('url_get_file: %s', url)
    while True:
        try:
            with file_path.open('wb') as saved_file:
                response = requests.get(url, proxies=proxy, timeout=float(CONNECTION_TIMEOUT), stream=True)
                if sha256:
                    hasher = hashlib.sha256()
                # Download until finished or canceled.
                for chunk in response.iter_content(chunk_size=block_size):
                    if hasattr(update_object, 'is_cancelled') and update_object.is_cancelled:
                        break
                    saved_file.write(chunk)
                    if sha256:
                        hasher.update(chunk)
                    block_count += 1
                    if hasattr(update_object, 'update_progress'):
                        update_object.update_progress(block_count, block_size)
                response.close()
            # Verify the checksum after the file handle is closed so the
            # partial file can be removed cleanly on mismatch.
            if sha256 and hasher.hexdigest() != sha256:
                log.error('sha256 sums did not match for file %s, got %s, expected %s', file_path,
                          hasher.hexdigest(), sha256)
                if file_path.exists():
                    file_path.unlink()
                return False
            break
        except OSError:
            trace_error_handler(log)
            retries += 1
            # BUGFIX: the old loop's "retries > CONNECTION_RETRIES" test was dead
            # code, so exhausting the retries fell out of the loop and returned
            # True for a FAILED download. Give up explicitly once the retry
            # budget is spent, removing any partial file.
            if retries >= CONNECTION_RETRIES:
                if file_path.exists():
                    file_path.unlink()
                return False
            time.sleep(0.1)
    # A cancelled download should not leave a partial file behind
    if hasattr(update_object, 'is_cancelled') and update_object.is_cancelled and file_path.exists():
        file_path.unlink()
    return True
class DownloadWorker(ThreadWorker):
    """
    A worker object that downloads a single file in a background thread.
    """
    # Emitted when the download fails or is cancelled
    download_failed = QtCore.pyqtSignal()
    # Emitted with the destination path once the download completes
    download_succeeded = QtCore.pyqtSignal(Path)

    def __init__(self, base_url, file_name):
        """
        Store the download source details and reset the cancellation flag.
        """
        self._base_url = base_url
        self._file_name = file_name
        self.is_cancelled = False
        super().__init__()

    def start(self):
        """
        Download the url to the temporary directory
        """
        if self.is_cancelled:
            self.quit.emit()
            return
        try:
            dest_path = Path(gettempdir()) / 'openlp' / self._file_name
            url = '{url}{name}'.format(url=self._base_url, name=self._file_name)
            if download_file(self, url, dest_path) and not self.is_cancelled:
                self.download_succeeded.emit(dest_path)
            else:
                self.download_failed.emit()
        except Exception:
            log.exception('Unable to download %s', url)
            self.download_failed.emit()
        finally:
            # Give listeners a moment before tearing the thread down
            time.sleep(1)
            self.quit.emit()

    @QtCore.pyqtSlot()
    def cancel_download(self):
        """
        A slot to allow the download to be cancelled from outside of the thread
        """
        self.is_cancelled = True