# Mirror of https://gitlab.com/openlp/openlp.git (292 lines, 12 KiB, Python)
# -*- coding: utf-8 -*-

##########################################################################
# OpenLP - Open Source Lyrics Projection                                 #
# ---------------------------------------------------------------------- #
# Copyright (c) 2008-2022 OpenLP Developers                              #
# ---------------------------------------------------------------------- #
# This program is free software: you can redistribute it and/or modify   #
# it under the terms of the GNU General Public License as published by   #
# the Free Software Foundation, either version 3 of the License, or      #
# (at your option) any later version.                                    #
#                                                                        #
# This program is distributed in the hope that it will be useful,        #
# but WITHOUT ANY WARRANTY; without even the implied warranty of         #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the           #
# GNU General Public License for more details.                           #
#                                                                        #
# You should have received a copy of the GNU General Public License      #
# along with this program. If not, see <https://www.gnu.org/licenses/>.  #
##########################################################################
"""
The :mod:`openlp.core.common.httputils` module provides the utility methods for downloading stuff.
"""
|
|
import hashlib
|
|
import logging
|
|
import sys
|
|
import time
|
|
from pathlib import Path
|
|
from random import randint
|
|
from tempfile import gettempdir
|
|
|
|
import requests
|
|
from PyQt5 import QtCore
|
|
|
|
from openlp.core.common import trace_error_handler
|
|
from openlp.core.common.registry import Registry
|
|
from openlp.core.common.settings import ProxyMode
|
|
from openlp.core.threading import ThreadWorker
|
|
|
|
|
|
# Module-level logger, named after this module with an explicit '.__init__' suffix
log = logging.getLogger(__name__ + '.__init__')

# Browser user agent strings, keyed by the value of ``sys.platform``. The 'default' list is used by
# ``get_random_user_agent`` when there is no entry for the current platform.
USER_AGENTS = {
    # Chrome on Windows
    'win32': [
        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.71 Safari/537.36'
    ],
    # Chrome on macOS
    'darwin': [
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.31 (KHTML, like Gecko) '
        'Chrome/26.0.1410.43 Safari/537.31',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/536.11 (KHTML, like Gecko) '
        'Chrome/20.0.1132.57 Safari/536.11',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/536.11 (KHTML, like Gecko) '
        'Chrome/20.0.1132.47 Safari/536.11',
    ],
    # Chrome / Chromium on Linux and Chrome OS
    'linux2': [
        'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.22 (KHTML, like Gecko) Ubuntu Chromium/25.0.1364.160 '
        'Chrome/25.0.1364.160 Safari/537.22',
        'Mozilla/5.0 (X11; CrOS armv7l 2913.260.0) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.99 '
        'Safari/537.11',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.27 (KHTML, like Gecko) Chrome/26.0.1389.0 Safari/537.27'
    ],
    # Fallback for every platform without its own list
    'default': [
        'Mozilla/5.0 (X11; NetBSD amd64; rv:18.0) Gecko/20130120 Firefox/18.0'
    ]
}

# Timeout handed to ``requests`` (as a float) for every HTTP call made by this module
CONNECTION_TIMEOUT = 30
# Retry limit used by the download loops in this module
CONNECTION_RETRIES = 2
|
|
|
|
|
|
def get_proxy_settings(mode=None):
    """
    Build the proxy configuration to hand to the requests library.

    When no mode is given, the mode stored under ``advanced/proxy mode`` in the registered settings object is used.

    :param ProxyMode | None mode: Specify the source of the proxy settings
    :return: A dict using the format expected by the requests library, or None when the system proxy should be used
        (or the mode is not one of the modes handled here).
    :rtype: dict | None
    """
    settings = Registry().get('settings')
    if mode is None:
        mode = settings.value('advanced/proxy mode')

    if mode == ProxyMode.NO_PROXY:
        # Explicit None values for both schemes
        return {'http': None, 'https': None}
    if mode == ProxyMode.SYSTEM_PROXY:
        # The requests library defaults to using the proxy settings in the environment variables
        return None
    if mode != ProxyMode.MANUAL_PROXY:
        return None

    # Manual proxy: assemble the URLs from the individual settings values
    http_addr = settings.value('advanced/proxy http')
    https_addr = settings.value('advanced/proxy https')
    username = settings.value('advanced/proxy username')
    password = settings.value('advanced/proxy password')

    # Credentials are only embedded when a user name has been configured
    credentials = '{username}:{password}@'.format(username=username, password=password) if username else ''

    proxies = {'http': None, 'https': None}
    if http_addr:
        proxies['http'] = 'http://{auth}{addr}'.format(auth=credentials, addr=http_addr)
    if https_addr:
        proxies['https'] = 'https://{auth}{addr}'.format(auth=credentials, addr=https_addr)
    return proxies
|
|
|
|
|
|
def get_random_user_agent():
    """
    Return a random user agent customised for the platform the user is on.

    The list is looked up in ``USER_AGENTS`` by ``sys.platform``. Python 3.3 and later report ``'linux'`` on Linux,
    while the table is keyed by the older ``'linux2'`` value, so Linux platforms fall back to that entry. Any
    platform without a list of its own gets one of the ``'default'`` user agents.

    :return: One user agent string picked at random from the list for the current platform.
    :rtype: str
    """
    browser_list = USER_AGENTS.get(sys.platform)
    if not browser_list and sys.platform.startswith('linux'):
        # sys.platform is 'linux' on Python 3.3+, but the table still uses the historic 'linux2' key
        browser_list = USER_AGENTS.get('linux2')
    if not browser_list:
        browser_list = USER_AGENTS['default']
    random_index = randint(0, len(browser_list) - 1)
    return browser_list[random_index]
|
|
|
|
|
|
def get_openlp_user_agent():
    """
    Build the user agent string that identifies OpenLP itself.

    :return: ``'OpenLP/'`` followed by the version reported by the object registered as ``application-qt``.
    """
    application = Registry().get('application-qt')
    version = application.applicationVersion()
    return 'OpenLP/' + version
|
|
|
|
|
|
def get_web_page(url, headers=None, update_openlp=False, proxy=None):
    """
    Attempts to download the webpage at url and returns that page or None.

    The request is attempted up to ``CONNECTION_RETRIES`` times. The caller's ``headers`` dict is never modified; a
    random browser user agent is added to a copy when no ``User-Agent`` header (in any letter case) is supplied.

    :param url: The URL to be downloaded.
    :param dict | None headers: An optional HTTP header to pass in the request to the web server.
    :param update_openlp: Tells OpenLP to update itself if the page is successfully downloaded. Defaults to False.
    :param dict | ProxyMode | None proxy: ProxyMode enum or a dictionary containing the proxy servers, with their types
        as the key e.g. {'http': 'http://proxyserver:port', 'https': 'https://proxyserver:port'}
    :return: The text of the response, or None if no URL was given or the response had no text.
    :rtype: str | None
    :raises ConnectionError: If the last permitted attempt fails with an ``OSError``.
    """
    if not url:
        return None
    # Work on a copy so that adding the default user agent does not leak into the caller's dict
    headers = dict(headers) if headers else {}
    if 'user-agent' not in [key.lower() for key in headers]:
        headers['User-Agent'] = get_random_user_agent()
    if not isinstance(proxy, dict):
        proxy = get_proxy_settings(mode=proxy)
    log.debug('Downloading URL = %s', url)
    # Pre-set so the check after the loop is safe even if the loop body never runs
    response = None
    retries = 0
    while retries < CONNECTION_RETRIES:
        try:
            response = requests.get(url, headers=headers, proxies=proxy, timeout=float(CONNECTION_TIMEOUT))
            log.debug('Downloaded page {url}'.format(url=response.url))
            break
        except OSError as err:
            # For now, catch OSError. All requests errors inherit from OSError
            log.exception('Unable to connect to {url}'.format(url=url))
            response = None
            # Count this failure before comparing, otherwise the limit can never be reached inside the loop
            retries += 1
            if retries >= CONNECTION_RETRIES:
                raise ConnectionError('Unable to connect to {url}, see log for details'.format(url=url)) from err
        except:  # noqa
            # Don't know what's happening, so reraise the original
            log.exception('Unknown error when trying to connect to {url}'.format(url=url))
            raise
    if update_openlp:
        Registry().get('application').process_events()
    if not response or not response.text:
        log.error('{url} could not be downloaded'.format(url=url))
        return None
    return response.text
|
|
|
|
|
|
def get_url_file_size(url, proxy=None):
    """
    Ask the server for the size of the file behind a URL.

    A HEAD request (following redirects) is sent and the ``Content-Length`` response header is returned as an
    integer. Requests that fail with an ``OSError`` are repeated after a short pause.

    :param url: The URL of the file we want to download.
    :param dict | ProxyMode | None proxy: ProxyMode enum or a dictionary containing the proxy servers, with their types
        as the key e.g. {'http': 'http://proxyserver:port', 'https': 'https://proxyserver:port'}
    :return: The value of the ``Content-Length`` header.
    :rtype: int
    :raises ConnectionError: Once the number of failed requests has gone past ``CONNECTION_RETRIES``.
    """
    proxies = proxy if isinstance(proxy, dict) else get_proxy_settings(mode=proxy)
    failed_requests = 0
    while True:
        try:
            head_response = requests.head(url, proxies=proxies, timeout=float(CONNECTION_TIMEOUT),
                                          allow_redirects=True)
            return int(head_response.headers['Content-Length'])
        except OSError:
            if failed_requests > CONNECTION_RETRIES:
                raise ConnectionError('Unable to download {url}'.format(url=url))
            # Not at the limit yet: note the failure, wait briefly and go round again
            failed_requests += 1
            time.sleep(0.1)
|
|
|
|
|
|
def download_file(update_object, url, file_path, sha256=None, proxy=None):
    """
    Download a file given a URL. The file is retrieved in chunks, giving the ability to cancel the download at any
    point. Returns False on download error.

    The download is attempted up to ``CONNECTION_RETRIES`` times. Every attempt rewrites the destination file from the
    beginning, so the progress counter and the checksum start over as well.

    :param update_object: the object which needs to be updated. If it has an ``is_cancelled`` attribute, a true value
        stops the download before the next chunk is written. If it has an ``update_progress`` method, it is called
        after every chunk with the number of chunks written so far and the chunk size.
    :param url: URL to download
    :param file_path: Destination file (must provide ``open``, ``exists`` and ``unlink``, e.g. a ``pathlib.Path``)
    :param sha256: The check sum value to be checked against the download value
    :param dict | ProxyMode | None proxy: ProxyMode enum or a dictionary containing the proxy servers, with their types
        as the key e.g. {'http': 'http://proxyserver:port', 'https': 'https://proxyserver:port'}
    :return: False if the checksum does not match or the last permitted attempt fails with an ``OSError`` (the
        destination file is removed in both cases), otherwise True. A cancelled download with no checksum requested
        also returns True, after the partial file has been removed.
    :rtype: bool
    """
    block_size = 4096
    retries = 0
    if not isinstance(proxy, dict):
        proxy = get_proxy_settings(mode=proxy)
    log.debug('url_get_file: %s', url)
    while retries < CONNECTION_RETRIES:
        # Fresh state for every attempt, because the file is truncated and written again from the start
        block_count = 0
        hasher = hashlib.sha256() if sha256 else None
        try:
            with file_path.open('wb') as saved_file:
                response = requests.get(url, proxies=proxy, timeout=float(CONNECTION_TIMEOUT), stream=True)
                try:
                    # Download until finished or canceled.
                    for chunk in response.iter_content(chunk_size=block_size):
                        if getattr(update_object, 'is_cancelled', False):
                            break
                        saved_file.write(chunk)
                        if hasher is not None:
                            hasher.update(chunk)
                        block_count += 1
                        if hasattr(update_object, 'update_progress'):
                            update_object.update_progress(block_count, block_size)
                finally:
                    # Release the connection even when reading a chunk fails half way through
                    response.close()
            if hasher is not None and hasher.hexdigest() != sha256:
                log.error('sha256 sums did not match for file %s, got %s, expected %s', file_path, hasher.hexdigest(),
                          sha256)
                if file_path.exists():
                    file_path.unlink()
                return False
            break
        except OSError:
            trace_error_handler(log)
            # Count this failure before comparing, otherwise the limit can never be reached inside the loop and a
            # download that failed every time would be reported as a success
            retries += 1
            if retries >= CONNECTION_RETRIES:
                if file_path.exists():
                    file_path.unlink()
                return False
            time.sleep(0.1)
    if getattr(update_object, 'is_cancelled', False) and file_path.exists():
        # A cancelled download must not leave a partial file behind
        file_path.unlink()
    return True
|
|
|
|
|
|
class DownloadWorker(ThreadWorker):
    """
    This worker allows a file to be downloaded in a thread

    The file ``file_name`` is fetched from ``base_url`` into the ``openlp`` folder of the system temporary directory.
    ``download_succeeded`` is emitted with the destination path when the download completes without being cancelled;
    ``download_failed`` is emitted otherwise.
    """
    # Emitted when the download fails, raises an exception or is cancelled
    download_failed = QtCore.pyqtSignal()
    # Emitted with the path of the downloaded file
    download_succeeded = QtCore.pyqtSignal(Path)

    def __init__(self, base_url, file_name):
        """
        Set up the worker object

        :param base_url: The URL prefix; the file name is appended to it without any separator.
        :param file_name: The name of the file, used both in the URL and as the name of the local file.
        """
        self._base_url = base_url
        self._file_name = file_name
        self.is_cancelled = False
        super().__init__()

    def start(self):
        """
        Download the url to the temporary directory
        """
        if self.is_cancelled:
            self.quit.emit()
            return
        # Bound before the try block so the error handler below always has something to log, even when the
        # failure happens before the full URL has been assembled
        url = self._base_url
        try:
            url = '{url}{name}'.format(url=self._base_url, name=self._file_name)
            dest_path = Path(gettempdir()) / 'openlp' / self._file_name
            is_success = download_file(self, url, dest_path)
            if is_success and not self.is_cancelled:
                self.download_succeeded.emit(dest_path)
            else:
                self.download_failed.emit()
        except Exception:
            log.exception('Unable to download %s', url)
            self.download_failed.emit()
        finally:
            # NOTE(review): the reason for the one second pause before quitting is not visible in this file
            time.sleep(1)
            self.quit.emit()

    @QtCore.pyqtSlot()
    def cancel_download(self):
        """
        A slot to allow the download to be cancelled from outside of the thread
        """
        # download_file() checks this flag before writing each chunk
        self.is_cancelled = True
|