# -*- coding: utf-8 -*- # vim: autoindent shiftwidth=4 expandtab textwidth=120 tabstop=4 softtabstop=4 ########################################################################## # OpenLP - Open Source Lyrics Projection # # ---------------------------------------------------------------------- # # Copyright (c) 2008-2019 OpenLP Developers # # ---------------------------------------------------------------------- # # This program is free software: you can redistribute it and/or modify # # it under the terms of the GNU General Public License as published by # # the Free Software Foundation, either version 3 of the License, or # # (at your option) any later version. # # # # This program is distributed in the hope that it will be useful, # # but WITHOUT ANY WARRANTY; without even the implied warranty of # # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # # GNU General Public License for more details. # # # # You should have received a copy of the GNU General Public License # # along with this program. If not, see . # ########################################################################## """ The :mod:`openlp.core.common.httputils` module provides the utility methods for downloading stuff. """ import hashlib import logging import sys import time from random import randint from tempfile import gettempdir import requests from PyQt5 import QtCore from openlp.core.common import trace_error_handler from openlp.core.common.path import Path from openlp.core.common.registry import Registry from openlp.core.common.settings import ProxyMode, Settings from openlp.core.threading import ThreadWorker log = logging.getLogger(__name__ + '.__init__') USER_AGENTS = { 'win32': [ 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36', 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.71 Safari/537.36' ], 'darwin': [ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.31 (KHTML, like Gecko) ' 'Chrome/26.0.1410.43 Safari/537.31', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/536.11 (KHTML, like Gecko) ' 'Chrome/20.0.1132.57 Safari/536.11', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/536.11 (KHTML, like Gecko) ' 'Chrome/20.0.1132.47 Safari/536.11', ], 'linux2': [ 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.22 (KHTML, like Gecko) Ubuntu Chromium/25.0.1364.160 ' 'Chrome/25.0.1364.160 Safari/537.22', 'Mozilla/5.0 (X11; CrOS armv7l 2913.260.0) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.99 ' 'Safari/537.11', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.27 (KHTML, like Gecko) Chrome/26.0.1389.0 Safari/537.27' ], 'default': [ 'Mozilla/5.0 (X11; NetBSD amd64; rv:18.0) Gecko/20130120 Firefox/18.0' ] } CONNECTION_TIMEOUT = 30 CONNECTION_RETRIES = 2 def get_proxy_settings(mode=None): """ Create a dictionary containing the proxy settings. :param ProxyMode | None mode: Specify the source of the proxy settings :return: A dict using the format expected by the requests library. :rtype: dict | None """ settings = Settings() if mode is None: mode = settings.value('advanced/proxy mode') if mode == ProxyMode.NO_PROXY: return {'http': None, 'https': None} elif mode == ProxyMode.SYSTEM_PROXY: # The requests library defaults to using the proxy settings in the environment variables return elif mode == ProxyMode.MANUAL_PROXY: http_addr = settings.value('advanced/proxy http') https_addr = settings.value('advanced/proxy https') username = settings.value('advanced/proxy username') password = settings.value('advanced/proxy password') basic_auth = '' if username: basic_auth = '{username}:{password}@'.format(username=username, password=password) http_value = None https_value = None if http_addr: http_value = 'http://{basic_auth}{http_addr}'.format(basic_auth=basic_auth, http_addr=http_addr) if https_addr: https_value = 'https://{basic_auth}{https_addr}'.format(basic_auth=basic_auth, https_addr=https_addr) return {'http': http_value, 'https': https_value} def get_user_agent(): """ Return a user agent customised for the platform the user is on. """ browser_list = USER_AGENTS.get(sys.platform, None) if not browser_list: browser_list = USER_AGENTS['default'] random_index = randint(0, len(browser_list) - 1) return browser_list[random_index] def get_web_page(url, headers=None, update_openlp=False, proxy=None): """ Attempts to download the webpage at url and returns that page or None. :param url: The URL to be downloaded. :param dict | None headers: An optional HTTP header to pass in the request to the web server. :param update_openlp: Tells OpenLP to update itself if the page is successfully downloaded. Defaults to False. :param dict | ProxyMode | None proxy: ProxyMode enum or a dictionary containing the proxy servers, with their types as the key e.g. {'http': 'http://proxyserver:port', 'https': 'https://proxyserver:port'} """ if not url: return None if not headers: headers = {} if 'user-agent' not in [key.lower() for key in headers.keys()]: headers['User-Agent'] = get_user_agent() if not isinstance(proxy, dict): proxy = get_proxy_settings(mode=proxy) log.debug('Downloading URL = %s' % url) retries = 0 while retries < CONNECTION_RETRIES: try: response = requests.get(url, headers=headers, proxies=proxy, timeout=float(CONNECTION_TIMEOUT)) log.debug('Downloaded page {url}'.format(url=response.url)) break except OSError: # For now, catch OSError. All requests errors inherit from OSError log.exception('Unable to connect to {url}'.format(url=url)) response = None if retries >= CONNECTION_RETRIES: raise ConnectionError('Unable to connect to {url}, see log for details'.format(url=url)) retries += 1 except: # noqa # Don't know what's happening, so reraise the original log.exception('Unknown error when trying to connect to {url}'.format(url=url)) raise if update_openlp: Registry().get('application').process_events() if not response or not response.text: log.error('{url} could not be downloaded'.format(url=url)) return None return response.text def get_url_file_size(url, proxy=None): """ Get the size of a file. :param url: The URL of the file we want to download. :param dict | ProxyMode | None proxy: ProxyMode enum or a dictionary containing the proxy servers, with their types as the key e.g. {'http': 'http://proxyserver:port', 'https': 'https://proxyserver:port'} """ retries = 0 if not isinstance(proxy, dict): proxy = get_proxy_settings(mode=proxy) while True: try: response = requests.head(url, proxies=proxy, timeout=float(CONNECTION_TIMEOUT), allow_redirects=True) return int(response.headers['Content-Length']) except OSError: if retries > CONNECTION_RETRIES: raise ConnectionError('Unable to download {url}'.format(url=url)) else: retries += 1 time.sleep(0.1) continue def download_file(update_object, url, file_path, sha256=None, proxy=None): """" Download a file given a URL. The file is retrieved in chunks, giving the ability to cancel the download at any point. Returns False on download error. :param update_object: the object which needs to be updated :param url: URL to download :param file_path: Destination file :param sha256: The check sum value to be checked against the download value :param dict | ProxyMode | None proxy: ProxyMode enum or a dictionary containing the proxy servers, with their types as the key e.g. {'http': 'http://proxyserver:port', 'https': 'https://proxyserver:port'} """ block_count = 0 block_size = 4096 retries = 0 if not isinstance(proxy, dict): proxy = get_proxy_settings(mode=proxy) log.debug('url_get_file: %s', url) while retries < CONNECTION_RETRIES: try: with file_path.open('wb') as saved_file: response = requests.get(url, proxies=proxy, timeout=float(CONNECTION_TIMEOUT), stream=True) if sha256: hasher = hashlib.sha256() # Download until finished or canceled. for chunk in response.iter_content(chunk_size=block_size): if hasattr(update_object, 'was_cancelled') and update_object.was_cancelled: break saved_file.write(chunk) if sha256: hasher.update(chunk) block_count += 1 if hasattr(update_object, 'update_progress'): update_object.update_progress(block_count, block_size) response.close() if sha256 and hasher.hexdigest() != sha256: log.error('sha256 sums did not match for file %s, got %s, expected %s', file_path, hasher.hexdigest(), sha256) if file_path.exists(): file_path.unlink() return False break except OSError: trace_error_handler(log) if retries > CONNECTION_RETRIES: if file_path.exists(): file_path.unlink() return False else: retries += 1 time.sleep(0.1) continue if hasattr(update_object, 'was_cancelled') and update_object.was_cancelled and file_path.exists(): file_path.unlink() return True class DownloadWorker(ThreadWorker): """ This worker allows a file to be downloaded in a thread """ download_failed = QtCore.pyqtSignal() download_succeeded = QtCore.pyqtSignal(Path) def __init__(self, base_url, file_name): """ Set up the worker object """ self._base_url = base_url self._file_name = file_name self.was_cancelled = False super().__init__() def start(self): """ Download the url to the temporary directory """ if self.was_cancelled: self.quit.emit() return try: dest_path = Path(gettempdir()) / 'openlp' / self._file_name url = '{url}{name}'.format(url=self._base_url, name=self._file_name) is_success = download_file(self, url, dest_path) if is_success and not self.was_cancelled: self.download_succeeded.emit(dest_path) else: self.download_failed.emit() except Exception: log.exception('Unable to download %s', url) self.download_failed.emit() finally: self.quit.emit() @QtCore.pyqtSlot() def cancel_download(self): """ A slot to allow the download to be cancelled from outside of the thread """ self.was_cancelled = True