# -*- coding: utf-8 -*-
# vim: autoindent shiftwidth=4 expandtab textwidth=120 tabstop=4 softtabstop=4

###############################################################################
# OpenLP - Open Source Lyrics Projection                                      #
# --------------------------------------------------------------------------- #
# Copyright (c) 2008-2017 OpenLP Developers                                   #
# --------------------------------------------------------------------------- #
# This program is free software; you can redistribute it and/or modify it     #
# under the terms of the GNU General Public License as published by the Free  #
# Software Foundation; version 2 of the License.                              #
#                                                                             #
# This program is distributed in the hope that it will be useful, but WITHOUT #
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or       #
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for    #
# more details.                                                               #
#                                                                             #
# You should have received a copy of the GNU General Public License along     #
# with this program; if not, write to the Free Software Foundation, Inc., 59  #
# Temple Place, Suite 330, Boston, MA 02111-1307 USA                          #
###############################################################################
"""
The :mod:`openlp.core.utils` module provides the utility libraries for OpenLP.
"""
import hashlib
import logging
import sys
import time
from random import choice, randint

import requests

from openlp.core.common import Registry, trace_error_handler
# Module-level logger; the explicit '.__init__' suffix distinguishes output
# from this package initialiser from the package's submodules.
log = logging.getLogger(__name__ + '.__init__')

# Pools of realistic browser User-Agent strings, keyed by sys.platform value.
# get_user_agent() picks one at random so HTTP requests made by this module
# present themselves as a regular browser.
# NOTE(review): the Linux key is 'linux2' (the Python 2 value of
# sys.platform); on Python 3 sys.platform is plain 'linux', so a literal
# lookup would fall through to 'default' — confirm which interpreter versions
# are supported.
USER_AGENTS = {
    'win32': [
        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.71 Safari/537.36'
    ],
    'darwin': [
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.31 (KHTML, like Gecko) '
        'Chrome/26.0.1410.43 Safari/537.31',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/536.11 (KHTML, like Gecko) '
        'Chrome/20.0.1132.57 Safari/536.11',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/536.11 (KHTML, like Gecko) '
        'Chrome/20.0.1132.47 Safari/536.11',
    ],
    'linux2': [
        'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.22 (KHTML, like Gecko) Ubuntu Chromium/25.0.1364.160 '
        'Chrome/25.0.1364.160 Safari/537.22',
        'Mozilla/5.0 (X11; CrOS armv7l 2913.260.0) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.99 '
        'Safari/537.11',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.27 (KHTML, like Gecko) Chrome/26.0.1389.0 Safari/537.27'
    ],
    # Fallback pool for any platform not listed above.
    'default': [
        'Mozilla/5.0 (X11; NetBSD amd64; rv:18.0) Gecko/20130120 Firefox/18.0'
    ]
}

# Per-request timeout in seconds for every HTTP call in this module.
CONNECTION_TIMEOUT = 30
# Retry budget used by the download helpers below.
CONNECTION_RETRIES = 2
def get_user_agent():
    """
    Return a user agent customised for the platform the user is on.

    The agent string is drawn at random from the ``USER_AGENTS`` pool for the
    current ``sys.platform``. Platforms starting with ``'linux'`` are mapped
    to the pool's ``'linux2'`` key (the Python 2 spelling); any platform with
    no pool of its own falls back to the ``'default'`` list.
    """
    # Bug fix: on Python 3 sys.platform is 'linux', not 'linux2', so the
    # direct dict lookup never matched on Linux and users always received the
    # 'default' agent. Normalise the key before looking it up.
    platform = 'linux2' if sys.platform.startswith('linux') else sys.platform
    browser_list = USER_AGENTS.get(platform)
    if not browser_list:
        browser_list = USER_AGENTS['default']
    # random.choice is the idiomatic replacement for randint-based indexing.
    return choice(browser_list)
def get_web_page(url, headers=None, update_openlp=False, proxies=None):
    """
    Attempt to download the web page at ``url`` and return its text.

    :param url: The URL to be downloaded. A falsy value returns None at once.
    :param headers: An optional dict of HTTP headers to pass in the request to
        the web server. A ``User-Agent`` header is injected unless one is
        already present (checked case-insensitively).
    :param update_openlp: Tells OpenLP to process pending application events
        after the download. Defaults to False.
    :param proxies: Optional proxies mapping, passed straight to requests.
    :return: The page text, or None if ``url`` was empty or the response body
        was empty.
    :raises ConnectionError: If the page could not be fetched within
        CONNECTION_RETRIES attempts.
    """
    if not url:
        return None
    if not headers:
        headers = {}
    # Only inject a User-Agent when the caller did not supply one, whatever
    # the header's capitalisation.
    if 'user-agent' not in [key.lower() for key in headers.keys()]:
        headers['User-Agent'] = get_user_agent()
    log.debug('Downloading URL = %s' % url)
    # Initialise response so the post-loop check is safe even if the loop
    # body never runs (e.g. CONNECTION_RETRIES <= 0).
    response = None
    retries = 0
    while retries < CONNECTION_RETRIES:
        try:
            response = requests.get(url, headers=headers, proxies=proxies, timeout=float(CONNECTION_TIMEOUT))
            log.debug('Downloaded page {url}'.format(url=response.url))
            break
        except IOError:
            # For now, catch IOError. All requests errors inherit from IOError
            log.exception('Unable to connect to {url}'.format(url=url))
            response = None
            retries += 1
            # Bug fix: increment before comparing. Previously the comparison
            # ran first, and since the loop guarantees
            # retries < CONNECTION_RETRIES the raise was unreachable — the
            # function silently returned None after exhausting its retries.
            if retries >= CONNECTION_RETRIES:
                raise ConnectionError('Unable to connect to {url}, see log for details'.format(url=url))
        except:  # noqa: E722 — re-raised immediately, nothing is swallowed
            # Don't know what's happening, so reraise the original
            log.exception('Unknown error when trying to connect to {url}'.format(url=url))
            raise
    if update_openlp:
        Registry().get('application').process_events()
    if not response or not response.text:
        log.error('{url} could not be downloaded'.format(url=url))
        return None
    return response.text
def get_url_file_size(url):
    """
    Return the size in bytes of the file at ``url``.

    Issues a HEAD request (following redirects) and reads the
    ``Content-Length`` header. Failed requests are retried after a short
    pause until the retry budget is spent.

    :param url: The URL of the file we want to download.
    :raises ConnectionError: If the server cannot be reached after the
        allowed number of retries.
    """
    attempts = 0
    while True:
        try:
            response = requests.head(url, timeout=float(CONNECTION_TIMEOUT), allow_redirects=True)
            return int(response.headers['Content-Length'])
        except IOError:
            # Give up once we have already used more than the retry budget.
            if attempts > CONNECTION_RETRIES:
                raise ConnectionError('Unable to download {url}'.format(url=url))
            attempts += 1
            time.sleep(0.1)
def url_get_file(callback, url, file_path, sha256=None):
    """
    Download a file given a URL. The file is retrieved in chunks, giving the
    ability to cancel the download at any point. Returns False on download
    error.

    :param callback: the class which needs to be updated; it must expose a
        ``was_cancelled`` attribute and a ``_download_progress`` method
    :param url: URL to download
    :param file_path: Destination file — assumed to be a pathlib.Path
        (uses ``open``/``exists``/``unlink``)
    :param sha256: The check sum value to be checked against the download value
    :return: True on success or cancellation, False on download/checksum error.
    """
    # (Fixed: the docstring above previously opened with a stray fourth
    # quote character.)
    block_count = 0
    block_size = 4096
    retries = 0
    log.debug('url_get_file: %s', url)
    while retries < CONNECTION_RETRIES:
        try:
            with file_path.open('wb') as saved_file:
                response = requests.get(url, timeout=float(CONNECTION_TIMEOUT), stream=True)
                if sha256:
                    hasher = hashlib.sha256()
                # Download until finished or canceled.
                for chunk in response.iter_content(chunk_size=block_size):
                    if callback.was_cancelled:
                        break
                    saved_file.write(chunk)
                    if sha256:
                        hasher.update(chunk)
                    block_count += 1
                    callback._download_progress(block_count, block_size)
                response.close()
            if sha256 and hasher.hexdigest() != sha256:
                log.error('sha256 sums did not match for file %s, got %s, expected %s', file_path, hasher.hexdigest(),
                          sha256)
                if file_path.exists():
                    file_path.unlink()
                return False
            break
        except IOError:
            trace_error_handler(log)
            retries += 1
            # Bug fix: increment before comparing against the retry budget.
            # Previously `if retries > CONNECTION_RETRIES` could never be true
            # inside `while retries < CONNECTION_RETRIES`, so after every
            # attempt failed the loop simply ended and the function returned
            # True with a partial or empty file on disk.
            if retries >= CONNECTION_RETRIES:
                if file_path.exists():
                    file_path.unlink()
                return False
            time.sleep(0.1)
    # NOTE(review): a cancelled download still returns True (after removing
    # the partial file) unless the sha256 check rejected it first — confirm
    # callers rely on this.
    if callback.was_cancelled and file_path.exists():
        file_path.unlink()
    return True
# Explicit public API of this module; only get_web_page is exported.
__all__ = ['get_web_page']