Moved get file encoding from lib to common

This commit is contained in:
Philip Ridout 2016-08-09 07:24:04 +01:00
parent b65b05de70
commit b502c0b79d
5 changed files with 89 additions and 88 deletions

View File

@ -30,6 +30,7 @@ import os
import re
import sys
import traceback
from chardet.universaldetector import UniversalDetector
from ipaddress import IPv4Address, IPv6Address, AddressValueError
from shutil import which
from subprocess import check_output, CalledProcessError, STDOUT
@ -416,3 +417,24 @@ def check_binary_exists(program_path):
runlog = ''
log.debug('check_output returned: {text}'.format(text=runlog))
return runlog
def get_file_encoding(filename):
    """
    Work out the character encoding of a file by feeding it to a chardet UniversalDetector in 1024 byte pieces.

    :param filename: Filename of the file to inspect. Str
    :return: The detector's result (a dict with the keys 'encoding' and 'confidence'), or None if an OSError was
        raised while opening or reading the file.
    """
    encoding_detector = UniversalDetector()
    try:
        with open(filename, 'rb') as binary_file:
            while True:
                # The detector is asked before every read, so nothing more is read once it has made up its mind.
                if encoding_detector.done:
                    break
                data = binary_file.read(1024)
                if not data:
                    # An empty read means the end of the file has been reached.
                    break
                encoding_detector.feed(data)
            encoding_detector.close()
        return encoding_detector.result
    except OSError:
        log.exception('Error detecting file encoding')
        return None

View File

@ -27,7 +27,6 @@ OpenLP work.
import logging
import os
from distutils.version import LooseVersion
from chardet.universaldetector import UniversalDetector
from PyQt5 import QtCore, QtGui, Qt, QtWidgets
@ -338,27 +337,6 @@ def create_separated_list(string_list):
return translate('OpenLP.core.lib', '%s, %s', 'Locale list separator: start') % (string_list[0], merged)
def get_file_encoding(filename):
    """
    Work out the character encoding of a file by feeding it to a chardet UniversalDetector in 1024 byte pieces.

    :param filename: Filename of the file to inspect. Str
    :return: The detector's result (a dict with the keys 'encoding' and 'confidence'), or None if an OSError was
        raised while opening or reading the file.
    """
    encoding_detector = UniversalDetector()
    try:
        with open(filename, 'rb') as binary_file:
            while True:
                # The detector is asked before every read, so nothing more is read once it has made up its mind.
                if encoding_detector.done:
                    break
                data = binary_file.read(1024)
                if not data:
                    # An empty read means the end of the file has been reached.
                    break
                encoding_detector.feed(data)
            encoding_detector.close()
        return encoding_detector.result
    except OSError:
        log.exception('Error detecting file encoding')
        return None
from .exceptions import ValidationError
from .filedialog import FileDialog
from .screen import ScreenList

View File

@ -4,7 +4,7 @@
###############################################################################
# OpenLP - Open Source Lyrics Projection #
# --------------------------------------------------------------------------- #
# Copyright (c) 2008-2015 OpenLP Developers #
# Copyright (c) 2008-2016 OpenLP Developers #
# --------------------------------------------------------------------------- #
# This program is free software; you can redistribute it and/or modify it #
# under the terms of the GNU General Public License as published by the Free #

View File

@ -23,11 +23,12 @@
Functional tests to test the AppLocation class and related methods.
"""
import os
from io import BytesIO
from unittest import TestCase
from openlp.core.common import add_actions, get_uno_instance, get_uno_command, delete_file, get_filesystem_encoding, \
split_filename, clean_filename
from tests.functional import MagicMock, patch
from openlp.core.common import add_actions, clean_filename, delete_file, get_file_encoding, get_filesystem_encoding, \
get_uno_command, get_uno_instance, split_filename
from tests.functional import MagicMock, PropertyMock, call, patch
from tests.helpers.testmixin import TestMixin
@ -340,3 +341,63 @@ class TestInit(TestCase, TestMixin):
# THEN: delete_file should log and exception and return False
self.assertEqual(mocked_log.exception.call_count, 1)
self.assertFalse(result, 'delete_file should return False when os.remove raises an OSError')
def test_get_file_name_encoding_done_test(self):
    """
    Test that get_file_encoding stops feeding data once the detector reports that it is done
    """
    # GIVEN: A mocked UniversalDetector instance whose done property is False the first time it is read and True
    #        the second time, and a mocked open which returns 1040 bytes of test data
    expected_result = {'encoding': 'UTF-8', 'confidence': 0.99}
    with patch('openlp.core.common.UniversalDetector') as mocked_detector_class, \
            patch('builtins.open', return_value=BytesIO(b"data" * 260)) as mocked_open:
        mocked_detector = MagicMock(result=expected_result)
        # A property has to live on the type, so the PropertyMock is attached to the mock's own class
        type(mocked_detector).done = PropertyMock(side_effect=[False, True])
        mocked_detector_class.return_value = mocked_detector

        # WHEN: Calling get_file_encoding
        result = get_file_encoding('file name')

        # THEN: The feed method of UniversalDetector should only be called once before returning a result
        mocked_open.assert_called_once_with('file name', 'rb')
        expected_feed_calls = [call(b"data" * 256)]
        self.assertEqual(mocked_detector.feed.mock_calls, expected_feed_calls)
        mocked_detector.close.assert_called_once_with()
        self.assertEqual(result, expected_result)
def test_get_file_name_encoding_eof_test(self):
    """
    Test that get_file_encoding keeps feeding data until the end of the file when the detector is never done
    """
    # GIVEN: A mocked UniversalDetector instance whose done attribute stays False and a mocked open which returns
    #        1040 bytes of test data (one full 1024 byte chunk followed by a 16 byte chunk)
    expected_result = {'encoding': 'UTF-8', 'confidence': 0.99}
    with patch('openlp.core.common.UniversalDetector') as mocked_detector_class, \
            patch('builtins.open', return_value=BytesIO(b"data" * 260)) as mocked_open:
        mocked_detector = MagicMock(mock=mocked_detector_class, done=False, result=expected_result)
        mocked_detector_class.return_value = mocked_detector

        # WHEN: Calling get_file_encoding
        result = get_file_encoding('file name')

        # THEN: The feed method of UniversalDetector should have been called twice before returning a result
        mocked_open.assert_called_once_with('file name', 'rb')
        expected_feed_calls = [call(b"data" * 256), call(b"data" * 4)]
        self.assertEqual(mocked_detector.feed.mock_calls, expected_feed_calls)
        mocked_detector.close.assert_called_once_with()
        self.assertEqual(result, expected_result)
def test_get_file_name_encoding_oserror_test(self):
    """
    Test that get_file_encoding logs the exception and returns None when opening the file raises an OSError
    """
    # GIVEN: A mocked UniversalDetector, a mocked open which raises an OSError and a mocked log
    with patch('openlp.core.common.UniversalDetector'), \
            patch('builtins.open', side_effect=OSError), \
            patch('openlp.core.common.log') as mocked_log:

        # WHEN: Calling get_file_encoding
        encoding = get_file_encoding('file name')

        # THEN: log.exception should be called and get_file_encoding should return None
        mocked_log.exception.assert_called_once_with('Error detecting file encoding')
        self.assertIsNone(encoding)

View File

@ -23,7 +23,6 @@
Package to test the openlp.core.lib package.
"""
import os
from io import BytesIO
from unittest import TestCase
from datetime import datetime, timedelta
@ -31,8 +30,8 @@ from datetime import datetime, timedelta
from PyQt5 import QtCore, QtGui
from openlp.core.lib import build_icon, check_item_selected, clean_tags, create_thumb, create_separated_list, \
expand_tags, get_file_encoding, get_text_file_string, image_to_byte, resize_image, str_to_bool, validate_thumb
from tests.functional import MagicMock, PropertyMock, call, patch
expand_tags, get_text_file_string, image_to_byte, resize_image, str_to_bool, validate_thumb
from tests.functional import MagicMock, patch
TEST_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'resources'))
@ -737,62 +736,3 @@ class TestLib(TestCase):
# THEN: We should have "Author 1, Author 2, and Author 3"
assert string_result == 'Author 1, Author 2, and Author 3', 'The string should be u\'Author 1, ' \
'Author 2, and Author 3\'.'
def test_get_file_name_encoding_done_test(self):
    """
    Test that get_file_encoding stops feeding data once the detector reports that it is done
    """
    # GIVEN: A mocked UniversalDetector instance whose done property is False the first time it is read and True
    #        the second time, and a mocked open which returns 1040 bytes of test data
    expected_result = {'encoding': 'UTF-8', 'confidence': 0.99}
    with patch('openlp.core.lib.UniversalDetector') as mocked_detector_class, \
            patch('builtins.open', return_value=BytesIO(b"data" * 260)) as mocked_open:
        mocked_detector = MagicMock(result=expected_result)
        # A property has to live on the type, so the PropertyMock is attached to the mock's own class
        type(mocked_detector).done = PropertyMock(side_effect=[False, True])
        mocked_detector_class.return_value = mocked_detector

        # WHEN: Calling get_file_encoding
        result = get_file_encoding('file name')

        # THEN: The feed method of UniversalDetector should only be called once before returning a result
        mocked_open.assert_called_once_with('file name', 'rb')
        expected_feed_calls = [call(b"data" * 256)]
        self.assertEqual(mocked_detector.feed.mock_calls, expected_feed_calls)
        mocked_detector.close.assert_called_once_with()
        self.assertEqual(result, expected_result)
def test_get_file_name_encoding_eof_test(self):
    """
    Test that get_file_encoding keeps feeding data until the end of the file when the detector is never done
    """
    # GIVEN: A mocked UniversalDetector instance whose done attribute stays False and a mocked open which returns
    #        1040 bytes of test data (one full 1024 byte chunk followed by a 16 byte chunk)
    expected_result = {'encoding': 'UTF-8', 'confidence': 0.99}
    with patch('openlp.core.lib.UniversalDetector') as mocked_detector_class, \
            patch('builtins.open', return_value=BytesIO(b"data" * 260)) as mocked_open:
        mocked_detector = MagicMock(mock=mocked_detector_class, done=False, result=expected_result)
        mocked_detector_class.return_value = mocked_detector

        # WHEN: Calling get_file_encoding
        result = get_file_encoding('file name')

        # THEN: The feed method of UniversalDetector should have been called twice before returning a result
        mocked_open.assert_called_once_with('file name', 'rb')
        expected_feed_calls = [call(b"data" * 256), call(b"data" * 4)]
        self.assertEqual(mocked_detector.feed.mock_calls, expected_feed_calls)
        mocked_detector.close.assert_called_once_with()
        self.assertEqual(result, expected_result)
def test_get_file_name_encoding_oserror_test(self):
    """
    Test that get_file_encoding logs the exception and returns None when opening the file raises an OSError
    """
    # GIVEN: A mocked UniversalDetector, a mocked open which raises an OSError and a mocked log
    with patch('openlp.core.lib.UniversalDetector'), \
            patch('builtins.open', side_effect=OSError), \
            patch('openlp.core.lib.log') as mocked_log:

        # WHEN: Calling get_file_encoding
        encoding = get_file_encoding('file name')

        # THEN: log.exception should be called and get_file_encoding should return None
        mocked_log.exception.assert_called_once_with('Error detecting file encoding')
        self.assertIsNone(encoding)
self.assertIsNone(result)