From b502c0b79dc523358e4101e9309f4e36934874d7 Mon Sep 17 00:00:00 2001 From: Philip Ridout Date: Tue, 9 Aug 2016 07:24:04 +0100 Subject: [PATCH] Moved get file encoding from lib to common --- openlp/core/common/__init__.py | 22 ++++++ openlp/core/lib/__init__.py | 22 ------ openlp/plugins/bibles/lib/bibleimport.py | 2 +- .../openlp_core_common/test_init.py | 67 ++++++++++++++++++- tests/functional/openlp_core_lib/test_lib.py | 64 +----------------- 5 files changed, 89 insertions(+), 88 deletions(-) diff --git a/openlp/core/common/__init__.py b/openlp/core/common/__init__.py index 9afc08c8f..41d446399 100644 --- a/openlp/core/common/__init__.py +++ b/openlp/core/common/__init__.py @@ -30,6 +30,7 @@ import os import re import sys import traceback +from chardet.universaldetector import UniversalDetector from ipaddress import IPv4Address, IPv6Address, AddressValueError from shutil import which from subprocess import check_output, CalledProcessError, STDOUT @@ -416,3 +417,24 @@ def check_binary_exists(program_path): runlog = '' log.debug('check_output returned: {text}'.format(text=runlog)) return runlog + + +def get_file_encoding(filename): + """ + Utility function to incrementally detect the file encoding. + + :param filename: Filename for the file to determine the encoding for. Str + :return: A dict with the keys 'encoding' and 'confidence' + """ + detector = UniversalDetector() + try: + with open(filename, 'rb') as detect_file: + while not detector.done: + chunk = detect_file.read(1024) + if not chunk: + break + detector.feed(chunk) + detector.close() + return detector.result + except OSError: + log.exception('Error detecting file encoding') diff --git a/openlp/core/lib/__init__.py b/openlp/core/lib/__init__.py index cf1f61aae..fed6df05c 100644 --- a/openlp/core/lib/__init__.py +++ b/openlp/core/lib/__init__.py @@ -27,7 +27,6 @@ OpenLP work. 
import logging import os from distutils.version import LooseVersion -from chardet.universaldetector import UniversalDetector from PyQt5 import QtCore, QtGui, Qt, QtWidgets @@ -338,27 +337,6 @@ def create_separated_list(string_list): return translate('OpenLP.core.lib', '%s, %s', 'Locale list separator: start') % (string_list[0], merged) -def get_file_encoding(filename): - """ - Utility function to incrementally detect the file encoding. - - :param filename: Filename for the file to determine the encoding for. Str - :return: A dict with the keys 'encoding' and 'confidence' - """ - detector = UniversalDetector() - try: - with open(filename, 'rb') as detect_file: - while not detector.done: - chunk = detect_file.read(1024) - if not chunk: - break - detector.feed(chunk) - detector.close() - return detector.result - except OSError: - log.exception('Error detecting file encoding') - - from .exceptions import ValidationError from .filedialog import FileDialog from .screen import ScreenList diff --git a/openlp/plugins/bibles/lib/bibleimport.py b/openlp/plugins/bibles/lib/bibleimport.py index 3af3a0381..a4313f5f6 100644 --- a/openlp/plugins/bibles/lib/bibleimport.py +++ b/openlp/plugins/bibles/lib/bibleimport.py @@ -4,7 +4,7 @@ ############################################################################### # OpenLP - Open Source Lyrics Projection # # --------------------------------------------------------------------------- # -# Copyright (c) 2008-2015 OpenLP Developers # +# Copyright (c) 2008-2016 OpenLP Developers # # --------------------------------------------------------------------------- # # This program is free software; you can redistribute it and/or modify it # # under the terms of the GNU General Public License as published by the Free # diff --git a/tests/functional/openlp_core_common/test_init.py b/tests/functional/openlp_core_common/test_init.py index 0ccaba94c..98d7aa2fc 100644 --- a/tests/functional/openlp_core_common/test_init.py +++ 
b/tests/functional/openlp_core_common/test_init.py @@ -23,11 +23,12 @@ Functional tests to test the AppLocation class and related methods. """ import os +from io import BytesIO from unittest import TestCase -from openlp.core.common import add_actions, get_uno_instance, get_uno_command, delete_file, get_filesystem_encoding, \ - split_filename, clean_filename -from tests.functional import MagicMock, patch +from openlp.core.common import add_actions, clean_filename, delete_file, get_file_encoding, get_filesystem_encoding, \ + get_uno_command, get_uno_instance, split_filename +from tests.functional import MagicMock, PropertyMock, call, patch from tests.helpers.testmixin import TestMixin @@ -340,3 +341,63 @@ class TestInit(TestCase, TestMixin): # THEN: delete_file should log and exception and return False self.assertEqual(mocked_log.exception.call_count, 1) self.assertFalse(result, 'delete_file should return False when os.remove raises an OSError') + + def test_get_file_name_encoding_done_test(self): + """ + Test get_file_encoding when the detector sets done to True + """ + # GIVEN: A mocked UniversalDetector instance with done attribute set to True after first iteration + with patch('openlp.core.common.UniversalDetector') as mocked_universal_detector, \ + patch('builtins.open', return_value=BytesIO(b"data" * 260)) as mocked_open: + encoding_result = {'encoding': 'UTF-8', 'confidence': 0.99} + mocked_universal_detector_inst = MagicMock(result=encoding_result) + type(mocked_universal_detector_inst).done = PropertyMock(side_effect=[False, True]) + mocked_universal_detector.return_value = mocked_universal_detector_inst + + # WHEN: Calling get_file_encoding + result = get_file_encoding('file name') + + # THEN: The feed method of UniversalDetector should only be called once before returning a result + mocked_open.assert_called_once_with('file name', 'rb') + self.assertEqual(mocked_universal_detector_inst.feed.mock_calls, [call(b"data" * 256)]) + 
mocked_universal_detector_inst.close.assert_called_once_with() + self.assertEqual(result, encoding_result) + + def test_get_file_name_encoding_eof_test(self): + """ + Test get_file_encoding when the end of the file is reached + """ + # GIVEN: A mocked UniversalDetector instance which isn't set to done and a mocked open, with 1040 bytes of test + # data (enough to run the iterator twice) + with patch('openlp.core.common.UniversalDetector') as mocked_universal_detector, \ + patch('builtins.open', return_value=BytesIO(b"data" * 260)) as mocked_open: + encoding_result = {'encoding': 'UTF-8', 'confidence': 0.99} + mocked_universal_detector_inst = MagicMock(mock=mocked_universal_detector, + **{'done': False, 'result': encoding_result}) + mocked_universal_detector.return_value = mocked_universal_detector_inst + + # WHEN: Calling get_file_encoding + result = get_file_encoding('file name') + + # THEN: The feed method of UniversalDetector should have been called twice before returning a result + mocked_open.assert_called_once_with('file name', 'rb') + self.assertEqual(mocked_universal_detector_inst.feed.mock_calls, [call(b"data" * 256), call(b"data" * 4)]) + mocked_universal_detector_inst.close.assert_called_once_with() + self.assertEqual(result, encoding_result) + + def test_get_file_name_encoding_oserror_test(self): + """ + Test get_file_encoding when opening the file raises an OSError + """ + # GIVEN: A mocked UniversalDetector, a mocked log and a mocked open which raises an OSError when called, so + # that no data is ever fed to the detector + with patch('openlp.core.common.UniversalDetector'), \ + patch('builtins.open', side_effect=OSError), \ + patch('openlp.core.common.log') as mocked_log: + + # WHEN: Calling get_file_encoding + result = get_file_encoding('file name') + + # THEN: log.exception should be called and get_file_encoding should return None + mocked_log.exception.assert_called_once_with('Error detecting file encoding') + self.assertIsNone(result) diff --git 
a/tests/functional/openlp_core_lib/test_lib.py b/tests/functional/openlp_core_lib/test_lib.py index fdf9f5acf..145be21f4 100644 --- a/tests/functional/openlp_core_lib/test_lib.py +++ b/tests/functional/openlp_core_lib/test_lib.py @@ -23,7 +23,6 @@ Package to test the openlp.core.lib package. """ import os -from io import BytesIO from unittest import TestCase from datetime import datetime, timedelta @@ -31,8 +30,8 @@ from datetime import datetime, timedelta from PyQt5 import QtCore, QtGui from openlp.core.lib import build_icon, check_item_selected, clean_tags, create_thumb, create_separated_list, \ - expand_tags, get_file_encoding, get_text_file_string, image_to_byte, resize_image, str_to_bool, validate_thumb -from tests.functional import MagicMock, PropertyMock, call, patch + expand_tags, get_text_file_string, image_to_byte, resize_image, str_to_bool, validate_thumb +from tests.functional import MagicMock, patch TEST_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'resources')) @@ -737,62 +736,3 @@ class TestLib(TestCase): # THEN: We should have "Author 1, Author 2, and Author 3" assert string_result == 'Author 1, Author 2, and Author 3', 'The string should be u\'Author 1, ' \ 'Author 2, and Author 3\'.' 
- - def test_get_file_name_encoding_done_test(self): - """ - Test get_file_encoding when the detector sets done to True - """ - # GIVEN: A mocked UniversalDetector instance with done attribute set to True after first iteration - with patch('openlp.core.lib.UniversalDetector') as mocked_universal_detector, \ - patch('builtins.open', return_value=BytesIO(b"data" * 260)) as mocked_open: - encoding_result = {'encoding': 'UTF-8', 'confidence': 0.99} - mocked_universal_detector_inst = MagicMock(result=encoding_result) - type(mocked_universal_detector_inst).done = PropertyMock(side_effect=[False, True]) - mocked_universal_detector.return_value = mocked_universal_detector_inst - - # WHEN: Calling get_file_encoding - result = get_file_encoding('file name') - - # THEN: The feed method of UniversalDetector should only br called once before returning a result - mocked_open.assert_called_once_with('file name', 'rb') - self.assertEqual(mocked_universal_detector_inst.feed.mock_calls, [call(b"data" * 256)]) - mocked_universal_detector_inst.close.assert_called_once_with() - self.assertEqual(result, encoding_result) - - def test_get_file_name_encoding_eof_test(self): - """ - Test get_file_encoding when the end of the file is reached - """ - # GIVEN: A mocked UniversalDetector instance which isn't set to done and a mocked open, with 1040 bytes of test - # data (enough to run the iterator twice) - with patch('openlp.core.lib.UniversalDetector') as mocked_universal_detector, \ - patch('builtins.open', return_value=BytesIO(b"data" * 260)) as mocked_open: - encoding_result = {'encoding': 'UTF-8', 'confidence': 0.99} - mocked_universal_detector_inst = MagicMock(mock=mocked_universal_detector, - **{'done': False, 'result': encoding_result}) - mocked_universal_detector.return_value = mocked_universal_detector_inst - - # WHEN: Calling get_file_encoding - result = get_file_encoding('file name') - - # THEN: The feed method of UniversalDetector should have been called twice before returning a 
result - mocked_open.assert_called_once_with('file name', 'rb') - self.assertEqual(mocked_universal_detector_inst.feed.mock_calls, [call(b"data" * 256), call(b"data" * 4)]) - mocked_universal_detector_inst.close.assert_called_once_with() - self.assertEqual(result, encoding_result) - - def test_get_file_name_encoding_oserror_test(self): - """ - Test get_file_encoding when the end of the file is reached - """ - # GIVEN: A mocked UniversalDetector instance which isn't set to done and a mocked open, with 1040 bytes of test - # data (enough to run the iterator twice) - with patch('openlp.core.lib.UniversalDetector'), \ - patch('builtins.open', side_effect=OSError), \ - patch('openlp.core.lib.log') as mocked_log: - # WHEN: Calling get_file_encoding - result = get_file_encoding('file name') - - # THEN: log.exception should be called and get_file_encoding should return None - mocked_log.exception.assert_called_once_with('Error detecting file encoding') - self.assertIsNone(result)