Moved get file encoding from lib to common

This commit is contained in:
Philip Ridout 2016-08-09 07:24:04 +01:00
parent b65b05de70
commit b502c0b79d
5 changed files with 89 additions and 88 deletions

View File

@ -30,6 +30,7 @@ import os
import re import re
import sys import sys
import traceback import traceback
from chardet.universaldetector import UniversalDetector
from ipaddress import IPv4Address, IPv6Address, AddressValueError from ipaddress import IPv4Address, IPv6Address, AddressValueError
from shutil import which from shutil import which
from subprocess import check_output, CalledProcessError, STDOUT from subprocess import check_output, CalledProcessError, STDOUT
@ -416,3 +417,24 @@ def check_binary_exists(program_path):
runlog = '' runlog = ''
log.debug('check_output returned: {text}'.format(text=runlog)) log.debug('check_output returned: {text}'.format(text=runlog))
return runlog return runlog
def get_file_encoding(filename):
"""
Utility function to incrementally detect the file encoding.
:param filename: Filename for the file to determine the encoding for. Str
:return: A dict with the keys 'encoding' and 'confidence'
"""
detector = UniversalDetector()
try:
with open(filename, 'rb') as detect_file:
while not detector.done:
chunk = detect_file.read(1024)
if not chunk:
break
detector.feed(chunk)
detector.close()
return detector.result
except OSError:
log.exception('Error detecting file encoding')

View File

@ -27,7 +27,6 @@ OpenLP work.
import logging import logging
import os import os
from distutils.version import LooseVersion from distutils.version import LooseVersion
from chardet.universaldetector import UniversalDetector
from PyQt5 import QtCore, QtGui, Qt, QtWidgets from PyQt5 import QtCore, QtGui, Qt, QtWidgets
@ -338,27 +337,6 @@ def create_separated_list(string_list):
return translate('OpenLP.core.lib', '%s, %s', 'Locale list separator: start') % (string_list[0], merged) return translate('OpenLP.core.lib', '%s, %s', 'Locale list separator: start') % (string_list[0], merged)
def get_file_encoding(filename):
"""
Utility function to incrementally detect the file encoding.
:param filename: Filename for the file to determine the encoding for. Str
:return: A dict with the keys 'encoding' and 'confidence'
"""
detector = UniversalDetector()
try:
with open(filename, 'rb') as detect_file:
while not detector.done:
chunk = detect_file.read(1024)
if not chunk:
break
detector.feed(chunk)
detector.close()
return detector.result
except OSError:
log.exception('Error detecting file encoding')
from .exceptions import ValidationError from .exceptions import ValidationError
from .filedialog import FileDialog from .filedialog import FileDialog
from .screen import ScreenList from .screen import ScreenList

View File

@ -4,7 +4,7 @@
############################################################################### ###############################################################################
# OpenLP - Open Source Lyrics Projection # # OpenLP - Open Source Lyrics Projection #
# --------------------------------------------------------------------------- # # --------------------------------------------------------------------------- #
# Copyright (c) 2008-2015 OpenLP Developers # # Copyright (c) 2008-2016 OpenLP Developers #
# --------------------------------------------------------------------------- # # --------------------------------------------------------------------------- #
# This program is free software; you can redistribute it and/or modify it # # This program is free software; you can redistribute it and/or modify it #
# under the terms of the GNU General Public License as published by the Free # # under the terms of the GNU General Public License as published by the Free #

View File

@ -23,11 +23,12 @@
Functional tests to test the AppLocation class and related methods. Functional tests to test the AppLocation class and related methods.
""" """
import os import os
from io import BytesIO
from unittest import TestCase from unittest import TestCase
from openlp.core.common import add_actions, get_uno_instance, get_uno_command, delete_file, get_filesystem_encoding, \ from openlp.core.common import add_actions, clean_filename, delete_file, get_file_encoding, get_filesystem_encoding, \
split_filename, clean_filename get_uno_command, get_uno_instance, split_filename
from tests.functional import MagicMock, patch from tests.functional import MagicMock, PropertyMock, call, patch
from tests.helpers.testmixin import TestMixin from tests.helpers.testmixin import TestMixin
@ -340,3 +341,63 @@ class TestInit(TestCase, TestMixin):
# THEN: delete_file should log and exception and return False # THEN: delete_file should log and exception and return False
self.assertEqual(mocked_log.exception.call_count, 1) self.assertEqual(mocked_log.exception.call_count, 1)
self.assertFalse(result, 'delete_file should return False when os.remove raises an OSError') self.assertFalse(result, 'delete_file should return False when os.remove raises an OSError')
def test_get_file_name_encoding_done_test(self):
"""
Test get_file_encoding when the detector sets done to True
"""
# GIVEN: A mocked UniversalDetector instance with done attribute set to True after first iteration
with patch('openlp.core.common.UniversalDetector') as mocked_universal_detector, \
patch('builtins.open', return_value=BytesIO(b"data" * 260)) as mocked_open:
encoding_result = {'encoding': 'UTF-8', 'confidence': 0.99}
mocked_universal_detector_inst = MagicMock(result=encoding_result)
type(mocked_universal_detector_inst).done = PropertyMock(side_effect=[False, True])
mocked_universal_detector.return_value = mocked_universal_detector_inst
# WHEN: Calling get_file_encoding
result = get_file_encoding('file name')
# THEN: The feed method of UniversalDetector should only br called once before returning a result
mocked_open.assert_called_once_with('file name', 'rb')
self.assertEqual(mocked_universal_detector_inst.feed.mock_calls, [call(b"data" * 256)])
mocked_universal_detector_inst.close.assert_called_once_with()
self.assertEqual(result, encoding_result)
def test_get_file_name_encoding_eof_test(self):
"""
Test get_file_encoding when the end of the file is reached
"""
# GIVEN: A mocked UniversalDetector instance which isn't set to done and a mocked open, with 1040 bytes of test
# data (enough to run the iterator twice)
with patch('openlp.core.common.UniversalDetector') as mocked_universal_detector, \
patch('builtins.open', return_value=BytesIO(b"data" * 260)) as mocked_open:
encoding_result = {'encoding': 'UTF-8', 'confidence': 0.99}
mocked_universal_detector_inst = MagicMock(mock=mocked_universal_detector,
**{'done': False, 'result': encoding_result})
mocked_universal_detector.return_value = mocked_universal_detector_inst
# WHEN: Calling get_file_encoding
result = get_file_encoding('file name')
# THEN: The feed method of UniversalDetector should have been called twice before returning a result
mocked_open.assert_called_once_with('file name', 'rb')
self.assertEqual(mocked_universal_detector_inst.feed.mock_calls, [call(b"data" * 256), call(b"data" * 4)])
mocked_universal_detector_inst.close.assert_called_once_with()
self.assertEqual(result, encoding_result)
def test_get_file_name_encoding_oserror_test(self):
"""
Test get_file_encoding when the end of the file is reached
"""
# GIVEN: A mocked UniversalDetector instance which isn't set to done and a mocked open, with 1040 bytes of test
# data (enough to run the iterator twice)
with patch('openlp.core.common.UniversalDetector'), \
patch('builtins.open', side_effect=OSError), \
patch('openlp.core.common.log') as mocked_log:
# WHEN: Calling get_file_encoding
result = get_file_encoding('file name')
# THEN: log.exception should be called and get_file_encoding should return None
mocked_log.exception.assert_called_once_with('Error detecting file encoding')
self.assertIsNone(result)

View File

@ -23,7 +23,6 @@
Package to test the openlp.core.lib package. Package to test the openlp.core.lib package.
""" """
import os import os
from io import BytesIO
from unittest import TestCase from unittest import TestCase
from datetime import datetime, timedelta from datetime import datetime, timedelta
@ -31,8 +30,8 @@ from datetime import datetime, timedelta
from PyQt5 import QtCore, QtGui from PyQt5 import QtCore, QtGui
from openlp.core.lib import build_icon, check_item_selected, clean_tags, create_thumb, create_separated_list, \ from openlp.core.lib import build_icon, check_item_selected, clean_tags, create_thumb, create_separated_list, \
expand_tags, get_file_encoding, get_text_file_string, image_to_byte, resize_image, str_to_bool, validate_thumb expand_tags, get_text_file_string, image_to_byte, resize_image, str_to_bool, validate_thumb
from tests.functional import MagicMock, PropertyMock, call, patch from tests.functional import MagicMock, patch
TEST_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'resources')) TEST_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'resources'))
@ -737,62 +736,3 @@ class TestLib(TestCase):
# THEN: We should have "Author 1, Author 2, and Author 3" # THEN: We should have "Author 1, Author 2, and Author 3"
assert string_result == 'Author 1, Author 2, and Author 3', 'The string should be u\'Author 1, ' \ assert string_result == 'Author 1, Author 2, and Author 3', 'The string should be u\'Author 1, ' \
'Author 2, and Author 3\'.' 'Author 2, and Author 3\'.'
def test_get_file_name_encoding_done_test(self):
"""
Test get_file_encoding when the detector sets done to True
"""
# GIVEN: A mocked UniversalDetector instance with done attribute set to True after first iteration
with patch('openlp.core.lib.UniversalDetector') as mocked_universal_detector, \
patch('builtins.open', return_value=BytesIO(b"data" * 260)) as mocked_open:
encoding_result = {'encoding': 'UTF-8', 'confidence': 0.99}
mocked_universal_detector_inst = MagicMock(result=encoding_result)
type(mocked_universal_detector_inst).done = PropertyMock(side_effect=[False, True])
mocked_universal_detector.return_value = mocked_universal_detector_inst
# WHEN: Calling get_file_encoding
result = get_file_encoding('file name')
# THEN: The feed method of UniversalDetector should only br called once before returning a result
mocked_open.assert_called_once_with('file name', 'rb')
self.assertEqual(mocked_universal_detector_inst.feed.mock_calls, [call(b"data" * 256)])
mocked_universal_detector_inst.close.assert_called_once_with()
self.assertEqual(result, encoding_result)
def test_get_file_name_encoding_eof_test(self):
"""
Test get_file_encoding when the end of the file is reached
"""
# GIVEN: A mocked UniversalDetector instance which isn't set to done and a mocked open, with 1040 bytes of test
# data (enough to run the iterator twice)
with patch('openlp.core.lib.UniversalDetector') as mocked_universal_detector, \
patch('builtins.open', return_value=BytesIO(b"data" * 260)) as mocked_open:
encoding_result = {'encoding': 'UTF-8', 'confidence': 0.99}
mocked_universal_detector_inst = MagicMock(mock=mocked_universal_detector,
**{'done': False, 'result': encoding_result})
mocked_universal_detector.return_value = mocked_universal_detector_inst
# WHEN: Calling get_file_encoding
result = get_file_encoding('file name')
# THEN: The feed method of UniversalDetector should have been called twice before returning a result
mocked_open.assert_called_once_with('file name', 'rb')
self.assertEqual(mocked_universal_detector_inst.feed.mock_calls, [call(b"data" * 256), call(b"data" * 4)])
mocked_universal_detector_inst.close.assert_called_once_with()
self.assertEqual(result, encoding_result)
def test_get_file_name_encoding_oserror_test(self):
"""
Test get_file_encoding when the end of the file is reached
"""
# GIVEN: A mocked UniversalDetector instance which isn't set to done and a mocked open, with 1040 bytes of test
# data (enough to run the iterator twice)
with patch('openlp.core.lib.UniversalDetector'), \
patch('builtins.open', side_effect=OSError), \
patch('openlp.core.lib.log') as mocked_log:
# WHEN: Calling get_file_encoding
result = get_file_encoding('file name')
# THEN: log.exception should be called and get_file_encoding should return None
mocked_log.exception.assert_called_once_with('Error detecting file encoding')
self.assertIsNone(result)