Merge branch 'issue-238-handle-headers' into 'master'

Add a way to detect if there is a header in the CSV file

Closes #238

See merge request openlp/openlp!415
This commit is contained in:
Tomas Groth 2022-02-09 17:48:37 +00:00
commit 296c3cb89d
2 changed files with 68 additions and 13 deletions

View File

@ -48,9 +48,9 @@ There are two acceptable formats of the verses file. They are:
All CSV files are expected to use a comma (',') as the delimiter and double quotes ('"') as the quote symbol. All CSV files are expected to use a comma (',') as the delimiter and double quotes ('"') as the quote symbol.
""" """
import csv
import logging import logging
from collections import namedtuple from collections import namedtuple
from csv import Error as CSVError, reader
from openlp.core.common import get_file_encoding from openlp.core.common import get_file_encoding
from openlp.core.common.i18n import translate from openlp.core.common.i18n import translate
@ -63,6 +63,19 @@ Book = namedtuple('Book', 'id, testament_id, name, abbreviation')
Verse = namedtuple('Verse', 'book_id_name, chapter_number, number, text') Verse = namedtuple('Verse', 'book_id_name, chapter_number, number, text')
def _has_header(sample):
"""Determine if the sample of a csv file has a header line"""
if '\r\n' in sample:
lines = sample.split('\r\n')
else:
lines = sample.split('\n')
row_1 = lines[0].split(',')
row_2 = lines[1].split(',')
if all([row_2[0].isdigit(), row_2[1].isdigit()]) and not all([row_1[0].isdigit(), row_1[1].isdigit()]):
return True
return False
class CSVBible(BibleImport): class CSVBible(BibleImport):
""" """
This class provides a specialisation for importing of CSV Bibles. This class provides a specialisation for importing of CSV Bibles.
@ -105,9 +118,17 @@ class CSVBible(BibleImport):
try: try:
encoding = get_file_encoding(file_path) encoding = get_file_encoding(file_path)
with file_path.open('r', encoding=encoding, newline='') as csv_file: with file_path.open('r', encoding=encoding, newline='') as csv_file:
csv_reader = csv.reader(csv_file, delimiter=',', quotechar='"') # Grab a sample from the file, and rewind to the beginning
sample = csv_file.read(4096)
csv_file.seek(0)
# Create the reader
csv_reader = reader(csv_file, delimiter=',', quotechar='"')
# Determine if the CSV has a header and skip if necessary
if _has_header(sample):
print("has_header")
next(csv_reader)
return [results_tuple(*line) for line in csv_reader] return [results_tuple(*line) for line in csv_reader]
except (OSError, csv.Error, TypeError, UnicodeDecodeError): except (OSError, CSVError, TypeError, UnicodeDecodeError, ValueError):
log.exception('Parsing {file} failed.'.format(file=file_path)) log.exception('Parsing {file} failed.'.format(file=file_path))
raise ValidationError(msg='Parsing "{file}" failed'.format(file=file_path)) raise ValidationError(msg='Parsing "{file}" failed'.format(file=file_path))

View File

@ -29,7 +29,7 @@ from unittest.mock import MagicMock, PropertyMock, call, patch
from openlp.core.lib.exceptions import ValidationError from openlp.core.lib.exceptions import ValidationError
from openlp.plugins.bibles.lib.bibleimport import BibleImport from openlp.plugins.bibles.lib.bibleimport import BibleImport
from openlp.plugins.bibles.lib.importers.csvbible import Book, CSVBible, Verse from openlp.plugins.bibles.lib.importers.csvbible import Book, CSVBible, Verse, _has_header
from tests.utils import load_external_result_data from tests.utils import load_external_result_data
from tests.utils.constants import RESOURCE_PATH from tests.utils.constants import RESOURCE_PATH
@ -128,8 +128,10 @@ def test_parse_csv_file():
mocked_csv_file.open.return_value.__enter__.return_value = mocked_enter_file mocked_csv_file.open.return_value.__enter__.return_value = mocked_enter_file
with patch('openlp.plugins.bibles.lib.importers.csvbible.get_file_encoding', return_value='utf-8'), \ with patch('openlp.plugins.bibles.lib.importers.csvbible.get_file_encoding', return_value='utf-8'), \
patch('openlp.plugins.bibles.lib.importers.csvbible.csv.reader', patch('openlp.plugins.bibles.lib.importers.csvbible.reader',
return_value=iter(test_data)) as mocked_reader: return_value=iter(test_data)) as mocked_reader, \
patch('openlp.plugins.bibles.lib.importers.csvbible._has_header') as mocked_has_header:
mocked_has_header.return_value = False
# WHEN: Calling the CSVBible parse_csv_file method with a file name and TestTuple # WHEN: Calling the CSVBible parse_csv_file method with a file name and TestTuple
result = CSVBible.parse_csv_file(mocked_csv_file, TestTuple) result = CSVBible.parse_csv_file(mocked_csv_file, TestTuple)
@ -141,6 +143,38 @@ def test_parse_csv_file():
mocked_reader.assert_called_once_with(mocked_enter_file, delimiter=',', quotechar='"') mocked_reader.assert_called_once_with(mocked_enter_file, delimiter=',', quotechar='"')
def test_has_header():
    """
    Test that _has_header() detects a header line in sample data
    """
    # GIVEN: Sample CSV text whose first line is a header and whose second line is a data row
    sample = '<book_id>,<testament_id>,<book_name>,<abbreviation>\n1,1,Genesis,Gen\n'

    # WHEN: The sample is passed to _has_header()
    header_found = _has_header(sample)

    # THEN: A header should be detected
    assert header_found is True
def test_has_no_header():
    """
    Test that _has_header() reports no header for sample data made up of data rows only
    """
    # GIVEN: Sample CSV text whose first two lines are both data rows
    sample = '1,1,Genesis,Gen\n2,1,Exodus,Exo\n'

    # WHEN: The sample is passed to _has_header()
    header_found = _has_header(sample)

    # THEN: No header should be detected
    assert header_found is False
def test_parse_csv_file_oserror(): def test_parse_csv_file_oserror():
""" """
Test the parse_csv_file() handles an OSError correctly Test the parse_csv_file() handles an OSError correctly
@ -170,7 +204,7 @@ def test_parse_csv_file_csverror():
with patch('openlp.plugins.bibles.lib.importers.csvbible.get_file_encoding', with patch('openlp.plugins.bibles.lib.importers.csvbible.get_file_encoding',
return_value={'encoding': 'utf-8', 'confidence': 0.99}),\ return_value={'encoding': 'utf-8', 'confidence': 0.99}),\
patch('openlp.plugins.bibles.lib.importers.csvbible.csv.reader', side_effect=csv.Error): patch('openlp.plugins.bibles.lib.importers.csvbible.reader', side_effect=csv.Error):
# WHEN: Calling CSVBible.parse_csv_file # WHEN: Calling CSVBible.parse_csv_file
# THEN: A ValidationError should be raised # THEN: A ValidationError should be raised
@ -185,7 +219,7 @@ def test_process_books_stopped_import(registry):
""" """
# GIVEN: An instance of CSVBible with the stop_import_flag set to True # GIVEN: An instance of CSVBible with the stop_import_flag set to True
mocked_manager = MagicMock() mocked_manager = MagicMock()
with patch('openlp.plugins.bibles.lib.db.BibleDB._setup'): with patch('openlp.plugins.bibles.lib.bibleimport.BibleDB._setup'):
importer = CSVBible(mocked_manager, path='.', name='.', books_path=Path('books.csv'), importer = CSVBible(mocked_manager, path='.', name='.', books_path=Path('books.csv'),
verse_path=Path('verse.csv')) verse_path=Path('verse.csv'))
type(importer).application = PropertyMock() type(importer).application = PropertyMock()
@ -206,7 +240,7 @@ def test_process_books(registry):
""" """
# GIVEN: An instance of CSVBible with the stop_import_flag set to False, and some sample data # GIVEN: An instance of CSVBible with the stop_import_flag set to False, and some sample data
mocked_manager = MagicMock() mocked_manager = MagicMock()
with patch('openlp.plugins.bibles.lib.db.BibleDB._setup'),\ with patch('openlp.plugins.bibles.lib.bibleimport.BibleDB._setup'),\
patch('openlp.plugins.bibles.lib.importers.csvbible.translate'): patch('openlp.plugins.bibles.lib.importers.csvbible.translate'):
importer = CSVBible(mocked_manager, path='.', name='.', books_path=Path('books.csv'), importer = CSVBible(mocked_manager, path='.', name='.', books_path=Path('books.csv'),
verse_path=Path('verse.csv')) verse_path=Path('verse.csv'))
@ -233,7 +267,7 @@ def test_process_verses_stopped_import(registry):
""" """
# GIVEN: An instance of CSVBible with the stop_import_flag set to True # GIVEN: An instance of CSVBible with the stop_import_flag set to True
mocked_manager = MagicMock() mocked_manager = MagicMock()
with patch('openlp.plugins.bibles.lib.db.BibleDB._setup'): with patch('openlp.plugins.bibles.lib.bibleimport.BibleDB._setup'):
importer = CSVBible(mocked_manager, path='.', name='.', books_path=Path('books.csv'), importer = CSVBible(mocked_manager, path='.', name='.', books_path=Path('books.csv'),
verse_path=Path('verse.csv')) verse_path=Path('verse.csv'))
importer.get_book_name = MagicMock() importer.get_book_name = MagicMock()
@ -255,7 +289,7 @@ def test_process_verses_successful(registry):
""" """
# GIVEN: An instance of CSVBible with the application and wizard attributes mocked out, and some test data. # GIVEN: An instance of CSVBible with the application and wizard attributes mocked out, and some test data.
mocked_manager = MagicMock() mocked_manager = MagicMock()
with patch('openlp.plugins.bibles.lib.db.BibleDB._setup'),\ with patch('openlp.plugins.bibles.lib.bibleimport.BibleDB._setup'),\
patch('openlp.plugins.bibles.lib.importers.csvbible.translate'): patch('openlp.plugins.bibles.lib.importers.csvbible.translate'):
importer = CSVBible(mocked_manager, path='.', name='.', books_path=Path('books.csv'), importer = CSVBible(mocked_manager, path='.', name='.', books_path=Path('books.csv'),
verse_path=Path('verse.csv')) verse_path=Path('verse.csv'))
@ -289,7 +323,7 @@ def test_do_import_invalid_language_id(registry):
""" """
# GIVEN: An instance of CSVBible and a mocked get_language which simulates the user cancelling the language box # GIVEN: An instance of CSVBible and a mocked get_language which simulates the user cancelling the language box
mocked_manager = MagicMock() mocked_manager = MagicMock()
with patch('openlp.plugins.bibles.lib.db.BibleDB._setup'): with patch('openlp.plugins.bibles.lib.bibleimport.BibleDB._setup'):
importer = CSVBible(mocked_manager, path='.', name='.', books_path=Path('books.csv'), importer = CSVBible(mocked_manager, path='.', name='.', books_path=Path('books.csv'),
verse_path=Path('verse.csv')) verse_path=Path('verse.csv'))
importer.get_language = MagicMock(return_value=None) importer.get_language = MagicMock(return_value=None)
@ -308,7 +342,7 @@ def test_do_import_success(registry):
""" """
# GIVEN: An instance of CSVBible # GIVEN: An instance of CSVBible
mocked_manager = MagicMock() mocked_manager = MagicMock()
with patch('openlp.plugins.bibles.lib.db.BibleDB._setup'): with patch('openlp.plugins.bibles.lib.bibleimport.BibleDB._setup'):
importer = CSVBible(mocked_manager, path='.', name='.', books_path=Path('books.csv'), importer = CSVBible(mocked_manager, path='.', name='.', books_path=Path('books.csv'),
verse_path=Path('verses.csv')) verse_path=Path('verses.csv'))
importer.get_language = MagicMock(return_value=10) importer.get_language = MagicMock(return_value=10)