Add a way to detect if there is a header in the CSV file

This commit is contained in:
Raoul Snyman 2022-02-08 11:54:34 -07:00
parent 586007bb10
commit 6fcd9c233b
2 changed files with 68 additions and 13 deletions

View File

@ -48,9 +48,9 @@ There are two acceptable formats of the verses file. They are:
All CSV files are expected to use a comma (',') as the delimiter and double quotes ('"') as the quote symbol.
"""
import csv
import logging
from collections import namedtuple
from csv import Error as CSVError, reader
from openlp.core.common import get_file_encoding
from openlp.core.common.i18n import translate
@ -63,6 +63,19 @@ Book = namedtuple('Book', 'id, testament_id, name, abbreviation')
Verse = namedtuple('Verse', 'book_id_name, chapter_number, number, text')
def _has_header(sample):
"""Determine if the sample of a csv file has a header line"""
if '\r\n' in sample:
lines = sample.split('\r\n')
else:
lines = sample.split('\n')
row_1 = lines[0].split(',')
row_2 = lines[1].split(',')
if all([row_2[0].isdigit(), row_2[1].isdigit()]) and not all([row_1[0].isdigit(), row_1[1].isdigit()]):
return True
return False
class CSVBible(BibleImport):
"""
This class provides a specialisation for importing of CSV Bibles.
@ -105,9 +118,17 @@ class CSVBible(BibleImport):
try:
encoding = get_file_encoding(file_path)
with file_path.open('r', encoding=encoding, newline='') as csv_file:
csv_reader = csv.reader(csv_file, delimiter=',', quotechar='"')
# Grab a sample from the file, and rewind to the beginning
sample = csv_file.read(4096)
csv_file.seek(0)
# Create the reader
csv_reader = reader(csv_file, delimiter=',', quotechar='"')
# Determine if the CSV has a header and skip if necessary
if _has_header(sample):
print("has_header")
next(csv_reader)
return [results_tuple(*line) for line in csv_reader]
except (OSError, csv.Error, TypeError, UnicodeDecodeError):
except (OSError, CSVError, TypeError, UnicodeDecodeError, ValueError):
log.exception('Parsing {file} failed.'.format(file=file_path))
raise ValidationError(msg='Parsing "{file}" failed'.format(file=file_path))

View File

@ -29,7 +29,7 @@ from unittest.mock import MagicMock, PropertyMock, call, patch
from openlp.core.lib.exceptions import ValidationError
from openlp.plugins.bibles.lib.bibleimport import BibleImport
from openlp.plugins.bibles.lib.importers.csvbible import Book, CSVBible, Verse
from openlp.plugins.bibles.lib.importers.csvbible import Book, CSVBible, Verse, _has_header
from tests.utils import load_external_result_data
from tests.utils.constants import RESOURCE_PATH
@ -128,8 +128,10 @@ def test_parse_csv_file():
mocked_csv_file.open.return_value.__enter__.return_value = mocked_enter_file
with patch('openlp.plugins.bibles.lib.importers.csvbible.get_file_encoding', return_value='utf-8'), \
patch('openlp.plugins.bibles.lib.importers.csvbible.csv.reader',
return_value=iter(test_data)) as mocked_reader:
patch('openlp.plugins.bibles.lib.importers.csvbible.reader',
return_value=iter(test_data)) as mocked_reader, \
patch('openlp.plugins.bibles.lib.importers.csvbible._has_header') as mocked_has_header:
mocked_has_header.return_value = False
# WHEN: Calling the CSVBible parse_csv_file method with a file name and TestTuple
result = CSVBible.parse_csv_file(mocked_csv_file, TestTuple)
@ -141,6 +143,38 @@ def test_parse_csv_file():
mocked_reader.assert_called_once_with(mocked_enter_file, delimiter=',', quotechar='"')
def test_has_header():
"""
Test the _has_header() with sample data
"""
# GIVEN: A mocked csv.reader which returns an iterator with test data
test_data = """<book_id>,<testament_id>,<book_name>,<abbreviation>
1,1,Genesis,Gen
"""
# WHEN: Sample data is given to _has_header()
result = _has_header(test_data)
# THEN: The result should be true
assert result is True
def test_has_no_header():
"""
Test the _has_header() with sample data that does not have a header
"""
# GIVEN: A mocked csv.reader which returns an iterator with test data
test_data = """1,1,Genesis,Gen
2,1,Exodus,Exo
"""
# WHEN: Sample data is given to _has_header()
result = _has_header(test_data)
# THEN: The result should be true
assert result is False
def test_parse_csv_file_oserror():
"""
Test the parse_csv_file() handles an OSError correctly
@ -170,7 +204,7 @@ def test_parse_csv_file_csverror():
with patch('openlp.plugins.bibles.lib.importers.csvbible.get_file_encoding',
return_value={'encoding': 'utf-8', 'confidence': 0.99}),\
patch('openlp.plugins.bibles.lib.importers.csvbible.csv.reader', side_effect=csv.Error):
patch('openlp.plugins.bibles.lib.importers.csvbible.reader', side_effect=csv.Error):
# WHEN: Calling CSVBible.parse_csv_file
# THEN: A ValidationError should be raised
@ -185,7 +219,7 @@ def test_process_books_stopped_import(registry):
"""
# GIVEN: An instance of CSVBible with the stop_import_flag set to True
mocked_manager = MagicMock()
with patch('openlp.plugins.bibles.lib.db.BibleDB._setup'):
with patch('openlp.plugins.bibles.lib.bibleimport.BibleDB._setup'):
importer = CSVBible(mocked_manager, path='.', name='.', books_path=Path('books.csv'),
verse_path=Path('verse.csv'))
type(importer).application = PropertyMock()
@ -206,7 +240,7 @@ def test_process_books(registry):
"""
# GIVEN: An instance of CSVBible with the stop_import_flag set to False, and some sample data
mocked_manager = MagicMock()
with patch('openlp.plugins.bibles.lib.db.BibleDB._setup'),\
with patch('openlp.plugins.bibles.lib.bibleimport.BibleDB._setup'),\
patch('openlp.plugins.bibles.lib.importers.csvbible.translate'):
importer = CSVBible(mocked_manager, path='.', name='.', books_path=Path('books.csv'),
verse_path=Path('verse.csv'))
@ -233,7 +267,7 @@ def test_process_verses_stopped_import(registry):
"""
# GIVEN: An instance of CSVBible with the stop_import_flag set to True
mocked_manager = MagicMock()
with patch('openlp.plugins.bibles.lib.db.BibleDB._setup'):
with patch('openlp.plugins.bibles.lib.bibleimport.BibleDB._setup'):
importer = CSVBible(mocked_manager, path='.', name='.', books_path=Path('books.csv'),
verse_path=Path('verse.csv'))
importer.get_book_name = MagicMock()
@ -255,7 +289,7 @@ def test_process_verses_successful(registry):
"""
# GIVEN: An instance of CSVBible with the application and wizard attributes mocked out, and some test data.
mocked_manager = MagicMock()
with patch('openlp.plugins.bibles.lib.db.BibleDB._setup'),\
with patch('openlp.plugins.bibles.lib.bibleimport.BibleDB._setup'),\
patch('openlp.plugins.bibles.lib.importers.csvbible.translate'):
importer = CSVBible(mocked_manager, path='.', name='.', books_path=Path('books.csv'),
verse_path=Path('verse.csv'))
@ -289,7 +323,7 @@ def test_do_import_invalid_language_id(registry):
"""
# GIVEN: An instance of CSVBible and a mocked get_language which simulates the user cancelling the language box
mocked_manager = MagicMock()
with patch('openlp.plugins.bibles.lib.db.BibleDB._setup'):
with patch('openlp.plugins.bibles.lib.bibleimport.BibleDB._setup'):
importer = CSVBible(mocked_manager, path='.', name='.', books_path=Path('books.csv'),
verse_path=Path('verse.csv'))
importer.get_language = MagicMock(return_value=None)
@ -308,7 +342,7 @@ def test_do_import_success(registry):
"""
# GIVEN: An instance of CSVBible
mocked_manager = MagicMock()
with patch('openlp.plugins.bibles.lib.db.BibleDB._setup'):
with patch('openlp.plugins.bibles.lib.bibleimport.BibleDB._setup'):
importer = CSVBible(mocked_manager, path='.', name='.', books_path=Path('books.csv'),
verse_path=Path('verses.csv'))
importer.get_language = MagicMock(return_value=10)