From 6fcd9c233bd447193151b6c6687975adfef4b868 Mon Sep 17 00:00:00 2001 From: Raoul Snyman Date: Tue, 8 Feb 2022 11:54:34 -0700 Subject: [PATCH] Add a way to detect if there is a header in the CSV file --- .../plugins/bibles/lib/importers/csvbible.py | 27 ++++++++-- tests/openlp_plugins/bibles/test_csvimport.py | 54 +++++++++++++++---- 2 files changed, 68 insertions(+), 13 deletions(-) diff --git a/openlp/plugins/bibles/lib/importers/csvbible.py b/openlp/plugins/bibles/lib/importers/csvbible.py index 4bdfc3993..792db5de1 100644 --- a/openlp/plugins/bibles/lib/importers/csvbible.py +++ b/openlp/plugins/bibles/lib/importers/csvbible.py @@ -48,9 +48,9 @@ There are two acceptable formats of the verses file. They are: All CSV files are expected to use a comma (',') as the delimiter and double quotes ('"') as the quote symbol. """ -import csv import logging from collections import namedtuple +from csv import Error as CSVError, reader from openlp.core.common import get_file_encoding from openlp.core.common.i18n import translate @@ -63,6 +63,19 @@ Book = namedtuple('Book', 'id, testament_id, name, abbreviation') Verse = namedtuple('Verse', 'book_id_name, chapter_number, number, text') +def _has_header(sample): + """Determine if the sample of a csv file has a header line""" + if '\r\n' in sample: + lines = sample.split('\r\n') + else: + lines = sample.split('\n') + row_1 = lines[0].split(',') + row_2 = lines[1].split(',') + if all([row_2[0].isdigit(), row_2[1].isdigit()]) and not all([row_1[0].isdigit(), row_1[1].isdigit()]): + return True + return False + + class CSVBible(BibleImport): """ This class provides a specialisation for importing of CSV Bibles. @@ -105,9 +118,17 @@ class CSVBible(BibleImport): try: encoding = get_file_encoding(file_path) with file_path.open('r', encoding=encoding, newline='') as csv_file: - csv_reader = csv.reader(csv_file, delimiter=',', quotechar='"') + # Grab a sample from the file, and rewind to the beginning + sample = csv_file.read(4096) + csv_file.seek(0) + # Create the reader + csv_reader = reader(csv_file, delimiter=',', quotechar='"') + # Determine if the CSV has a header and skip if necessary + if _has_header(sample): + print("has_header") + next(csv_reader) return [results_tuple(*line) for line in csv_reader] - except (OSError, csv.Error, TypeError, UnicodeDecodeError): + except (OSError, CSVError, TypeError, UnicodeDecodeError, ValueError): log.exception('Parsing {file} failed.'.format(file=file_path)) raise ValidationError(msg='Parsing "{file}" failed'.format(file=file_path)) diff --git a/tests/openlp_plugins/bibles/test_csvimport.py b/tests/openlp_plugins/bibles/test_csvimport.py index dac6630c8..3a76e7211 100644 --- a/tests/openlp_plugins/bibles/test_csvimport.py +++ b/tests/openlp_plugins/bibles/test_csvimport.py @@ -29,7 +29,7 @@ from unittest.mock import MagicMock, PropertyMock, call, patch from openlp.core.lib.exceptions import ValidationError from openlp.plugins.bibles.lib.bibleimport import BibleImport -from openlp.plugins.bibles.lib.importers.csvbible import Book, CSVBible, Verse +from openlp.plugins.bibles.lib.importers.csvbible import Book, CSVBible, Verse, _has_header from tests.utils import load_external_result_data from tests.utils.constants import RESOURCE_PATH @@ -128,8 +128,10 @@ def test_parse_csv_file(): mocked_csv_file.open.return_value.__enter__.return_value = mocked_enter_file with patch('openlp.plugins.bibles.lib.importers.csvbible.get_file_encoding', return_value='utf-8'), \ - patch('openlp.plugins.bibles.lib.importers.csvbible.csv.reader', - return_value=iter(test_data)) as mocked_reader: + patch('openlp.plugins.bibles.lib.importers.csvbible.reader', + return_value=iter(test_data)) as mocked_reader, \ + patch('openlp.plugins.bibles.lib.importers.csvbible._has_header') as mocked_has_header: + mocked_has_header.return_value = False # WHEN: Calling the CSVBible parse_csv_file method with a file name and TestTuple result = CSVBible.parse_csv_file(mocked_csv_file, TestTuple) @@ -141,6 +143,38 @@ def test_parse_csv_file(): mocked_reader.assert_called_once_with(mocked_enter_file, delimiter=',', quotechar='"') +def test_has_header(): + """ + Test the _has_header() with sample data + """ + # GIVEN: A mocked csv.reader which returns an iterator with test data + test_data = """,,, +1,1,Genesis,Gen +""" + + # WHEN: Sample data is given to _has_header() + result = _has_header(test_data) + + # THEN: The result should be true + assert result is True + + +def test_has_no_header(): + """ + Test the _has_header() with sample data that does not have a header + """ + # GIVEN: A mocked csv.reader which returns an iterator with test data + test_data = """1,1,Genesis,Gen +2,1,Exodus,Exo +""" + + # WHEN: Sample data is given to _has_header() + result = _has_header(test_data) + + # THEN: The result should be true + assert result is False + + def test_parse_csv_file_oserror(): """ Test the parse_csv_file() handles an OSError correctly @@ -170,7 +204,7 @@ def test_parse_csv_file_csverror(): with patch('openlp.plugins.bibles.lib.importers.csvbible.get_file_encoding', return_value={'encoding': 'utf-8', 'confidence': 0.99}),\ - patch('openlp.plugins.bibles.lib.importers.csvbible.csv.reader', side_effect=csv.Error): + patch('openlp.plugins.bibles.lib.importers.csvbible.reader', side_effect=csv.Error): # WHEN: Calling CSVBible.parse_csv_file # THEN: A ValidationError should be raised @@ -185,7 +219,7 @@ def test_process_books_stopped_import(registry): """ # GIVEN: An instance of CSVBible with the stop_import_flag set to True mocked_manager = MagicMock() - with patch('openlp.plugins.bibles.lib.db.BibleDB._setup'): + with patch('openlp.plugins.bibles.lib.bibleimport.BibleDB._setup'): importer = CSVBible(mocked_manager, path='.', name='.', books_path=Path('books.csv'), verse_path=Path('verse.csv')) type(importer).application = PropertyMock() @@ -206,7 +240,7 @@ def test_process_books(registry): """ # GIVEN: An instance of CSVBible with the stop_import_flag set to False, and some sample data mocked_manager = MagicMock() - with patch('openlp.plugins.bibles.lib.db.BibleDB._setup'),\ + with patch('openlp.plugins.bibles.lib.bibleimport.BibleDB._setup'),\ patch('openlp.plugins.bibles.lib.importers.csvbible.translate'): importer = CSVBible(mocked_manager, path='.', name='.', books_path=Path('books.csv'), verse_path=Path('verse.csv')) @@ -233,7 +267,7 @@ def test_process_verses_stopped_import(registry): """ # GIVEN: An instance of CSVBible with the stop_import_flag set to True mocked_manager = MagicMock() - with patch('openlp.plugins.bibles.lib.db.BibleDB._setup'): + with patch('openlp.plugins.bibles.lib.bibleimport.BibleDB._setup'): importer = CSVBible(mocked_manager, path='.', name='.', books_path=Path('books.csv'), verse_path=Path('verse.csv')) importer.get_book_name = MagicMock() @@ -255,7 +289,7 @@ def test_process_verses_successful(registry): """ # GIVEN: An instance of CSVBible with the application and wizard attributes mocked out, and some test data. mocked_manager = MagicMock() - with patch('openlp.plugins.bibles.lib.db.BibleDB._setup'),\ + with patch('openlp.plugins.bibles.lib.bibleimport.BibleDB._setup'),\ patch('openlp.plugins.bibles.lib.importers.csvbible.translate'): importer = CSVBible(mocked_manager, path='.', name='.', books_path=Path('books.csv'), verse_path=Path('verse.csv')) @@ -289,7 +323,7 @@ def test_do_import_invalid_language_id(registry): """ # GIVEN: An instance of CSVBible and a mocked get_language which simulates the user cancelling the language box mocked_manager = MagicMock() - with patch('openlp.plugins.bibles.lib.db.BibleDB._setup'): + with patch('openlp.plugins.bibles.lib.bibleimport.BibleDB._setup'): importer = CSVBible(mocked_manager, path='.', name='.', books_path=Path('books.csv'), verse_path=Path('verse.csv')) importer.get_language = MagicMock(return_value=None) @@ -308,7 +342,7 @@ def test_do_import_success(registry): """ # GIVEN: An instance of CSVBible mocked_manager = MagicMock() - with patch('openlp.plugins.bibles.lib.db.BibleDB._setup'): + with patch('openlp.plugins.bibles.lib.bibleimport.BibleDB._setup'): importer = CSVBible(mocked_manager, path='.', name='.', books_path=Path('books.csv'), verse_path=Path('verses.csv')) importer.get_language = MagicMock(return_value=10)