Supposedly fix the BOM mess for UTF-8 files. We assume no other file would begin with a BOM, whatever encoding it has.

This commit is contained in:
Mattias Põldaru 2011-12-10 20:21:18 +02:00
parent 4949f88f28
commit ca6250ae3b
4 changed files with 14 additions and 13 deletions

View File

@ -80,6 +80,9 @@ def get_text_file_string(text_file):
content_string = None
try:
file_handle = open(text_file, u'r')
if not file_handle.read(3) == '\xEF\xBB\xBF':
# no BOM was found
file_handle.seek(0)
content = file_handle.read()
content_string = content.decode(u'utf-8')
except (IOError, UnicodeError):

View File

@ -28,17 +28,7 @@
The :mod:`cvsbible` module provides a facility to import bibles from a set of
CSV files.
The module expects two mandatory files containing the books and the verses and
will accept an optional third file containing the testaments.
The format of the testament file is:
<testament_id>,<testament_name>
For example:
1,Old Testament
2,New Testament
The module expects two mandatory files containing the books and the verses.
The format of the books file is:
@ -110,6 +100,9 @@ class CSVBible(BibleDB):
try:
details = get_file_encoding(self.booksfile)
books_file = open(self.booksfile, 'r')
if not books_file.read(3) == '\xEF\xBB\xBF':
# no BOM was found
books_file.seek(0)
books_reader = csv.reader(books_file, delimiter=',', quotechar='"')
for line in books_reader:
if self.stop_import_flag:
@ -144,6 +137,9 @@ class CSVBible(BibleDB):
book_ptr = None
details = get_file_encoding(self.versesfile)
verse_file = open(self.versesfile, 'rb')
if not verse_file.read(3) == '\xEF\xBB\xBF':
# no BOM was found
verse_file.seek(0)
verse_reader = csv.reader(verse_file, delimiter=',', quotechar='"')
for line in verse_reader:
if self.stop_import_flag:

View File

@ -78,8 +78,7 @@ class OSISBible(BibleDB):
fbibles = open(filepath, u'r')
for line in fbibles:
book = line.split(u',')
self.books[book[0]] = (book[1].lstrip().rstrip(),
book[2].lstrip().rstrip())
self.books[book[0]] = (book[1].strip(), book[2].strip())
except IOError:
log.exception(u'OSIS bible import failed')
finally:

View File

@ -75,6 +75,9 @@ class CCLIFileImport(SongImport):
details = chardet.detect(detect_content)
detect_file.close()
infile = codecs.open(filename, u'r', details['encoding'])
if not infile.read(3) == '\xEF\xBB\xBF':
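# not UTF-8 or no BOM was found
# NOTE(review): infile comes from codecs.open() with an encoding, so read(3)
# yields decoded text rather than raw bytes -- confirm that the comparison
# with the byte string '\xEF\xBB\xBF' above can actually match.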
infile.seek(0)
lines = infile.readlines()
infile.close()
ext = os.path.splitext(filename)[1]