forked from openlp/openlp
In EasyWorship song importer, add initial support for non-latin1 encodings
This commit is contained in:
parent
161a1e444a
commit
b58eecdf63
@ -35,7 +35,7 @@ import struct
|
||||
from openlp.core.lib import translate
|
||||
from songimport import SongImport
|
||||
|
||||
def strip_rtf(blob):
|
||||
def strip_rtf(blob, encoding):
|
||||
depth = 0
|
||||
control = False
|
||||
clear_text = []
|
||||
@ -69,12 +69,42 @@ def strip_rtf(blob):
|
||||
if control_str == 'par' or control_str == 'line':
|
||||
clear_text.append(u'\n')
|
||||
elif control_str == 'tab':
|
||||
clear_text.append(u'\n')
|
||||
clear_text.append(u'\t')
|
||||
# Prefer the encoding specified by the RTF data to that
|
||||
# specified by the Paradox table header
|
||||
# West European encoding
|
||||
elif control_str == 'fcharset0':
|
||||
encoding = u'cp1252'
|
||||
# Greek encoding
|
||||
elif control_str == 'fcharset161':
|
||||
encoding = u'cp1253'
|
||||
# Turkish encoding
|
||||
elif control_str == 'fcharset162':
|
||||
encoding = u'cp1254'
|
||||
# Vietnamese encoding
|
||||
elif control_str == 'fcharset163':
|
||||
encoding = u'cp1258'
|
||||
# Hebrew encoding
|
||||
elif control_str == 'fcharset177':
|
||||
encoding = u'cp1255'
|
||||
# Arabic encoding
|
||||
elif control_str == 'fcharset178':
|
||||
encoding = u'cp1256'
|
||||
# Baltic encoding
|
||||
elif control_str == 'fcharset186':
|
||||
encoding = u'cp1257'
|
||||
# Cyrillic encoding
|
||||
elif control_str == 'fcharset204':
|
||||
encoding = u'cp1251'
|
||||
# Thai encoding
|
||||
elif control_str == 'fcharset222':
|
||||
encoding = u'cp874'
|
||||
# Central+East European encoding
|
||||
elif control_str == 'fcharset238':
|
||||
encoding = u'cp1250'
|
||||
elif control_str[0] == '\'':
|
||||
# Really should take RTF character set into account but
|
||||
# for now assume ANSI (Windows-1252) and call it good
|
||||
s = chr(int(control_str[1:3], 16))
|
||||
clear_text.append(s.decode(u'windows-1252'))
|
||||
clear_text.append(s.decode(encoding))
|
||||
del control_word[:]
|
||||
if c == '\\' and new_control:
|
||||
control = True
|
||||
@ -126,6 +156,30 @@ class EasyWorshipSongImport(SongImport):
|
||||
db_file.close()
|
||||
self.memo_file.close()
|
||||
return False
|
||||
# Take a stab at how text is encoded
|
||||
self.encoding = u'cp1252'
|
||||
db_file.seek(106)
|
||||
code_page, = struct.unpack('<h', db_file.read(2))
|
||||
if code_page == 852:
|
||||
self.encoding = u'cp1250'
|
||||
# The following codepage to actual encoding mappings have not been
|
||||
# observed, but merely guessed. Actual example files are needed.
|
||||
#if code_page == 737:
|
||||
# self.encoding = u'cp1253'
|
||||
#if code_page == 775:
|
||||
# self.encoding = u'cp1257'
|
||||
#if code_page == 855:
|
||||
# self.encoding = u'cp1251'
|
||||
#if code_page == 857:
|
||||
# self.encoding = u'cp1254'
|
||||
#if code_page == 866:
|
||||
# self.encoding = u'cp1251'
|
||||
#if code_page == 869:
|
||||
# self.encoding = u'cp1253'
|
||||
#if code_page == 862:
|
||||
# self.encoding = u'cp1255'
|
||||
#if code_page == 874:
|
||||
# self.encoding = u'cp874'
|
||||
# There does not appear to be a _reliable_ way of getting the number
|
||||
# of songs/records, so let's use file blocks for measuring progress.
|
||||
total_blocks = (db_size - header_size) / (block_size * 1024)
|
||||
@ -204,7 +258,7 @@ class EasyWorshipSongImport(SongImport):
|
||||
self.add_author(author_name.strip())
|
||||
if words:
|
||||
# Format the lyrics
|
||||
words = strip_rtf(words)
|
||||
words = strip_rtf(words, self.encoding)
|
||||
for verse in words.split(u'\n\n'):
|
||||
self.add_verse(verse.strip(), u'V')
|
||||
if self.stop_import_flag:
|
||||
@ -263,7 +317,7 @@ class EasyWorshipSongImport(SongImport):
|
||||
# Format the field depending on the field type
|
||||
if field_desc.type == 1:
|
||||
# string
|
||||
return field.rstrip('\0').decode(u'windows-1252')
|
||||
return field.rstrip('\0').decode(self.encoding)
|
||||
elif field_desc.type == 3:
|
||||
# 16-bit int
|
||||
return field ^ 0x8000
|
||||
|
Loading…
Reference in New Issue
Block a user