From b58eecdf6396c2c238c3bcbb3dca3985a54824ea Mon Sep 17 00:00:00 2001 From: "Jeffrey S. Smith" Date: Thu, 14 Oct 2010 13:15:02 -0500 Subject: [PATCH] In EasyWorship song importer, add initial support for non-latin1 encodings --- openlp/plugins/songs/lib/ewimport.py | 68 +++++++++++++++++++++++++--- 1 file changed, 61 insertions(+), 7 deletions(-) diff --git a/openlp/plugins/songs/lib/ewimport.py b/openlp/plugins/songs/lib/ewimport.py index 2db1df375..4188304cd 100644 --- a/openlp/plugins/songs/lib/ewimport.py +++ b/openlp/plugins/songs/lib/ewimport.py @@ -35,7 +35,7 @@ import struct from openlp.core.lib import translate from songimport import SongImport -def strip_rtf(blob): +def strip_rtf(blob, encoding): depth = 0 control = False clear_text = [] @@ -69,12 +69,42 @@ def strip_rtf(blob): if control_str == 'par' or control_str == 'line': clear_text.append(u'\n') elif control_str == 'tab': - clear_text.append(u'\n') + clear_text.append(u'\t') + # Prefer the encoding specified by the RTF data to that + # specified by the Paradox table header + # West European encoding + elif control_str == 'fcharset0': + encoding = u'cp1252' + # Greek encoding + elif control_str == 'fcharset161': + encoding = u'cp1253' + # Turkish encoding + elif control_str == 'fcharset162': + encoding = u'cp1254' + # Vietnamese encoding + elif control_str == 'fcharset163': + encoding = u'cp1258' + # Hebrew encoding + elif control_str == 'fcharset177': + encoding = u'cp1255' + # Arabic encoding + elif control_str == 'fcharset178': + encoding = u'cp1256' + # Baltic encoding + elif control_str == 'fcharset186': + encoding = u'cp1257' + # Cyrillic encoding + elif control_str == 'fcharset204': + encoding = u'cp1251' + # Thai encoding + elif control_str == 'fcharset222': + encoding = u'cp874' + # Central+East European encoding + elif control_str == 'fcharset238': + encoding = u'cp1250' elif control_str[0] == '\'': - # Really should take RTF character set into account but - # for now assume ANSI (Windows-1252) and call it good s = chr(int(control_str[1:3], 16)) - clear_text.append(s.decode(u'windows-1252')) + clear_text.append(s.decode(encoding)) del control_word[:] if c == '\\' and new_control: control = True @@ -126,6 +156,30 @@ class EasyWorshipSongImport(SongImport): db_file.close() self.memo_file.close() return False + # Take a stab at how text is encoded + self.encoding = u'cp1252' + db_file.seek(106) + code_page, = struct.unpack('