forked from openlp/openlp
In EasyWorship song importer, add initial support for non-latin1 encodings
This commit is contained in:
parent
161a1e444a
commit
b58eecdf63
@ -35,7 +35,7 @@ import struct
|
|||||||
from openlp.core.lib import translate
|
from openlp.core.lib import translate
|
||||||
from songimport import SongImport
|
from songimport import SongImport
|
||||||
|
|
||||||
def strip_rtf(blob):
|
def strip_rtf(blob, encoding):
|
||||||
depth = 0
|
depth = 0
|
||||||
control = False
|
control = False
|
||||||
clear_text = []
|
clear_text = []
|
||||||
@ -69,12 +69,42 @@ def strip_rtf(blob):
|
|||||||
if control_str == 'par' or control_str == 'line':
|
if control_str == 'par' or control_str == 'line':
|
||||||
clear_text.append(u'\n')
|
clear_text.append(u'\n')
|
||||||
elif control_str == 'tab':
|
elif control_str == 'tab':
|
||||||
clear_text.append(u'\n')
|
clear_text.append(u'\t')
|
||||||
|
# Prefer the encoding specified by the RTF data to that
|
||||||
|
# specified by the Paradox table header
|
||||||
|
# West European encoding
|
||||||
|
elif control_str == 'fcharset0':
|
||||||
|
encoding = u'cp1252'
|
||||||
|
# Greek encoding
|
||||||
|
elif control_str == 'fcharset161':
|
||||||
|
encoding = u'cp1253'
|
||||||
|
# Turkish encoding
|
||||||
|
elif control_str == 'fcharset162':
|
||||||
|
encoding = u'cp1254'
|
||||||
|
# Vietnamese encoding
|
||||||
|
elif control_str == 'fcharset163':
|
||||||
|
encoding = u'cp1258'
|
||||||
|
# Hebrew encoding
|
||||||
|
elif control_str == 'fcharset177':
|
||||||
|
encoding = u'cp1255'
|
||||||
|
# Arabic encoding
|
||||||
|
elif control_str == 'fcharset178':
|
||||||
|
encoding = u'cp1256'
|
||||||
|
# Baltic encoding
|
||||||
|
elif control_str == 'fcharset186':
|
||||||
|
encoding = u'cp1257'
|
||||||
|
# Cyrillic encoding
|
||||||
|
elif control_str == 'fcharset204':
|
||||||
|
encoding = u'cp1251'
|
||||||
|
# Thai encoding
|
||||||
|
elif control_str == 'fcharset222':
|
||||||
|
encoding = u'cp874'
|
||||||
|
# Central+East European encoding
|
||||||
|
elif control_str == 'fcharset238':
|
||||||
|
encoding = u'cp1250'
|
||||||
elif control_str[0] == '\'':
|
elif control_str[0] == '\'':
|
||||||
# Really should take RTF character set into account but
|
|
||||||
# for now assume ANSI (Windows-1252) and call it good
|
|
||||||
s = chr(int(control_str[1:3], 16))
|
s = chr(int(control_str[1:3], 16))
|
||||||
clear_text.append(s.decode(u'windows-1252'))
|
clear_text.append(s.decode(encoding))
|
||||||
del control_word[:]
|
del control_word[:]
|
||||||
if c == '\\' and new_control:
|
if c == '\\' and new_control:
|
||||||
control = True
|
control = True
|
||||||
@ -126,6 +156,30 @@ class EasyWorshipSongImport(SongImport):
|
|||||||
db_file.close()
|
db_file.close()
|
||||||
self.memo_file.close()
|
self.memo_file.close()
|
||||||
return False
|
return False
|
||||||
|
# Take a stab at how text is encoded
|
||||||
|
self.encoding = u'cp1252'
|
||||||
|
db_file.seek(106)
|
||||||
|
code_page, = struct.unpack('<h', db_file.read(2))
|
||||||
|
if code_page == 852:
|
||||||
|
self.encoding = u'cp1250'
|
||||||
|
# The following codepage to actual encoding mappings have not been
|
||||||
|
# observed, but merely guessed. Actual example files are needed.
|
||||||
|
#if code_page == 737:
|
||||||
|
# self.encoding = u'cp1253'
|
||||||
|
#if code_page == 775:
|
||||||
|
# self.encoding = u'cp1257'
|
||||||
|
#if code_page == 855:
|
||||||
|
# self.encoding = u'cp1251'
|
||||||
|
#if code_page == 857:
|
||||||
|
# self.encoding = u'cp1254'
|
||||||
|
#if code_page == 866:
|
||||||
|
# self.encoding = u'cp1251'
|
||||||
|
#if code_page == 869:
|
||||||
|
# self.encoding = u'cp1253'
|
||||||
|
#if code_page == 862:
|
||||||
|
# self.encoding = u'cp1255'
|
||||||
|
#if code_page == 874:
|
||||||
|
# self.encoding = u'cp874'
|
||||||
# There does not appear to be a _reliable_ way of getting the number
|
# There does not appear to be a _reliable_ way of getting the number
|
||||||
# of songs/records, so let's use file blocks for measuring progress.
|
# of songs/records, so let's use file blocks for measuring progress.
|
||||||
total_blocks = (db_size - header_size) / (block_size * 1024)
|
total_blocks = (db_size - header_size) / (block_size * 1024)
|
||||||
@ -204,7 +258,7 @@ class EasyWorshipSongImport(SongImport):
|
|||||||
self.add_author(author_name.strip())
|
self.add_author(author_name.strip())
|
||||||
if words:
|
if words:
|
||||||
# Format the lyrics
|
# Format the lyrics
|
||||||
words = strip_rtf(words)
|
words = strip_rtf(words, self.encoding)
|
||||||
for verse in words.split(u'\n\n'):
|
for verse in words.split(u'\n\n'):
|
||||||
self.add_verse(verse.strip(), u'V')
|
self.add_verse(verse.strip(), u'V')
|
||||||
if self.stop_import_flag:
|
if self.stop_import_flag:
|
||||||
@ -263,7 +317,7 @@ class EasyWorshipSongImport(SongImport):
|
|||||||
# Format the field depending on the field type
|
# Format the field depending on the field type
|
||||||
if field_desc.type == 1:
|
if field_desc.type == 1:
|
||||||
# string
|
# string
|
||||||
return field.rstrip('\0').decode(u'windows-1252')
|
return field.rstrip('\0').decode(self.encoding)
|
||||||
elif field_desc.type == 3:
|
elif field_desc.type == 3:
|
||||||
# 16-bit int
|
# 16-bit int
|
||||||
return field ^ 0x8000
|
return field ^ 0x8000
|
||||||
|
Loading…
Reference in New Issue
Block a user