forked from openlp/openlp
Better handling of encodings. User is asked only once, if possible.
This commit is contained in:
parent
75ae9065d0
commit
25dc4fe36c
@ -473,15 +473,16 @@ class StripRtf():
|
|||||||
u'fcharset204': u'cp1251',
|
u'fcharset204': u'cp1251',
|
||||||
u'fcharset222': u'cp874',
|
u'fcharset222': u'cp874',
|
||||||
u'fcharset238': u'cp1250'}
|
u'fcharset238': u'cp1250'}
|
||||||
|
# Once the user has been asked for an encoding, it is reused from then on.
|
||||||
|
user_encoding = []
|
||||||
|
|
||||||
def strip_rtf(self, text, default_encoding=None):
|
def strip_rtf(self, text, default_encoding=None):
|
||||||
|
self.default_encoding = default_encoding
|
||||||
# Current font is the font tag we last met.
|
# Current font is the font tag we last met.
|
||||||
font = u''
|
font = u''
|
||||||
# Character encoding is defined inside fonttable.
|
# Character encoding is defined inside fonttable.
|
||||||
# font_table could contain eg u'0': u'cp1252'
|
# font_table could contain eg u'0': u'cp1252'
|
||||||
font_table = {u'': default_encoding}
|
font_table = {u'': default_encoding}
|
||||||
# Whether we are inside the font table.
|
|
||||||
inside_font_table = False
|
|
||||||
# Stack of things to keep track of when entering/leaving groups.
|
# Stack of things to keep track of when entering/leaving groups.
|
||||||
stack = []
|
stack = []
|
||||||
# Whether this group (and all inside it) are "ignorable".
|
# Whether this group (and all inside it) are "ignorable".
|
||||||
@ -498,10 +499,10 @@ class StripRtf():
|
|||||||
curskip = 0
|
curskip = 0
|
||||||
if brace == u'{':
|
if brace == u'{':
|
||||||
# Push state
|
# Push state
|
||||||
stack.append((ucskip, ignorable, font, inside_font_table))
|
stack.append((ucskip, ignorable, font))
|
||||||
elif brace == u'}':
|
elif brace == u'}':
|
||||||
# Pop state
|
# Pop state
|
||||||
ucskip, ignorable, font, inside_font_table = stack.pop()
|
ucskip, ignorable, font = stack.pop()
|
||||||
# \x (not a letter)
|
# \x (not a letter)
|
||||||
elif char:
|
elif char:
|
||||||
curskip = 0
|
curskip = 0
|
||||||
@ -533,29 +534,19 @@ class StripRtf():
|
|||||||
ignorable = True
|
ignorable = True
|
||||||
elif word == u'f':
|
elif word == u'f':
|
||||||
font = arg
|
font = arg
|
||||||
if not inside_font_table:
|
|
||||||
if arg in font_table.keys():
|
|
||||||
encoding = font_table[arg]
|
|
||||||
else:
|
|
||||||
encoding = default_encoding
|
|
||||||
elif word == u'ansicpg':
|
elif word == u'ansicpg':
|
||||||
if font == u'':
|
font_table[font] = 'cp' + arg
|
||||||
if inside_font_table or font == u'':
|
|
||||||
font_table[font] = 'cp' + arg
|
|
||||||
elif word == u'fcharset':
|
elif word == u'fcharset':
|
||||||
charset_reference = word + arg
|
charset_reference = word + arg
|
||||||
if charset_reference in self.CHARSET_MAPPING:
|
if charset_reference in self.CHARSET_MAPPING:
|
||||||
charset = self.CHARSET_MAPPING[charset_reference]
|
charset = self.CHARSET_MAPPING[charset_reference]
|
||||||
if not charset:
|
|
||||||
charset = default_encoding
|
|
||||||
else:
|
else:
|
||||||
|
charset = None
|
||||||
log.error(u"Charset '%s' not in CHARSET_MAPPING "
|
log.error(u"Charset '%s' not in CHARSET_MAPPING "
|
||||||
u"dictionary in "
|
u"dictionary in "
|
||||||
u"openlp/plugins/songs/lib/__init__.py"
|
u"openlp/plugins/songs/lib/__init__.py"
|
||||||
% charset_reference)
|
% charset_reference)
|
||||||
charset = default_encoding
|
if font not in font_table:
|
||||||
if font == u'':
|
|
||||||
if inside_font_table or font == u'':
|
|
||||||
font_table[font] = charset
|
font_table[font] = charset
|
||||||
# \'xx
|
# \'xx
|
||||||
elif hex:
|
elif hex:
|
||||||
@ -563,14 +554,13 @@ class StripRtf():
|
|||||||
curskip -= 1
|
curskip -= 1
|
||||||
elif not ignorable:
|
elif not ignorable:
|
||||||
charcode = int(hex, 16)
|
charcode = int(hex, 16)
|
||||||
|
encoding = self.get_encoding(font, font_table)
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
out.append(chr(charcode).decode(encoding))
|
out.append(chr(charcode).decode(encoding))
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
encoding = \
|
encoding = self.get_encoding(font, font_table,
|
||||||
retrieve_windows_encoding(default_encoding)
|
failed=True)
|
||||||
if font:
|
|
||||||
font_table[font] = encoding
|
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
elif tchar:
|
elif tchar:
|
||||||
@ -580,6 +570,23 @@ class StripRtf():
|
|||||||
out.append(tchar)
|
out.append(tchar)
|
||||||
return u''.join(out)
|
return u''.join(out)
|
||||||
|
|
||||||
|
def get_encoding(self, font, font_table, failed=False):
    """
    Pick the encoding to use for text set in ``font`` and remember it.

    Candidates are tried in this order: the entry for ``font`` in
    ``font_table``, the most recent entry of ``self.user_encoding``, then
    ``self.default_encoding``. If none of them yields a value,
    ``retrieve_windows_encoding`` is called (presumably this prompts the
    user -- confirm against its definition) and its result is appended to
    ``self.user_encoding`` if it is not already there.

    ``font``
        Key identifying the current font in ``font_table``.

    ``font_table``
        Dict mapping font keys to encoding names. The chosen encoding is
        written back under ``font``.

    ``failed``
        Set to True when the previously returned encoding could not decode
        the data, so a different encoding is wanted.

    Returns the chosen encoding (a single encoding name, never the list of
    remembered encodings).
    """
    encoding = font_table.get(font)
    if not encoding and self.user_encoding:
        encoding = self.user_encoding[-1]
    if not encoding and self.default_encoding:
        encoding = self.default_encoding
    # Most recent encoding obtained from retrieve_windows_encoding, if any.
    last_choice = self.user_encoding[-1] if self.user_encoding else None
    if failed and last_choice and last_choice != encoding:
        # The encoding that failed was not the remembered one, so try the
        # remembered one before asking again.
        encoding = last_choice
    elif not encoding or failed:
        # Either nothing is known yet, or the remembered choice itself just
        # failed (or there is none): obtain a fresh encoding.
        encoding = retrieve_windows_encoding(self.default_encoding)
        if encoding not in self.user_encoding:
            self.user_encoding.append(encoding)
    font_table[font] = encoding
    return encoding
|
||||||
|
|
||||||
from xml import OpenLyrics, SongXML
|
from xml import OpenLyrics, SongXML
|
||||||
from songstab import SongsTab
|
from songstab import SongsTab
|
||||||
from mediaitem import SongMediaItem
|
from mediaitem import SongMediaItem
|
||||||
|
Loading…
Reference in New Issue
Block a user