forked from openlp/openlp
ewimport and __init__ back now mahfiaz has merged his
This commit is contained in:
parent
ef29c0e97e
commit
3668adc6ff
@ -36,7 +36,6 @@ from ui import SongStrings
|
||||
|
||||
# Matches a run of one or more characters that are either non-word characters
# (using Unicode word rules) or underscores.
WHITESPACE = re.compile(r'[\W_]+', re.UNICODE)
|
||||
# Matches a single apostrophe-like character: the ASCII apostrophe, the
# backtick, or one of three Unicode look-alikes listed in the character class.
APOSTROPHE = re.compile(u'[\'`’ʻ′]', re.UNICODE)
|
||||
# Matches a brace-delimited group that opens with "\tx" and runs to the first
# closing brace, e.g. "{\tx720}". Used by strip_rtf() for its \tx workaround.
RTF_STRIPPING_REGEX = re.compile(r'\{\\tx[^}]*\}')
|
||||
|
||||
class VerseType(object):
|
||||
"""
|
||||
@ -367,101 +366,6 @@ def clean_song(manager, song):
|
||||
if song.copyright:
|
||||
song.copyright = CONTROL_CHARS.sub(u'', song.copyright).strip()
|
||||
|
||||
# Maps RTF "\fcharsetN" control words to the Windows code page that is used to
# decode the "\'xx" escapes which follow them.
_RTF_CHARSET_ENCODINGS = {
    'fcharset0': u'cp1252',    # West European
    'fcharset161': u'cp1253',  # Greek
    'fcharset162': u'cp1254',  # Turkish
    'fcharset163': u'cp1258',  # Vietnamese
    'fcharset177': u'cp1255',  # Hebrew
    'fcharset178': u'cp1256',  # Arabic
    'fcharset186': u'cp1257',  # Baltic
    'fcharset204': u'cp1251',  # Cyrillic
    'fcharset222': u'cp874',   # Thai
    'fcharset238': u'cp1250',  # Central+East European
}


def strip_rtf(blob, encoding):
    """
    Convert an RTF fragment to plain text and return it as one string.

    The fragment is scanned character by character:

    * ``\\par`` and ``\\line`` become a newline, ``\\tab`` becomes a tab.
    * ``\\fcharsetN`` (for the values in ``_RTF_CHARSET_ENCODINGS``) replaces
      the encoding used for the rest of the fragment.
    * ``\\'xx`` is decoded as the single byte ``0xXX`` in the current encoding.
    * ``\\{``, ``\\}`` and ``\\\\`` produce the literal character.
    * Every other control word is dropped.
    * Plain characters are kept unless they are nested more than two groups
      deep; raw carriage returns and line feeds are dropped.

    ``blob``
        The RTF data to strip.

    ``encoding``
        The encoding used for ``\\'xx`` escapes until the RTF data names one
        itself through ``\\fcharsetN``.
    """
    depth = 0
    control = False
    # Only read right after it has been assigned for a backslash; initialised
    # here so the name is always bound.
    new_control = False
    clear_text = []
    control_word = []

    # Workaround for \tx bug: remove one pair of curly braces if \tx is
    # encountered.
    match = RTF_STRIPPING_REGEX.search(blob)
    if match:
        opening = match.start()
        # match.end() points one past the match, so the closing brace itself
        # sits at end() - 1.
        closing = match.end() - 1
        blob = blob[:opening] + blob[opening + 1:closing] + blob[closing + 1:]

    for c in blob:
        if control:
            # Delimiters end the control word that is being collected.
            if c == '{':
                if control_word:
                    depth += 1
                control = False
            elif c == '}':
                if control_word:
                    depth -= 1
                control = False
            elif c == '\\':
                # A backslash after a non-empty word starts the next control
                # word; after an empty one it is an escaped backslash.
                new_control = bool(control_word)
                control = False
            elif c.isspace():
                control = False
            else:
                control_word.append(c)
                # \'xx escapes are exactly three characters long and need no
                # delimiter.
                if len(control_word) == 3 and control_word[0] == '\'':
                    control = False
            if not control:
                if not control_word:
                    # Escaped brace or backslash: emit it literally.
                    if c in ('{', '}', '\\'):
                        clear_text.append(c)
                else:
                    control_str = ''.join(control_word)
                    if control_str in ('par', 'line'):
                        clear_text.append(u'\n')
                    elif control_str == 'tab':
                        clear_text.append(u'\t')
                    # Prefer the encoding specified by the RTF data to that
                    # specified by the Paradox table header.
                    elif control_str in _RTF_CHARSET_ENCODINGS:
                        encoding = _RTF_CHARSET_ENCODINGS[control_str]
                    elif control_str[0] == '\'':
                        # bytearray decodes the same way on Python 2 and 3.
                        byte_value = int(control_str[1:3], 16)
                        clear_text.append(
                            bytearray([byte_value]).decode(encoding))
                    del control_word[:]
            if c == '\\' and new_control:
                control = True
        elif c == '{':
            depth += 1
        elif c == '}':
            depth -= 1
        elif depth > 2:
            continue
        elif c == '\n' or c == '\r':
            continue
        elif c == '\\':
            control = True
        else:
            clear_text.append(c)
    return u''.join(clear_text)
|
||||
|
||||
from xml import OpenLyrics, SongXML
|
||||
from songstab import SongsTab
|
||||
from mediaitem import SongMediaItem
|
||||
|
@ -36,14 +36,110 @@ import re
|
||||
|
||||
from openlp.core.lib import translate
|
||||
from openlp.plugins.songs.lib import VerseType
|
||||
from openlp.plugins.songs.lib import retrieve_windows_encoding, strip_rtf
|
||||
from openlp.plugins.songs.lib import retrieve_windows_encoding
|
||||
from songimport import SongImport
|
||||
|
||||
# Matches a brace-delimited group that opens with "\tx" and runs to the first
# closing brace, e.g. "{\tx720}". Used by strip_rtf() for its \tx workaround.
RTF_STRIPPING_REGEX = re.compile(r'\{\\tx[^}]*\}')
|
||||
# regex: at least two newlines, can have spaces between them
|
||||
SLIDE_BREAK_REGEX = re.compile(r'\n *?\n[\n ]*')
|
||||
# Matches a run of one or more ASCII digits.
NUMBER_REGEX = re.compile(r'[0-9]+')
|
||||
# Matches the shortest parenthesised span, e.g. "(x2)"; non-greedy, and it does
# not cross line breaks because "." excludes newlines by default.
NOTE_REGEX = re.compile(r'\(.*?\)')
|
||||
|
||||
# Maps RTF "\fcharsetN" control words to the Windows code page that is used to
# decode the "\'xx" escapes which follow them.
_RTF_CHARSET_ENCODINGS = {
    'fcharset0': u'cp1252',    # West European
    'fcharset161': u'cp1253',  # Greek
    'fcharset162': u'cp1254',  # Turkish
    'fcharset163': u'cp1258',  # Vietnamese
    'fcharset177': u'cp1255',  # Hebrew
    'fcharset178': u'cp1256',  # Arabic
    'fcharset186': u'cp1257',  # Baltic
    'fcharset204': u'cp1251',  # Cyrillic
    'fcharset222': u'cp874',   # Thai
    'fcharset238': u'cp1250',  # Central+East European
}


def strip_rtf(blob, encoding):
    """
    Convert an RTF fragment to plain text and return it as one string.

    The fragment is scanned character by character:

    * ``\\par`` and ``\\line`` become a newline, ``\\tab`` becomes a tab.
    * ``\\fcharsetN`` (for the values in ``_RTF_CHARSET_ENCODINGS``) replaces
      the encoding used for the rest of the fragment.
    * ``\\'xx`` is decoded as the single byte ``0xXX`` in the current encoding.
    * ``\\{``, ``\\}`` and ``\\\\`` produce the literal character.
    * Every other control word is dropped.
    * Plain characters are kept unless they are nested more than two groups
      deep; raw carriage returns and line feeds are dropped.

    ``blob``
        The RTF data to strip.

    ``encoding``
        The encoding used for ``\\'xx`` escapes until the RTF data names one
        itself through ``\\fcharsetN``.
    """
    depth = 0
    control = False
    # Only read right after it has been assigned for a backslash; initialised
    # here so the name is always bound.
    new_control = False
    clear_text = []
    control_word = []

    # Workaround for \tx bug: remove one pair of curly braces if \tx is
    # encountered.
    match = RTF_STRIPPING_REGEX.search(blob)
    if match:
        opening = match.start()
        # match.end() points one past the match, so the closing brace itself
        # sits at end() - 1.
        closing = match.end() - 1
        blob = blob[:opening] + blob[opening + 1:closing] + blob[closing + 1:]

    for c in blob:
        if control:
            # Delimiters end the control word that is being collected.
            if c == '{':
                if control_word:
                    depth += 1
                control = False
            elif c == '}':
                if control_word:
                    depth -= 1
                control = False
            elif c == '\\':
                # A backslash after a non-empty word starts the next control
                # word; after an empty one it is an escaped backslash.
                new_control = bool(control_word)
                control = False
            elif c.isspace():
                control = False
            else:
                control_word.append(c)
                # \'xx escapes are exactly three characters long and need no
                # delimiter.
                if len(control_word) == 3 and control_word[0] == '\'':
                    control = False
            if not control:
                if not control_word:
                    # Escaped brace or backslash: emit it literally.
                    if c in ('{', '}', '\\'):
                        clear_text.append(c)
                else:
                    control_str = ''.join(control_word)
                    if control_str in ('par', 'line'):
                        clear_text.append(u'\n')
                    elif control_str == 'tab':
                        clear_text.append(u'\t')
                    # Prefer the encoding specified by the RTF data to that
                    # specified by the Paradox table header.
                    elif control_str in _RTF_CHARSET_ENCODINGS:
                        encoding = _RTF_CHARSET_ENCODINGS[control_str]
                    elif control_str[0] == '\'':
                        # bytearray decodes the same way on Python 2 and 3.
                        byte_value = int(control_str[1:3], 16)
                        clear_text.append(
                            bytearray([byte_value]).decode(encoding))
                    del control_word[:]
            if c == '\\' and new_control:
                control = True
        elif c == '{':
            depth += 1
        elif c == '}':
            depth -= 1
        elif depth > 2:
            continue
        elif c == '\n' or c == '\r':
            continue
        elif c == '\\':
            control = True
        else:
            clear_text.append(c)
    return u''.join(clear_text)
|
||||
|
||||
class FieldDescEntry:
|
||||
def __init__(self, name, type, size):
|
||||
|
Loading…
Reference in New Issue
Block a user