forked from openlp/openlp
ewimport and __init__ back now mahfiaz has merged his
This commit is contained in:
parent
ef29c0e97e
commit
3668adc6ff
@ -36,7 +36,6 @@ from ui import SongStrings
|
|||||||
|
|
||||||
WHITESPACE = re.compile(r'[\W_]+', re.UNICODE)
|
WHITESPACE = re.compile(r'[\W_]+', re.UNICODE)
|
||||||
APOSTROPHE = re.compile(u'[\'`’ʻ′]', re.UNICODE)
|
APOSTROPHE = re.compile(u'[\'`’ʻ′]', re.UNICODE)
|
||||||
RTF_STRIPPING_REGEX = re.compile(r'\{\\tx[^}]*\}')
|
|
||||||
|
|
||||||
class VerseType(object):
|
class VerseType(object):
|
||||||
"""
|
"""
|
||||||
@ -367,101 +366,6 @@ def clean_song(manager, song):
|
|||||||
if song.copyright:
|
if song.copyright:
|
||||||
song.copyright = CONTROL_CHARS.sub(u'', song.copyright).strip()
|
song.copyright = CONTROL_CHARS.sub(u'', song.copyright).strip()
|
||||||
|
|
||||||
# Maps the RTF ``fcharsetN`` control words recognised by strip_rtf() to the
# Windows code page used to decode subsequent hex escapes.
_RTF_CHARSET_ENCODINGS = {
    'fcharset0': u'cp1252',    # West European
    'fcharset161': u'cp1253',  # Greek
    'fcharset162': u'cp1254',  # Turkish
    'fcharset163': u'cp1258',  # Vietnamese
    'fcharset177': u'cp1255',  # Hebrew
    'fcharset178': u'cp1256',  # Arabic
    'fcharset186': u'cp1257',  # Baltic
    'fcharset204': u'cp1251',  # Cyrillic
    'fcharset222': u'cp874',   # Thai
    'fcharset238': u'cp1250',  # Central+East European
}


def strip_rtf(blob, encoding):
    r"""
    Strip the RTF markup from ``blob`` and return the remaining plain text.

    The blob is scanned one character at a time:

    * ``\par`` and ``\line`` produce a newline, ``\tab`` produces a tab.
    * ``\'hh`` hex escapes are decoded with the current encoding.
    * A recognised ``\fcharsetN`` control word replaces the current encoding
      for the rest of the blob.
    * An escaped brace or backslash is emitted literally.
    * Every other control word is discarded, together with the single
      whitespace character that terminates it.
    * Raw CR/LF characters are dropped, and while more than two groups deep
      everything except braces is ignored.

    ``blob``
        The RTF data to convert.

    ``encoding``
        The encoding used for ``\'hh`` escapes until an ``\fcharsetN``
        control word found in the data overrides it.

    Returns the extracted text as a unicode string. A malformed ``\'hh``
    escape or an undecodable byte propagates the exception raised by
    ``int()`` or ``decode()``.
    """
    depth = 0
    control = False
    clear_text = []
    control_word = []

    # workaround for \tx bug: remove one pair of curly braces
    # if \tx is encountered
    match = RTF_STRIPPING_REGEX.search(blob)
    if match:
        # match.start() is the opening brace; match.end() is one *past* the
        # closing brace, so the closing brace itself sits at end() - 1.
        opening = match.start()
        closing = match.end() - 1
        blob = blob[:opening] + blob[opening + 1:closing] + \
            blob[closing + 1:]

    for c in blob:
        if control:
            # A control word is being collected; delimiters end it.
            if c == '{':
                # Only counts as a group start when it terminates a control
                # word; directly after the backslash it is an escaped brace.
                if control_word:
                    depth += 1
                control = False
            elif c == '}':
                if control_word:
                    depth -= 1
                control = False
            elif c == '\\':
                # A backslash after a control word starts the next one; a
                # backslash directly after a backslash is an escaped one.
                new_control = bool(control_word)
                control = False
            elif c.isspace():
                control = False
            else:
                control_word.append(c)
                # \'hh escapes are exactly three characters and need no
                # delimiter.
                if len(control_word) == 3 and control_word[0] == '\'':
                    control = False
            if not control:
                if not control_word:
                    # Escaped special character: emit it literally.
                    if c == '{' or c == '}' or c == '\\':
                        clear_text.append(c)
                else:
                    control_str = ''.join(control_word)
                    if control_str == 'par' or control_str == 'line':
                        clear_text.append(u'\n')
                    elif control_str == 'tab':
                        clear_text.append(u'\t')
                    # Prefer the encoding specified by the RTF data to that
                    # specified by the Paradox table header
                    elif control_str in _RTF_CHARSET_ENCODINGS:
                        encoding = _RTF_CHARSET_ENCODINGS[control_str]
                    elif control_str[0] == '\'':
                        s = chr(int(control_str[1:3], 16))
                        clear_text.append(s.decode(encoding))
                    del control_word[:]
                if c == '\\' and new_control:
                    # The terminating backslash opens the next control word.
                    control = True
        elif c == '{':
            depth += 1
        elif c == '}':
            depth -= 1
        elif depth > 2:
            # Deeply nested groups are skipped.
            continue
        elif c == '\n' or c == '\r':
            # Raw line breaks carry no meaning in RTF.
            continue
        elif c == '\\':
            control = True
        else:
            clear_text.append(c)
    return u''.join(clear_text)
|
|
||||||
|
|
||||||
from xml import OpenLyrics, SongXML
|
from xml import OpenLyrics, SongXML
|
||||||
from songstab import SongsTab
|
from songstab import SongsTab
|
||||||
from mediaitem import SongMediaItem
|
from mediaitem import SongMediaItem
|
||||||
|
@ -36,14 +36,110 @@ import re
|
|||||||
|
|
||||||
from openlp.core.lib import translate
|
from openlp.core.lib import translate
|
||||||
from openlp.plugins.songs.lib import VerseType
|
from openlp.plugins.songs.lib import VerseType
|
||||||
from openlp.plugins.songs.lib import retrieve_windows_encoding, strip_rtf
|
from openlp.plugins.songs.lib import retrieve_windows_encoding
|
||||||
from songimport import SongImport
|
from songimport import SongImport
|
||||||
|
|
||||||
|
RTF_STRIPPING_REGEX = re.compile(r'\{\\tx[^}]*\}')
|
||||||
# regex: at least two newlines, can have spaces between them
|
# regex: at least two newlines, can have spaces between them
|
||||||
SLIDE_BREAK_REGEX = re.compile(r'\n *?\n[\n ]*')
|
SLIDE_BREAK_REGEX = re.compile(r'\n *?\n[\n ]*')
|
||||||
NUMBER_REGEX = re.compile(r'[0-9]+')
|
NUMBER_REGEX = re.compile(r'[0-9]+')
|
||||||
NOTE_REGEX = re.compile(r'\(.*?\)')
|
NOTE_REGEX = re.compile(r'\(.*?\)')
|
||||||
|
|
||||||
|
# Maps the RTF ``fcharsetN`` control words recognised by strip_rtf() to the
# Windows code page used to decode subsequent hex escapes.
_RTF_CHARSET_ENCODINGS = {
    'fcharset0': u'cp1252',    # West European
    'fcharset161': u'cp1253',  # Greek
    'fcharset162': u'cp1254',  # Turkish
    'fcharset163': u'cp1258',  # Vietnamese
    'fcharset177': u'cp1255',  # Hebrew
    'fcharset178': u'cp1256',  # Arabic
    'fcharset186': u'cp1257',  # Baltic
    'fcharset204': u'cp1251',  # Cyrillic
    'fcharset222': u'cp874',   # Thai
    'fcharset238': u'cp1250',  # Central+East European
}


def strip_rtf(blob, encoding):
    r"""
    Strip the RTF markup from ``blob`` and return the remaining plain text.

    The blob is scanned one character at a time:

    * ``\par`` and ``\line`` produce a newline, ``\tab`` produces a tab.
    * ``\'hh`` hex escapes are decoded with the current encoding.
    * A recognised ``\fcharsetN`` control word replaces the current encoding
      for the rest of the blob.
    * An escaped brace or backslash is emitted literally.
    * Every other control word is discarded, together with the single
      whitespace character that terminates it.
    * Raw CR/LF characters are dropped, and while more than two groups deep
      everything except braces is ignored.

    ``blob``
        The RTF data to convert.

    ``encoding``
        The encoding used for ``\'hh`` escapes until an ``\fcharsetN``
        control word found in the data overrides it.

    Returns the extracted text as a unicode string. A malformed ``\'hh``
    escape or an undecodable byte propagates the exception raised by
    ``int()`` or ``decode()``.
    """
    depth = 0
    control = False
    clear_text = []
    control_word = []

    # workaround for \tx bug: remove one pair of curly braces
    # if \tx is encountered
    match = RTF_STRIPPING_REGEX.search(blob)
    if match:
        # match.start() is the opening brace; match.end() is one *past* the
        # closing brace, so the closing brace itself sits at end() - 1.
        opening = match.start()
        closing = match.end() - 1
        blob = blob[:opening] + blob[opening + 1:closing] + \
            blob[closing + 1:]

    for c in blob:
        if control:
            # A control word is being collected; delimiters end it.
            if c == '{':
                # Only counts as a group start when it terminates a control
                # word; directly after the backslash it is an escaped brace.
                if control_word:
                    depth += 1
                control = False
            elif c == '}':
                if control_word:
                    depth -= 1
                control = False
            elif c == '\\':
                # A backslash after a control word starts the next one; a
                # backslash directly after a backslash is an escaped one.
                new_control = bool(control_word)
                control = False
            elif c.isspace():
                control = False
            else:
                control_word.append(c)
                # \'hh escapes are exactly three characters and need no
                # delimiter.
                if len(control_word) == 3 and control_word[0] == '\'':
                    control = False
            if not control:
                if not control_word:
                    # Escaped special character: emit it literally.
                    if c == '{' or c == '}' or c == '\\':
                        clear_text.append(c)
                else:
                    control_str = ''.join(control_word)
                    if control_str == 'par' or control_str == 'line':
                        clear_text.append(u'\n')
                    elif control_str == 'tab':
                        clear_text.append(u'\t')
                    # Prefer the encoding specified by the RTF data to that
                    # specified by the Paradox table header
                    elif control_str in _RTF_CHARSET_ENCODINGS:
                        encoding = _RTF_CHARSET_ENCODINGS[control_str]
                    elif control_str[0] == '\'':
                        s = chr(int(control_str[1:3], 16))
                        clear_text.append(s.decode(encoding))
                    del control_word[:]
                if c == '\\' and new_control:
                    # The terminating backslash opens the next control word.
                    control = True
        elif c == '{':
            depth += 1
        elif c == '}':
            depth -= 1
        elif depth > 2:
            # Deeply nested groups are skipped.
            continue
        elif c == '\n' or c == '\r':
            # Raw line breaks carry no meaning in RTF.
            continue
        elif c == '\\':
            control = True
        else:
            clear_text.append(c)
    return u''.join(clear_text)
|
||||||
|
|
||||||
class FieldDescEntry:
|
class FieldDescEntry:
|
||||||
def __init__(self, name, type, size):
|
def __init__(self, name, type, size):
|
||||||
|
Loading…
Reference in New Issue
Block a user