ewimport and __init__ back now mahfiaz has merged his

2012-07-05 19:39:47 +01:00 · 2012-07-05 19:39:47 +01:00 · 3668adc6ff
commit 3668adc6ff
parent ef29c0e97e
2 changed files with 97 additions and 97 deletions
--- a/openlp/plugins/songs/lib/init.py
+++ b/openlp/plugins/songs/lib/init.py
@ -36,7 +36,6 @@ from ui import SongStrings

 WHITESPACE = re.compile(r'[\W_]+', re.UNICODE)
 APOSTROPHE = re.compile(u'[\'`’ʻ′]', re.UNICODE)
-RTF_STRIPPING_REGEX = re.compile(r'\{\\tx[^}]*\}')

 class VerseType(object):
    """
@ -367,101 +366,6 @@ def clean_song(manager, song):
    if song.copyright:
        song.copyright = CONTROL_CHARS.sub(u'', song.copyright).strip()

-def strip_rtf(blob, encoding):
-    depth = 0
-    control = False
-    clear_text = []
-    control_word = []
-
-    # workaround for \tx bug: remove one pair of curly braces
-    # if \tx is encountered
-    match = RTF_STRIPPING_REGEX.search(blob)
-    if match:
-        # start and end indices of match are curly braces - filter them out
-        blob = ''.join([blob[i] for i in xrange(len(blob))
-                        if i != match.start() and i !=match.end()])
-    for c in blob:
-        if control:
-            # for delimiters, set control to False
-            if c == '{':
-                if control_word:
-                    depth += 1
-                control = False
-            elif c == '}':
-                if control_word:
-                    depth -= 1
-                control = False
-            elif c == '\\':
-                new_control = bool(control_word)
-                control = False
-            elif c.isspace():
-                control = False
-            else:
-                control_word.append(c)
-                if len(control_word) == 3 and control_word[0] == '\'':
-                    control = False
-            if not control:
-                if not control_word:
-                    if c == '{' or c == '}' or c == '\\':
-                        clear_text.append(c)
-                else:
-                    control_str = ''.join(control_word)
-                    if control_str == 'par' or control_str == 'line':
-                        clear_text.append(u'\n')
-                    elif control_str == 'tab':
-                        clear_text.append(u'\t')
-                    # Prefer the encoding specified by the RTF data to that
-                    # specified by the Paradox table header
-                    # West European encoding
-                    elif control_str == 'fcharset0':
-                        encoding = u'cp1252'
-                    # Greek encoding
-                    elif control_str == 'fcharset161':
-                        encoding = u'cp1253'
-                    # Turkish encoding
-                    elif control_str == 'fcharset162':
-                        encoding = u'cp1254'
-                    # Vietnamese encoding
-                    elif control_str == 'fcharset163':
-                        encoding = u'cp1258'
-                    # Hebrew encoding
-                    elif control_str == 'fcharset177':
-                        encoding = u'cp1255'
-                    # Arabic encoding
-                    elif control_str == 'fcharset178':
-                        encoding = u'cp1256'
-                    # Baltic encoding
-                    elif control_str == 'fcharset186':
-                        encoding = u'cp1257'
-                    # Cyrillic encoding
-                    elif control_str == 'fcharset204':
-                        encoding = u'cp1251'
-                    # Thai encoding
-                    elif control_str == 'fcharset222':
-                        encoding = u'cp874'
-                    # Central+East European encoding
-                    elif control_str == 'fcharset238':
-                        encoding = u'cp1250'
-                    elif control_str[0] == '\'':
-                        s = chr(int(control_str[1:3], 16))
-                        clear_text.append(s.decode(encoding))
-                    del control_word[:]
-            if c == '\\' and new_control:
-                control = True
-        elif c == '{':
-            depth += 1
-        elif c == '}':
-            depth -= 1
-        elif depth > 2:
-            continue
-        elif c == '\n' or c == '\r':
-            continue
-        elif c == '\\':
-            control = True
-        else:
-            clear_text.append(c)
-    return u''.join(clear_text)
-
 from xml import OpenLyrics, SongXML
 from songstab import SongsTab
 from mediaitem import SongMediaItem
--- a/openlp/plugins/songs/lib/ewimport.py
+++ b/openlp/plugins/songs/lib/ewimport.py
@ -36,14 +36,110 @@ import re

 from openlp.core.lib import translate
 from openlp.plugins.songs.lib import VerseType
-from openlp.plugins.songs.lib import retrieve_windows_encoding, strip_rtf
+from openlp.plugins.songs.lib import retrieve_windows_encoding
 from songimport import SongImport

+RTF_STRIPPING_REGEX = re.compile(r'\{\\tx[^}]*\}')
 # regex: at least two newlines, can have spaces between them
 SLIDE_BREAK_REGEX = re.compile(r'\n *?\n[\n ]*')
 NUMBER_REGEX = re.compile(r'[0-9]+')
 NOTE_REGEX = re.compile(r'\(.*?\)')

+def strip_rtf(blob, encoding):
+    depth = 0
+    control = False
+    clear_text = []
+    control_word = []
+
+    # workaround for \tx bug: remove one pair of curly braces
+    # if \tx is encountered
+    match = RTF_STRIPPING_REGEX.search(blob)
+    if match:
+        # start and end indices of match are curly braces - filter them out
+        blob = ''.join([blob[i] for i in xrange(len(blob))
+            if i != match.start() and i !=match.end()])
+
+    for c in blob:
+        if control:
+            # for delimiters, set control to False
+            if c == '{':
+                if control_word:
+                    depth += 1
+                control = False
+            elif c == '}':
+                if control_word:
+                    depth -= 1
+                control = False
+            elif c == '\\':
+                new_control = bool(control_word)
+                control = False
+            elif c.isspace():
+                control = False
+            else:
+                control_word.append(c)
+                if len(control_word) == 3 and control_word[0] == '\'':
+                    control = False
+            if not control:
+                if not control_word:
+                    if c == '{' or c == '}' or c == '\\':
+                        clear_text.append(c)
+                else:
+                    control_str = ''.join(control_word)
+                    if control_str == 'par' or control_str == 'line':
+                        clear_text.append(u'\n')
+                    elif control_str == 'tab':
+                        clear_text.append(u'\t')
+                    # Prefer the encoding specified by the RTF data to that
+                    # specified by the Paradox table header
+                    # West European encoding
+                    elif control_str == 'fcharset0':
+                        encoding = u'cp1252'
+                    # Greek encoding
+                    elif control_str == 'fcharset161':
+                        encoding = u'cp1253'
+                    # Turkish encoding
+                    elif control_str == 'fcharset162':
+                        encoding = u'cp1254'
+                    # Vietnamese encoding
+                    elif control_str == 'fcharset163':
+                        encoding = u'cp1258'
+                    # Hebrew encoding
+                    elif control_str == 'fcharset177':
+                        encoding = u'cp1255'
+                    # Arabic encoding
+                    elif control_str == 'fcharset178':
+                        encoding = u'cp1256'
+                    # Baltic encoding
+                    elif control_str == 'fcharset186':
+                        encoding = u'cp1257'
+                    # Cyrillic encoding
+                    elif control_str == 'fcharset204':
+                        encoding = u'cp1251'
+                    # Thai encoding
+                    elif control_str == 'fcharset222':
+                        encoding = u'cp874'
+                    # Central+East European encoding
+                    elif control_str == 'fcharset238':
+                        encoding = u'cp1250'
+                    elif control_str[0] == '\'':
+                        s = chr(int(control_str[1:3], 16))
+                        clear_text.append(s.decode(encoding))
+                    del control_word[:]
+            if c == '\\' and new_control:
+                control = True
+        elif c == '{':
+            depth += 1
+        elif c == '}':
+            depth -= 1
+        elif depth > 2:
+            continue
+        elif c == '\n' or c == '\r':
+            continue
+        elif c == '\\':
+            control = True
+        else:
+            clear_text.append(c)
+    return u''.join(clear_text)

 class FieldDescEntry:
    def __init__(self, name, type, size):