forked from openlp/openlp
Make tidy text part of core. Remove redundant (since Python 3) re.UNICODE flag.
Fixes: https://launchpad.net/bugs/1727517
This commit is contained in:
parent
fe5430e157
commit
960ddedc6f
@ -43,9 +43,13 @@ log = logging.getLogger(__name__ + '.__init__')
|
||||
|
||||
FIRST_CAMEL_REGEX = re.compile('(.)([A-Z][a-z]+)')
|
||||
SECOND_CAMEL_REGEX = re.compile('([a-z0-9])([A-Z])')
|
||||
CONTROL_CHARS = re.compile(r'[\x00-\x1F\x7F-\x9F]', re.UNICODE)
|
||||
INVALID_FILE_CHARS = re.compile(r'[\\/:\*\?"<>\|\+\[\]%]', re.UNICODE)
|
||||
CONTROL_CHARS = re.compile(r'[\x00-\x1F\x7F-\x9F]')
|
||||
INVALID_FILE_CHARS = re.compile(r'[\\/:\*\?"<>\|\+\[\]%]')
|
||||
IMAGES_FILTER = None
|
||||
REPLACMENT_CHARS_MAP = str.maketrans({'\u2018': '\'', '\u2019': '\'', '\u201c': '"', '\u201d': '"', '\u2026': '...',
|
||||
'\u2013': '-', '\u2014': '-', '\v': '\n\n', '\f': '\n\n'})
|
||||
NEW_LINE_REGEX = re.compile(r' ?(\r\n?|\n) ?')
|
||||
WHITESPACE_REGEX = re.compile(r'[ \t]+')
|
||||
|
||||
|
||||
def trace_error_handler(logger):
|
||||
@ -436,3 +440,13 @@ def get_file_encoding(file_path):
|
||||
return detector.result
|
||||
except OSError:
|
||||
log.exception('Error detecting file encoding')
|
||||
|
||||
def normalize_str(irreg_str):
    """
    Normalize the supplied string: replace typographic characters and tidy up the white space.

    The characters listed in ``REPLACMENT_CHARS_MAP`` (curly quotes, ellipsis, en/em dashes, vertical tab and form
    feed) are replaced by their plain equivalents, runs of spaces and tabs are collapsed to a single space, and
    every line break (``\\r\\n``, ``\\r`` or ``\\n``) is converted to ``\\n`` with the space on either side of it
    removed.

    :param str irreg_str: The string to normalize.
    :return: The normalized string.
    :rtype: str
    """
    irreg_str = irreg_str.translate(REPLACMENT_CHARS_MAP)
    # Collapse the space/tab runs *before* trimming around line breaks. NEW_LINE_REGEX only consumes a single
    # optional space on each side of a line break, so running it first would leave stray spaces (or tabs) next to
    # the line break, e.g. 'a  \n  b' would end up as 'a \n b' instead of 'a\nb'.
    irreg_str = WHITESPACE_REGEX.sub(' ', irreg_str)
    return NEW_LINE_REGEX.sub('\n', irreg_str)
|
||||
|
@ -53,7 +53,7 @@ def translate(context, text, comment=None, qt_translate=QtCore.QCoreApplication.
|
||||
|
||||
Language = namedtuple('Language', ['id', 'name', 'code'])
|
||||
ICU_COLLATOR = None
|
||||
DIGITS_OR_NONDIGITS = re.compile(r'\d+|\D+', re.UNICODE)
|
||||
DIGITS_OR_NONDIGITS = re.compile(r'\d+|\D+')
|
||||
LANGUAGES = sorted([
|
||||
Language(1, translate('common.languages', '(Afan) Oromo', 'Language code: om'), 'om'),
|
||||
Language(2, translate('common.languages', 'Abkhazian', 'Language code: ab'), 'ab'),
|
||||
|
@ -38,7 +38,6 @@ log = logging.getLogger(__name__ + '.__init__')
|
||||
|
||||
SLIMCHARS = 'fiíIÍjlĺľrtť.,;/ ()|"\'!:\\'
|
||||
|
||||
|
||||
class ServiceItemContext(object):
|
||||
"""
|
||||
The context in which a Service Item is being generated
|
||||
|
@ -92,7 +92,7 @@ class MediaManagerItem(QtWidgets.QWidget, RegistryProperties):
|
||||
Run some initial setup. This method is separate from __init__ in order to mock it out in tests.
|
||||
"""
|
||||
self.hide()
|
||||
self.whitespace = re.compile(r'[\W_]+', re.UNICODE)
|
||||
self.whitespace = re.compile(r'[\W_]+')
|
||||
visible_title = self.plugin.get_string(StringContent.VisibleName)
|
||||
self.title = str(visible_title['title'])
|
||||
Registry().register(self.plugin.name, self)
|
||||
|
@ -43,7 +43,7 @@ class FormattingTagController(object):
|
||||
r'(?P<tag>[^\s/!\?>]+)(?:\s+[^\s=]+="[^"]*")*\s*(?P<empty>/)?'
|
||||
r'|(?P<cdata>!\[CDATA\[(?:(?!\]\]>).)*\]\])'
|
||||
r'|(?P<procinst>\?(?:(?!\?>).)*\?)'
|
||||
r'|(?P<comment>!--(?:(?!-->).)*--))>', re.UNICODE)
|
||||
r'|(?P<comment>!--(?:(?!-->).)*--))>')
|
||||
self.html_regex = re.compile(r'^(?:[^<>]*%s)*[^<>]*$' % self.html_tag_regex.pattern)
|
||||
|
||||
def pre_save(self):
|
||||
|
@ -113,8 +113,7 @@ class BookNameForm(QDialog, Ui_BookNameDialog):
|
||||
cor_book = self.corresponding_combo_box.currentText()
|
||||
for character in '\\.^$*+?{}[]()':
|
||||
cor_book = cor_book.replace(character, '\\' + character)
|
||||
books = [key for key in list(self.book_names.keys()) if re.match(cor_book, str(self.book_names[key]),
|
||||
re.UNICODE)]
|
||||
books = [key for key in list(self.book_names.keys()) if re.match(cor_book, str(self.book_names[key]))]
|
||||
books = [_f for _f in map(BiblesResourcesDB.get_book, books) if _f]
|
||||
if books:
|
||||
self.book_id = books[0]['id']
|
||||
|
@ -224,13 +224,13 @@ def update_reference_separators():
|
||||
range_regex = '(?:(?P<from_chapter>[0-9]+){sep_v})?' \
|
||||
'(?P<from_verse>[0-9]+)(?P<range_to>{sep_r}(?:(?:(?P<to_chapter>' \
|
||||
'[0-9]+){sep_v})?(?P<to_verse>[0-9]+)|{sep_e})?)?'.format_map(REFERENCE_SEPARATORS)
|
||||
REFERENCE_MATCHES['range'] = re.compile(r'^\s*{range}\s*$'.format(range=range_regex), re.UNICODE)
|
||||
REFERENCE_MATCHES['range_separator'] = re.compile(REFERENCE_SEPARATORS['sep_l'], re.UNICODE)
|
||||
REFERENCE_MATCHES['range'] = re.compile(r'^\s*{range}\s*$'.format(range=range_regex))
|
||||
REFERENCE_MATCHES['range_separator'] = re.compile(REFERENCE_SEPARATORS['sep_l'])
|
||||
# full reference match: <book>(<range>(,(?!$)|(?=$)))+
|
||||
REFERENCE_MATCHES['full'] = \
|
||||
re.compile(r'^\s*(?!\s)(?P<book>[\d]*[.]?[^\d\.]+)\.*(?<!\s)\s*'
|
||||
r'(?P<ranges>(?:{range_regex}(?:{sep_l}(?!\s*$)|(?=\s*$)))+)\s*$'.format(
|
||||
range_regex=range_regex, sep_l=REFERENCE_SEPARATORS['sep_l']), re.UNICODE)
|
||||
range_regex=range_regex, sep_l=REFERENCE_SEPARATORS['sep_l']))
|
||||
|
||||
|
||||
def get_reference_separator(separator_type):
|
||||
|
@ -307,8 +307,7 @@ class BibleDB(Manager):
|
||||
book_escaped = book
|
||||
for character in RESERVED_CHARACTERS:
|
||||
book_escaped = book_escaped.replace(character, '\\' + character)
|
||||
regex_book = re.compile('\\s*{book}\\s*'.format(book='\\s*'.join(book_escaped.split())),
|
||||
re.UNICODE | re.IGNORECASE)
|
||||
regex_book = re.compile('\\s*{book}\\s*'.format(book='\\s*'.join(book_escaped.split())), re.IGNORECASE)
|
||||
if language_selection == LanguageSelection.Bible:
|
||||
db_book = self.get_book(book)
|
||||
if db_book:
|
||||
|
@ -105,9 +105,9 @@ class EditSongForm(QtWidgets.QDialog, Ui_EditSongDialog, RegistryProperties):
|
||||
self.topics_list_view.setSortingEnabled(False)
|
||||
self.topics_list_view.setAlternatingRowColors(True)
|
||||
self.audio_list_widget.setAlternatingRowColors(True)
|
||||
self.find_verse_split = re.compile('---\[\]---\n', re.UNICODE)
|
||||
self.whitespace = re.compile(r'\W+', re.UNICODE)
|
||||
self.find_tags = re.compile(u'\{/?\w+\}', re.UNICODE)
|
||||
self.find_verse_split = re.compile('---\[\]---\n')
|
||||
self.whitespace = re.compile(r'\W+')
|
||||
self.find_tags = re.compile(r'\{/?\w+\}')
|
||||
|
||||
def _load_objects(self, cls, combo, cache):
|
||||
"""
|
||||
|
@ -24,7 +24,6 @@ The :mod:`~openlp.plugins.songs.lib` module contains a number of library functio
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
|
||||
from PyQt5 import QtWidgets
|
||||
@ -39,8 +38,8 @@ from openlp.plugins.songs.lib.ui import SongStrings
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
WHITESPACE = re.compile(r'[\W_]+', re.UNICODE)
|
||||
APOSTROPHE = re.compile('[\'`’ʻ′]', re.UNICODE)
|
||||
WHITESPACE = re.compile(r'[\W_]+')
|
||||
APOSTROPHE = re.compile(r'[\'`’ʻ′]')
|
||||
# PATTERN will look for the next occurrence of one of these symbols:
|
||||
# \controlword - optionally preceded by \*, optionally followed by a number
|
||||
# \'## - where ## is a pair of hex digits, representing a single character
|
||||
|
@ -25,6 +25,7 @@ import re
|
||||
|
||||
from lxml import etree, objectify
|
||||
|
||||
from openlp.core.common import normalize_str
|
||||
from openlp.plugins.songs.lib import VerseType
|
||||
from openlp.plugins.songs.lib.importers.songimport import SongImport
|
||||
|
||||
@ -225,7 +226,7 @@ class EasySlidesImport(SongImport):
|
||||
verses[reg].setdefault(vt, {})
|
||||
verses[reg][vt].setdefault(vn, {})
|
||||
verses[reg][vt][vn].setdefault(inst, [])
|
||||
verses[reg][vt][vn][inst].append(self.tidy_text(line))
|
||||
verses[reg][vt][vn][inst].append(normalize_str(line))
|
||||
# done parsing
|
||||
versetags = []
|
||||
# we use our_verse_order to ensure, we insert lyrics in the same order
|
||||
|
@ -101,7 +101,7 @@ class MediaShoutImport(SongImport):
|
||||
self.song_book_name = song.SongID
|
||||
for verse in verses:
|
||||
tag = VERSE_TAGS[verse.Type] + str(verse.Number) if verse.Type < len(VERSE_TAGS) else 'O'
|
||||
self.add_verse(self.tidy_text(verse.Text), tag)
|
||||
self.add_verse(verse.Text, tag)
|
||||
for order in verse_order:
|
||||
if order.Type < len(VERSE_TAGS):
|
||||
self.verse_order_list.append(VERSE_TAGS[order.Type] + str(order.Number))
|
||||
|
@ -24,7 +24,7 @@ import time
|
||||
|
||||
from PyQt5 import QtCore
|
||||
|
||||
from openlp.core.common import is_win, get_uno_command, get_uno_instance
|
||||
from openlp.core.common import get_uno_command, get_uno_instance, is_win, normalize_str
|
||||
from openlp.core.common.i18n import translate
|
||||
from .songimport import SongImport
|
||||
|
||||
@ -241,7 +241,7 @@ class OpenOfficeImport(SongImport):
|
||||
|
||||
:param text: The text.
|
||||
"""
|
||||
song_texts = self.tidy_text(text).split('\f')
|
||||
song_texts = normalize_str(text).split('\f')
|
||||
self.set_defaults()
|
||||
for song_text in song_texts:
|
||||
if song_text.strip():
|
||||
|
@ -25,6 +25,7 @@ import re
|
||||
from lxml import objectify
|
||||
from lxml.etree import Error, LxmlError
|
||||
|
||||
from openlp.core.common import normalize_str
|
||||
from openlp.core.common.i18n import translate
|
||||
from openlp.core.common.settings import Settings
|
||||
from openlp.plugins.songs.lib import VerseType
|
||||
@ -262,7 +263,7 @@ class OpenSongImport(SongImport):
|
||||
post=this_line[offset + column:])
|
||||
offset += len(chord) + 2
|
||||
# Tidy text and remove the ____s from extended words
|
||||
this_line = self.tidy_text(this_line)
|
||||
this_line = normalize_str(this_line)
|
||||
this_line = this_line.replace('_', '')
|
||||
this_line = this_line.replace('||', '\n[---]\n')
|
||||
this_line = this_line.strip()
|
||||
|
@ -25,6 +25,7 @@ import re
|
||||
|
||||
from PyQt5 import QtCore
|
||||
|
||||
from openlp.core.common import normalize_str
|
||||
from openlp.core.common.applocation import AppLocation
|
||||
from openlp.core.common.i18n import translate
|
||||
from openlp.core.common.path import copyfile, create_paths
|
||||
@ -130,26 +131,6 @@ class SongImport(QtCore.QObject):
|
||||
def register(self, import_wizard):
|
||||
self.import_wizard = import_wizard
|
||||
|
||||
def tidy_text(self, text):
    """
    Swap typographic punctuation for plain ASCII equivalents and clean up the white space in the given text.

    :param text: The text to tidy.
    :return: The tidied text.
    """
    substitutions = (
        ('\u2018', '\''),
        ('\u2019', '\''),
        ('\u201c', '"'),
        ('\u201d', '"'),
        ('\u2026', '...'),
        ('\u2013', '-'),
        ('\u2014', '-'),
        # A vertical tab becomes two line breaks
        ('\v', '\n\n'),
        # A form feed (page break) becomes two line breaks
        ('\f', '\n\n'),
    )
    for odd_char, plain in substitutions:
        text = text.replace(odd_char, plain)
    # Squash runs of spaces and tabs first, then drop the single space that may remain beside a line break
    single_spaced = re.sub(r'[ \t]+', ' ', text)
    return re.sub(r' ?(\r\n?|\n) ?', '\n', single_spaced)
|
||||
|
||||
def process_song_text(self, text):
|
||||
"""
|
||||
Process the song text from import
|
||||
@ -368,7 +349,7 @@ class SongImport(QtCore.QObject):
|
||||
verse_tag = VerseType.tags[VerseType.Other]
|
||||
log.info('Versetype {old} changing to {new}'.format(old=verse_def, new=new_verse_def))
|
||||
verse_def = new_verse_def
|
||||
sxml.add_verse_to_lyrics(verse_tag, verse_def[1:], verse_text, lang)
|
||||
sxml.add_verse_to_lyrics(verse_tag, verse_def[1:], normalize_str(verse_text), lang)
|
||||
song.lyrics = str(sxml.extract_xml(), 'utf-8')
|
||||
if not self.verse_order_list and self.verse_order_list_generated_useful:
|
||||
self.verse_order_list = self.verse_order_list_generated
|
||||
|
@ -194,7 +194,6 @@ class SongsOfFellowshipImport(OpenOfficeImport):
|
||||
:param text_portion: A Piece of text
|
||||
"""
|
||||
text = text_portion.getString()
|
||||
text = self.tidy_text(text)
|
||||
if text.strip() == '':
|
||||
return text
|
||||
if text_portion.CharWeight == BOLD:
|
||||
|
@ -281,7 +281,7 @@ class OpenLyrics(object):
|
||||
# Process the formatting tags.
|
||||
# Have we any tags in song lyrics?
|
||||
tags_element = None
|
||||
match = re.search('\{/?\w+\}', song.lyrics, re.UNICODE)
|
||||
match = re.search(r'\{/?\w+\}', song.lyrics)
|
||||
if match:
|
||||
# Named 'format_' - 'format' is built-in function in Python.
|
||||
format_ = etree.SubElement(song_xml, 'format')
|
||||
|
Loading…
Reference in New Issue
Block a user