openlp/openlp/plugins/songs/lib/__init__.py

545 lines
23 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
# vim: autoindent shiftwidth=4 expandtab textwidth=120 tabstop=4 softtabstop=4
###############################################################################
# OpenLP - Open Source Lyrics Projection #
# --------------------------------------------------------------------------- #
# Copyright (c) 2008-2017 OpenLP Developers #
# --------------------------------------------------------------------------- #
# This program is free software; you can redistribute it and/or modify it #
# under the terms of the GNU General Public License as published by the Free #
# Software Foundation; version 2 of the License. #
# #
# This program is distributed in the hope that it will be useful, but WITHOUT #
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for #
# more details. #
# #
# You should have received a copy of the GNU General Public License along #
# with this program; if not, write to the Free Software Foundation, Inc., 59 #
# Temple Place, Suite 330, Boston, MA 02111-1307 USA #
###############################################################################
"""
The :mod:`~openlp.plugins.songs.lib` module contains a number of library functions and classes used in the Songs plugin.
"""
import logging
import os
import re
from PyQt5 import QtWidgets
from openlp.core.common import AppLocation
from openlp.core.lib import translate, clean_tags
from openlp.core.utils import CONTROL_CHARS
from openlp.plugins.songs.lib.db import Author, MediaFile, Song, Topic
from openlp.plugins.songs.lib.ui import SongStrings
log = logging.getLogger(__name__)
WHITESPACE = re.compile(r'[\W_]+', re.UNICODE)
APOSTROPHE = re.compile('[\'`ʻ]', re.UNICODE)
# PATTERN will look for the next occurence of one of these symbols:
# \controlword - optionally preceded by \*, optionally followed by a number
# \'## - where ## is a pair of hex digits, representing a single character
# \# - where # is a single non-alpha character, representing a special symbol
# { or } - marking the beginning/end of a group
# a run of characters without any \ { } or end-of-line
PATTERN = re.compile(
r"(\\\*)?\\([a-z]{1,32})(-?\d{1,10})?[ ]?|\\'([0-9a-f]{2})|\\([^a-z*])|([{}])|[\r\n]+|([^\\{}\r\n]+)", re.I)
# RTF control words which specify a "destination" to be ignored.
DESTINATIONS = frozenset((
'aftncn', 'aftnsep', 'aftnsepc', 'annotation', 'atnauthor', 'atndate', 'atnicn', 'atnid', 'atnparent', 'atnref',
'atntime', 'atrfend', 'atrfstart', 'author', 'background', 'bkmkend', 'bkmkstart', 'blipuid', 'buptim', 'category',
'colorschememapping', 'colortbl', 'comment', 'company', 'creatim', 'datafield', 'datastore', 'defchp', 'defpap',
'do', 'doccomm', 'docvar', 'dptxbxtext', 'ebcend', 'ebcstart', 'factoidname', 'falt', 'fchars', 'ffdeftext',
'ffentrymcr', 'ffexitmcr', 'ffformat', 'ffhelptext', 'ffl', 'ffname', 'ffstattext', 'file', 'filetbl', 'fldinst',
'fldtype', 'fname', 'fontemb', 'fontfile', 'footer', 'footerf', 'footerl', 'footerr', 'footnote', 'formfield',
'ftncn', 'ftnsep', 'ftnsepc', 'g', 'generator', 'gridtbl', 'header', 'headerf', 'headerl', 'headerr', 'hl', 'hlfr',
'hlinkbase', 'hlloc', 'hlsrc', 'hsv', 'htmltag', 'info', 'keycode', 'keywords', 'latentstyles', 'lchars',
'levelnumbers', 'leveltext', 'lfolevel', 'linkval', 'list', 'listlevel', 'listname', 'listoverride',
'listoverridetable', 'listpicture', 'liststylename', 'listtable', 'listtext', 'lsdlockedexcept', 'macc', 'maccPr',
'mailmerge', 'maln', 'malnScr', 'manager', 'margPr', 'mbar', 'mbarPr', 'mbaseJc', 'mbegChr', 'mborderBox',
'mborderBoxPr', 'mbox', 'mboxPr', 'mchr', 'mcount', 'mctrlPr', 'md', 'mdeg', 'mdegHide', 'mden', 'mdiff', 'mdPr',
'me', 'mendChr', 'meqArr', 'meqArrPr', 'mf', 'mfName', 'mfPr', 'mfunc', 'mfuncPr', 'mgroupChr', 'mgroupChrPr',
'mgrow', 'mhideBot', 'mhideLeft', 'mhideRight', 'mhideTop', 'mhtmltag', 'mlim', 'mlimloc', 'mlimlow', 'mlimlowPr',
'mlimupp', 'mlimuppPr', 'mm', 'mmaddfieldname', 'mmath', 'mmathPict', 'mmathPr', 'mmaxdist', 'mmc', 'mmcJc',
'mmconnectstr', 'mmconnectstrdata', 'mmcPr', 'mmcs', 'mmdatasource', 'mmheadersource', 'mmmailsubject', 'mmodso',
'mmodsofilter', 'mmodsofldmpdata', 'mmodsomappedname', 'mmodsoname', 'mmodsorecipdata', 'mmodsosort', 'mmodsosrc',
'mmodsotable', 'mmodsoudl', 'mmodsoudldata', 'mmodsouniquetag', 'mmPr', 'mmquery', 'mmr', 'mnary', 'mnaryPr',
'mnoBreak', 'mnum', 'mobjDist', 'moMath', 'moMathPara', 'moMathParaPr', 'mopEmu', 'mphant', 'mphantPr', 'mplcHide',
'mpos', 'mr', 'mrad', 'mradPr', 'mrPr', 'msepChr', 'mshow', 'mshp', 'msPre', 'msPrePr', 'msSub', 'msSubPr',
'msSubSup', 'msSubSupPr', 'msSup', 'msSupPr', 'mstrikeBLTR', 'mstrikeH', 'mstrikeTLBR', 'mstrikeV', 'msub',
'msubHide', 'msup', 'msupHide', 'mtransp', 'mtype', 'mvertJc', 'mvfmf', 'mvfml', 'mvtof', 'mvtol', 'mzeroAsc',
'mzFrodesc', 'mzeroWid', 'nesttableprops', 'nextfile', 'nonesttables', 'objalias', 'objclass', 'objdata', 'object',
'objname', 'objsect', 'objtime', 'oldcprops', 'oldpprops', 'oldsprops', 'oldtprops', 'oleclsid', 'operator',
'panose', 'password', 'passwordhash', 'pgp', 'pgptbl', 'picprop', 'pict', 'pn', 'pnseclvl', 'pntext', 'pntxta',
'pntxtb', 'printim', 'private', 'propname', 'protend', 'protstart', 'protusertbl', 'pxe', 'result', 'revtbl',
'revtim', 'rsidtbl', 'rxe', 'shp', 'shpgrp', 'shpinst', 'shppict', 'shprslt', 'shptxt', 'sn', 'sp', 'staticval',
'stylesheet', 'subject', 'sv', 'svb', 'tc', 'template', 'themedata', 'title', 'txe', 'ud', 'upr', 'userprops',
'wgrffmtfilter', 'windowcaption', 'writereservation', 'writereservhash', 'xe', 'xform', 'xmlattrname',
'xmlattrvalue', 'xmlclose', 'xmlname', 'xmlnstbl', 'xmlopen'
))
# Translation of some special characters.
SPECIAL_CHARS = {
'\n': '\n',
'\r': '\n',
'~': '\u00A0',
'-': '\u00AD',
'_': '\u2011',
'par': '\n',
'sect': '\n\n',
# Required page and column break.
# Would be good if we could split verse into subverses here.
'page': '\n\n',
'column': '\n\n',
# Soft breaks.
'softpage': '[---]',
'softcol': '[---]',
'line': '\n',
'tab': '\t',
'emdash': '\u2014',
'endash': '\u2013',
'emspace': '\u2003',
'enspace': '\u2002',
'qmspace': '\u2005',
'bullet': '\u2022',
'lquote': '\u2018',
'rquote': '\u2019',
'ldblquote': '\u201C',
'rdblquote': '\u201D',
'ltrmark': '\u200E',
'rtlmark': '\u200F',
'zwj': '\u200D',
'zwnj': '\u200C'
}
CHARSET_MAPPING = {
'0': 'cp1252',
'128': 'cp932',
'129': 'cp949',
'134': 'cp936',
'161': 'cp1253',
'162': 'cp1254',
'163': 'cp1258',
'177': 'cp1255',
'178': 'cp1256',
'186': 'cp1257',
'204': 'cp1251',
'222': 'cp874',
'238': 'cp1250'
}
class VerseType(object):
"""
VerseType provides an enumeration for the tags that may be associated with verses in songs.
"""
Verse = 0
Chorus = 1
Bridge = 2
PreChorus = 3
Intro = 4
Ending = 5
Other = 6
names = ['Verse', 'Chorus', 'Bridge', 'Pre-Chorus', 'Intro', 'Ending', 'Other']
tags = [name[0].lower() for name in names]
translated_names = [
translate('SongsPlugin.VerseType', 'Verse'),
translate('SongsPlugin.VerseType', 'Chorus'),
translate('SongsPlugin.VerseType', 'Bridge'),
translate('SongsPlugin.VerseType', 'Pre-Chorus'),
translate('SongsPlugin.VerseType', 'Intro'),
translate('SongsPlugin.VerseType', 'Ending'),
translate('SongsPlugin.VerseType', 'Other')]
translated_tags = [name[0].lower() for name in translated_names]
@staticmethod
def translated_tag(verse_tag, default=Other):
"""
Return the translated UPPERCASE tag for a given tag, used to show translated verse tags in UI
:param verse_tag: The string to return a VerseType for
:param default: Default return value if no matching tag is found
:return: A translated UPPERCASE tag
"""
verse_tag = verse_tag[0].lower()
for num, tag in enumerate(VerseType.tags):
if verse_tag == tag:
return VerseType.translated_tags[num].upper()
if len(VerseType.names) > default:
return VerseType.translated_tags[default].upper()
else:
return VerseType.translated_tags[VerseType.Other].upper()
@staticmethod
def translated_name(verse_tag, default=Other):
"""
Return the translated name for a given tag
:param verse_tag: The string to return a VerseType for
:param default: Default return value if no matching tag is found
:return: Translated name for the given tag
"""
verse_tag = verse_tag[0].lower()
for num, tag in enumerate(VerseType.tags):
if verse_tag == tag:
return VerseType.translated_names[num]
if len(VerseType.names) > default:
return VerseType.translated_names[default]
else:
return VerseType.translated_names[VerseType.Other]
@staticmethod
def from_tag(verse_tag, default=Other):
"""
Return the VerseType for a given tag
:param verse_tag: The string to return a VerseType for
:param default: Default return value if no matching tag is found (a valid VerseType or None)
:return: A VerseType of the tag
"""
verse_tag = verse_tag[0].lower()
for num, tag in enumerate(VerseType.tags):
if verse_tag == tag:
return num
if default in range(0, len(VerseType.names)) or default is None:
return default
else:
return VerseType.Other
@staticmethod
def from_translated_tag(verse_tag, default=Other):
"""
Return the VerseType for a given tag
:param verse_tag: The string to return a VerseType for
:param default: Default return value if no matching tag is found
:return: The VerseType of a translated tag
"""
verse_tag = verse_tag[0].lower()
for num, tag in enumerate(VerseType.translated_tags):
if verse_tag == tag:
return num
if default in range(0, len(VerseType.names)) or default is None:
return default
else:
return VerseType.Other
@staticmethod
def from_string(verse_name, default=Other):
"""
Return the VerseType for a given string
:param verse_name: The string to return a VerseType for
:param default: Default return value if no matching tag is found
:return: The VerseType determined from the string
"""
verse_name = verse_name.lower()
for num, name in enumerate(VerseType.names):
if verse_name == name.lower():
return num
return default
@staticmethod
def from_translated_string(verse_name):
"""
Return the VerseType for a given string
:param verse_name: The string to return a VerseType for
:return: A VerseType
"""
verse_name = verse_name.lower()
for num, translation in enumerate(VerseType.translated_names):
if verse_name == translation.lower():
return num
return None
@staticmethod
def from_loose_input(verse_name, default=Other):
"""
Return the VerseType for a given string
:param verse_name: The string to return a VerseType for
:param default: Default return value if no matching tag is found
:return: A VerseType
"""
if len(verse_name) > 1:
verse_index = VerseType.from_translated_string(verse_name)
if verse_index is None:
verse_index = VerseType.from_string(verse_name, default)
elif len(verse_name) == 1:
verse_index = VerseType.from_translated_tag(verse_name, None)
if verse_index is None:
verse_index = VerseType.from_tag(verse_name, default)
else:
return default
return verse_index
def retrieve_windows_encoding(recommendation=None):
"""
Determines which encoding to use on an information source. The process uses both automated detection, which is
passed to this method as a recommendation, and user confirmation to return an encoding.
:param recommendation: A recommended encoding discovered programmatically for the user to confirm.
:return: A list of recommended encodings, or None
"""
# map chardet result to compatible windows standard code page
codepage_mapping = {'IBM866': 'cp866', 'TIS-620': 'cp874', 'SHIFT_JIS': 'cp932', 'GB2312': 'cp936',
'HZ-GB-2312': 'cp936', 'EUC-KR': 'cp949', 'Big5': 'cp950', 'ISO-8859-2': 'cp1250',
'windows-1250': 'cp1250', 'windows-1251': 'cp1251', 'windows-1252': 'cp1252',
'ISO-8859-7': 'cp1253', 'windows-1253': 'cp1253', 'ISO-8859-8': 'cp1255',
'windows-1255': 'cp1255'}
if recommendation in codepage_mapping:
recommendation = codepage_mapping[recommendation]
# Show dialog for encoding selection
encodings = [
('cp1256', translate('SongsPlugin', 'Arabic (CP-1256)')),
('cp1257', translate('SongsPlugin', 'Baltic (CP-1257)')),
('cp1250', translate('SongsPlugin', 'Central European (CP-1250)')),
('cp1251', translate('SongsPlugin', 'Cyrillic (CP-1251)')),
('cp1253', translate('SongsPlugin', 'Greek (CP-1253)')),
('cp1255', translate('SongsPlugin', 'Hebrew (CP-1255)')),
('cp932', translate('SongsPlugin', 'Japanese (CP-932)')),
('cp949', translate('SongsPlugin', 'Korean (CP-949)')),
('cp936', translate('SongsPlugin', 'Simplified Chinese (CP-936)')),
('cp874', translate('SongsPlugin', 'Thai (CP-874)')),
('cp950', translate('SongsPlugin', 'Traditional Chinese (CP-950)')),
('cp1254', translate('SongsPlugin', 'Turkish (CP-1254)')),
('cp1258', translate('SongsPlugin', 'Vietnam (CP-1258)')),
('cp1252', translate('SongsPlugin', 'Western European (CP-1252)'))
]
recommended_index = -1
if recommendation:
for index in range(len(encodings)):
if recommendation == encodings[index][0]:
recommended_index = index
break
if recommended_index > -1:
choice = QtWidgets.QInputDialog.getItem(
None,
translate('SongsPlugin', 'Character Encoding'),
translate('SongsPlugin', 'The codepage setting is responsible\n'
'for the correct character representation.\n'
'Usually you are fine with the preselected choice.'),
[pair[1] for pair in encodings], recommended_index, False)
else:
choice = QtWidgets.QInputDialog.getItem(
None,
translate('SongsPlugin', 'Character Encoding'),
translate('SongsPlugin', 'Please choose the character encoding.\n'
'The encoding is responsible for the correct character representation.'),
[pair[1] for pair in encodings], 0, False)
if not choice[1]:
return None
return next(filter(lambda item: item[1] == choice[0], encodings))[0]
def clean_string(string):
"""
Strips punctuation from the passed string to assist searching.
:param string: The string to clean
:return: A clean string
"""
return WHITESPACE.sub(' ', APOSTROPHE.sub('', string)).lower()
def clean_title(title):
"""
Cleans the song title by removing Unicode control chars groups C0 & C1, as well as any trailing spaces.
:param title: The song title to clean
:return: A clean title
"""
return CONTROL_CHARS.sub('', title).rstrip()
def clean_song(manager, song):
"""
Cleans the search title, rebuilds the search lyrics, adds a default author if the song does not have one and other
clean ups. This should always called when a new song is added or changed.
:param manager: The song database manager object.
:param song: The song object.
"""
from .openlyricsxml import SongXML
if song.title:
song.title = clean_title(song.title)
else:
song.title = ''
if song.alternate_title:
song.alternate_title = clean_title(song.alternate_title)
else:
song.alternate_title = ''
song.search_title = clean_string(song.title) + '@' + clean_string(song.alternate_title)
if isinstance(song.lyrics, bytes):
song.lyrics = str(song.lyrics, encoding='utf8')
verses = SongXML().get_verses(song.lyrics)
song.search_lyrics = ' '.join([clean_string(clean_tags(verse[1])) for verse in verses])
# The song does not have any author, add one.
if not song.authors_songs:
name = SongStrings.AuthorUnknown
author = manager.get_object_filtered(Author, Author.display_name == name)
if author is None:
author = Author.populate(display_name=name, last_name='', first_name='')
song.add_author(author)
if song.copyright:
song.copyright = CONTROL_CHARS.sub('', song.copyright).strip()
def get_encoding(font, font_table, default_encoding, failed=False):
"""
Finds an encoding to use. Asks user, if necessary.
:param font: The number of currently active font.
:param font_table: Dictionary of fonts and respective encodings.
:param default_encoding: The default encoding to use when font_table is empty or no font is used.
:param failed: A boolean indicating whether the previous encoding didn't work.
"""
encoding = None
if font in font_table:
encoding = font_table[font]
if not encoding and default_encoding:
encoding = default_encoding
if not encoding or failed:
encoding = retrieve_windows_encoding()
default_encoding = encoding
font_table[font] = encoding
return encoding, default_encoding
def strip_rtf(text, default_encoding=None):
"""
This function strips RTF control structures and returns an unicode string.
Thanks to Markus Jarderot (MizardX) for this code, used by permission. http://stackoverflow.com/questions/188545
:param text: RTF-encoded text, a string.
:param default_encoding: Default encoding to use when no encoding is specified.
:return: A tuple ``(text, encoding)`` where ``text`` is the clean text and ``encoding`` is the detected encoding
"""
# Current font is the font tag we last met.
font = ''
# Character encoding is defined inside fonttable.
# font_table could contain eg '0': u'cp1252'
font_table = {'': ''}
# Stack of things to keep track of when entering/leaving groups.
stack = []
# Whether this group (and all inside it) are "ignorable".
ignorable = False
# Number of ASCII characters to skip after an unicode character.
ucskip = 1
# Number of ASCII characters left to skip.
curskip = 0
# Output buffer.
out = []
# Encoded buffer.
ebytes = bytearray()
for match in PATTERN.finditer(text):
iinu, word, arg, hex, char, brace, tchar = match.groups()
# \x (non-alpha character)
if char:
if char in '\\{}':
tchar = char
else:
word = char
# Flush encoded buffer to output buffer
if ebytes and not hex and not tchar:
failed = False
while True:
try:
encoding, default_encoding = get_encoding(font, font_table, default_encoding, failed=failed)
if not encoding:
return None
dbytes = ebytes.decode(encoding)
# Code 5C is a peculiar case with Windows Codepage 932
if encoding == 'cp932' and '\\' in dbytes:
dbytes = dbytes.replace('\\', '\u00A5')
out.append(dbytes)
ebytes.clear()
except UnicodeDecodeError:
failed = True
else:
break
# {}
if brace:
curskip = 0
if brace == '{':
# Push state
stack.append((ucskip, ignorable, font))
elif brace == '}' and len(stack) > 0:
# Pop state
ucskip, ignorable, font = stack.pop()
# \command
elif word:
curskip = 0
if word in DESTINATIONS:
ignorable = True
elif word in SPECIAL_CHARS:
if not ignorable:
out.append(SPECIAL_CHARS[word])
elif word == 'uc':
ucskip = int(arg)
elif word == 'u':
c = int(arg)
if c < 0:
c += 0x10000
if not ignorable:
out.append(chr(c))
curskip = ucskip
elif word == 'fonttbl':
ignorable = True
elif word == 'f':
font = arg
elif word == 'ansicpg':
font_table[font] = 'cp' + arg
elif word == 'fcharset' and font not in font_table and arg in CHARSET_MAPPING:
font_table[font] = CHARSET_MAPPING[arg]
elif word == 'fldrslt':
pass
# \* 'Ignore if not understood' marker
elif iinu:
ignorable = True
# \'xx
elif hex:
if curskip > 0:
curskip -= 1
elif not ignorable:
ebytes.append(int(hex, 16))
elif tchar:
if curskip > 0:
curskip -= 1
elif not ignorable:
ebytes += tchar.encode()
text = ''.join(out)
return text, default_encoding
def delete_song(song_id, song_plugin):
"""
Deletes a song from the database. Media files associated to the song are removed prior to the deletion of the song.
:param song_id: The ID of the song to delete.
:param song_plugin: The song plugin instance.
"""
save_path = ''
media_files = song_plugin.manager.get_all_objects(MediaFile, MediaFile.song_id == song_id)
for media_file in media_files:
try:
os.remove(media_file.file_name)
except OSError:
log.exception('Could not remove file: %s', media_file.file_name)
try:
save_path = os.path.join(AppLocation.get_section_data_path(song_plugin.name), 'audio', str(song_id))
if os.path.exists(save_path):
os.rmdir(save_path)
except OSError:
log.exception('Could not remove directory: %s', save_path)
song_plugin.manager.delete_object(Song, song_id)