@ -53,64 +53,41 @@ APOSTROPHE = re.compile('[\'`ʻ]', re.UNICODE)
# \# - where # is a single non-alpha character, representing a special symbol
# { or } - marking the beginning/end of a group
# a run of characters without any \ { } or end-of-line
PATTERN = re.compile(r"(\\\*)?\\([a-z]{1,32})(-?\d{1,10})?[ ]?|\\'([0-9a-f]{2})|\\([^a-z*])|([{}])|[\r\n]+|([^\\{}\r\n]+)", re.I)
PATTERN = re.compile(
r"(\\\*)?\\([a-z]{1,32})(-?\d{1,10})?[ ]?|\\'([0-9a-f]{2})|\\([^a-z*])|([{}])|[\r\n]+|([^\\{}\r\n]+)", re.I)
# RTF control words which specify a "destination" to be ignored.
'aftncn', 'aftnsep', 'aftnsepc', 'annotation', 'atnauthor', 'atndate', 'atnicn', 'atnid', 'atnparent', 'atnref',
'atntime', 'atrfend', 'atrfstart', 'author', 'background', 'bkmkend', 'bkmkstart', 'blipuid', 'buptim', 'category',
'colorschememapping', 'colortbl', 'comment', 'company', 'creatim', 'datafield', 'datastore', 'defchp', 'defpap',
'do', 'doccomm', 'docvar', 'dptxbxtext', 'ebcend', 'ebcstart', 'factoidname', 'falt', 'fchars', 'ffdeftext',
'ffentrymcr', 'ffexitmcr', 'ffformat', 'ffhelptext', 'ffl', 'ffname', 'ffstattext', 'file', 'filetbl', 'fldinst',
'fldtype', 'fname', 'fontemb', 'fontfile', 'footer', 'footerf', 'footerl', 'footerr', 'footnote', 'formfield',
'ftncn', 'ftnsep', 'ftnsepc', 'g', 'generator', 'gridtbl', 'header', 'headerf', 'headerl', 'headerr', 'hl', 'hlfr',
'hlinkbase', 'hlloc', 'hlsrc', 'hsv', 'htmltag', 'info', 'keycode', 'keywords', 'latentstyles', 'lchars',
'levelnumbers', 'leveltext', 'lfolevel', 'linkval', 'list', 'listlevel', 'listname', 'listoverride',
'listoverridetable', 'listpicture', 'liststylename', 'listtable', 'listtext', 'lsdlockedexcept', 'macc', 'maccPr',
'mailmerge', 'maln', 'malnScr', 'manager', 'margPr', 'mbar', 'mbarPr', 'mbaseJc', 'mbegChr', 'mborderBox',
'mborderBoxPr', 'mbox', 'mboxPr', 'mchr', 'mcount', 'mctrlPr', 'md', 'mdeg', 'mdegHide', 'mden', 'mdiff', 'mdPr',
'me', 'mendChr', 'meqArr', 'meqArrPr', 'mf', 'mfName', 'mfPr', 'mfunc', 'mfuncPr', 'mgroupChr', 'mgroupChrPr',
'mgrow', 'mhideBot', 'mhideLeft', 'mhideRight', 'mhideTop', 'mhtmltag', 'mlim', 'mlimloc', 'mlimlow', 'mlimlowPr',
'mlimupp', 'mlimuppPr', 'mm', 'mmaddfieldname', 'mmath', 'mmathPict', 'mmathPr', 'mmaxdist', 'mmc', 'mmcJc',
'mmconnectstr', 'mmconnectstrdata', 'mmcPr', 'mmcs', 'mmdatasource', 'mmheadersource', 'mmmailsubject', 'mmodso',
'mmodsofilter', 'mmodsofldmpdata', 'mmodsomappedname', 'mmodsoname', 'mmodsorecipdata', 'mmodsosort', 'mmodsosrc',
'mmodsotable', 'mmodsoudl', 'mmodsoudldata', 'mmodsouniquetag', 'mmPr', 'mmquery', 'mmr', 'mnary', 'mnaryPr',
'mnoBreak', 'mnum', 'mobjDist', 'moMath', 'moMathPara', 'moMathParaPr', 'mopEmu', 'mphant', 'mphantPr', 'mplcHide',
'mpos', 'mr', 'mrad', 'mradPr', 'mrPr', 'msepChr', 'mshow', 'mshp', 'msPre', 'msPrePr', 'msSub', 'msSubPr',
'msSubSup', 'msSubSupPr', 'msSup', 'msSupPr', 'mstrikeBLTR', 'mstrikeH', 'mstrikeTLBR', 'mstrikeV', 'msub',
'msubHide', 'msup', 'msupHide', 'mtransp', 'mtype', 'mvertJc', 'mvfmf', 'mvfml', 'mvtof', 'mvtol', 'mzeroAsc',
'mzeroDesc', 'mzeroWid', 'nesttableprops', 'nextfile', 'nonesttables', 'objalias', 'objclass', 'objdata', 'object',
'objname', 'objsect', 'objtime', 'oldcprops', 'oldpprops', 'oldsprops', 'oldtprops', 'oleclsid', 'operator',
'panose', 'password', 'passwordhash', 'pgp', 'pgptbl', 'picprop', 'pict', 'pn', 'pnseclvl', 'pntext', 'pntxta',
'pntxtb', 'printim', 'private', 'propname', 'protend', 'protstart', 'protusertbl', 'pxe', 'result', 'revtbl',
'revtim', 'rsidtbl', 'rxe', 'shp', 'shpgrp', 'shpinst', 'shppict', 'shprslt', 'shptxt', 'sn', 'sp', 'staticval',
'stylesheet', 'subject', 'sv', 'svb', 'tc', 'template', 'themedata', 'title', 'txe', 'ud', 'upr', 'userprops',
'wgrffmtfilter', 'windowcaption', 'writereservation', 'writereservhash', 'xe', 'xform', 'xmlattrname',
'xmlattrvalue', 'xmlclose', 'xmlname', 'xmlnstbl', 'xmlopen'
# Translation of some special characters.
'\n': '\n',
@ -142,7 +119,8 @@ SPECIAL_CHARS = {
'ltrmark': '\u200E',
'rtlmark': '\u200F',
'zwj': '\u200D',
'zwnj': '\u200C'
'0': 'cp1252',
'128': 'cp932',
@ -156,7 +134,8 @@ CHARSET_MAPPING = {
'186': 'cp1257',
'204': 'cp1251',
'222': 'cp874',
'238': 'cp1250'
class VerseType(object):
@ -171,14 +150,7 @@ class VerseType(object):
Ending = 5
Other = 6
names = ['Verse', 'Chorus', 'Bridge', 'Pre-Chorus', 'Intro', 'Ending', 'Other']
tags = [name[0].lower() for name in names]
Return the translated UPPERCASE tag for a given tag, used to show translated verse tags in UI
The string to return a VerseType for
:param verse_tag: The string to return a VerseType for
:param default: Default return value if no matching tag is found
:return: A translated UPPERCASE tag
verse_tag = verse_tag[0].lower()
for num, tag in enumerate(VerseType.tags):
@ -217,11 +187,9 @@ class VerseType(object):
Return the translated name for a given tag
:param verse_tag: The string to return a VerseType for
:param default: Default return value if no matching tag is found
:return: Translated name for the given tag
verse_tag = verse_tag[0].lower()
for num, tag in enumerate(VerseType.tags):
@ -237,11 +205,9 @@ class VerseType(object):
Return the VerseType for a given tag
:param verse_tag: The string to return a VerseType for
:param default: Default return value if no matching tag is found
:return: A VerseType of the tag
verse_tag = verse_tag[0].lower()
for num, tag in enumerate(VerseType.tags):
@ -257,11 +223,9 @@ class VerseType(object):
Return the VerseType for a given translated tag
:param verse_tag: The string to return a VerseType for
:param default: Default return value if no matching tag is found
:return: The VerseType of a translated tag
verse_tag = verse_tag[0].lower()
for num, tag in enumerate(VerseType.translated_tags):
@ -277,11 +241,9 @@ class VerseType(object):
Return the VerseType for a given string
:param verse_name: The string to return a VerseType for
:param default: Default return value if no matching tag is found
:return: The VerseType determined from the string
verse_name = verse_name.lower()
for num, name in enumerate(VerseType.names):
@ -294,8 +256,8 @@ class VerseType(object):
Return the VerseType for a given translated string
:param verse_name: The string to return a VerseType for
:return: A VerseType
verse_name = verse_name.lower()
for num, translation in enumerate(VerseType.translated_names):
@ -307,11 +269,9 @@ class VerseType(object):
Return the VerseType for a given string
:param verse_name: The string to return a VerseType for
:param default: Default return value if no matching tag is found
:return: A VerseType
if len(verse_name) > 1:
verse_index = VerseType.from_translated_string(verse_name)
@ -331,22 +291,21 @@ def retrieve_windows_encoding(recommendation=None):
Determines which encoding to use on an information source. The process uses both automated detection, which is
passed to this method as a recommendation, and user confirmation to return an encoding.
:param recommendation: A recommended encoding discovered programmatically for the user to confirm.
:return: The name of the encoding selected by the user (e.g. ``cp1252``), or None if the user did not confirm a choice
# map chardet result to compatible windows standard code page
codepage_mapping = {'IBM866': 'cp866', 'TIS-620': 'cp874', 'SHIFT_JIS': 'cp932', 'GB2312': 'cp936',
'HZ-GB-2312': 'cp936', 'EUC-KR': 'cp949', 'Big5': 'cp950', 'ISO-8859-2': 'cp1250',
'windows-1250': 'cp1250', 'windows-1251': 'cp1251', 'windows-1252': 'cp1252',
'ISO-8859-7': 'cp1253', 'windows-1253': 'cp1253', 'ISO-8859-8': 'cp1255',
'windows-1255': 'cp1255'}
if recommendation in codepage_mapping:
recommendation = codepage_mapping[recommendation]
# Show dialog for encoding selection
encodings = [
('cp1256', translate('SongsPlugin', 'Arabic (CP-1256)')),
('cp1257', translate('SongsPlugin', 'Baltic (CP-1257)')),
('cp1250', translate('SongsPlugin', 'Central European (CP-1250)')),
('cp1251', translate('SongsPlugin', 'Cyrillic (CP-1251)')),
@ -359,7 +318,8 @@ def retrieve_windows_encoding(recommendation=None):
('cp950', translate('SongsPlugin', 'Traditional Chinese (CP-950)')),
('cp1254', translate('SongsPlugin', 'Turkish (CP-1254)')),
('cp1258', translate('SongsPlugin', 'Vietnam (CP-1258)')),
('cp1252', translate('SongsPlugin', 'Western European (CP-1252)'))]
('cp1252', translate('SongsPlugin', 'Western European (CP-1252)'))
recommended_index = -1
if recommendation:
for index in range(len(encodings)):
@ -367,17 +327,20 @@ def retrieve_windows_encoding(recommendation=None):
recommended_index = index
if recommended_index > -1:
choice = QtGui.QInputDialog.getItem(
translate('SongsPlugin', 'Character Encoding'),
'for the correct character representation.\n'
'Usually you are fine with the preselected choice.'),
[pair[1] for pair in encodings], recommended_index, False)
choice = QtGui.QInputDialog.getItem(
translate('SongsPlugin', 'Character Encoding'),
[pair[1] for pair in encodings], 0, False)
'The encoding is responsible for the correct character representation.'),
[pair[1] for pair in encodings], 0, False)
if not choice[1]:
return None
return next(filter(lambda item: item[1] == choice[0], encodings))[0]
@ -386,6 +349,9 @@ def retrieve_windows_encoding(recommendation=None):
def clean_string(string):
Strips punctuation from the passed string and converts it to lower case to assist searching.
:param string: The string to clean
:return: A clean string
return WHITESPACE.sub(' ', APOSTROPHE.sub('', string)).lower()
@ -393,6 +359,9 @@ def clean_string(string):
def clean_title(title):
Cleans the song title by removing Unicode control characters (groups C0 & C1), as well as any trailing whitespace.
:param title: The song title to clean
:return: A clean title
return CONTROL_CHARS.sub('', title).rstrip()
@ -402,11 +371,8 @@ def clean_song(manager, song):
Cleans the search title, rebuilds the search lyrics, adds a default author if the song does not have one and other
clean ups. This should always be called when a new song is added or changed.
The song's manager.
The song object.
:param manager: The song database manager object.
:param song: The song object.
from .xml import SongXML
@ -419,55 +385,10 @@ def clean_song(manager, song):
song.alternate_title = ''
song.search_title = clean_string(song.title) + '@' + clean_string(song.alternate_title)
if isinstance(song.lyrics, bytes):
song.lyrics = str(song.lyrics, encoding='utf8')
verses = SongXML().get_verses(song.lyrics)
song.search_lyrics = ' '.join([clean_string(verse[1]) for verse in verses])
# The song does not have any author, add one.
if not song.authors:
@ -484,17 +405,10 @@ def get_encoding(font, font_table, default_encoding, failed=False):
Finds an encoding to use. Asks user, if necessary.
:param font: The number of the currently active font.
:param font_table: Dictionary of fonts and respective encodings.
:param default_encoding: The default encoding to use when font_table is empty or no font is used.
:param failed: A boolean indicating whether the previous encoding didn't work.
encoding = None
if font in font_table:
@ -512,14 +426,11 @@ def strip_rtf(text, default_encoding=None):
This function strips RTF control structures and returns a Unicode string.
Thanks to Markus Jarderot (MizardX) for this code, used by permission.
Thanks to Markus Jarderot (MizardX) for this code, used by permission. http://stackoverflow.com/questions/188545
:param text: RTF-encoded text, a string.
:param default_encoding: Default encoding to use when no encoding is specified.
:return: A tuple ``(text, encoding)`` where ``text`` is the clean text and ``encoding`` is the detected encoding
# Current font is the font tag we last met.
font = ''
@ -620,20 +531,17 @@ def strip_rtf(text, default_encoding=None):
def delete_song(song_id, song_plugin):
Deletes a song from the database. Media files associated to the song
Deletes a song from the database. Media files associated to the song are removed prior to the deletion of the song.
:param song_id: The ID of the song to delete.
:param song_plugin: The song plugin instance.
save_path = ''
media_files = song_plugin.manager.get_all_objects(MediaFile, MediaFile.song_id == song_id)
for media_file in media_files:
except OSError:
log.exception('Could not remove file: %s', media_file.file_name)
save_path = os.path.join(AppLocation.get_section_data_path(song_plugin.name), 'audio', str(song_id))