added a song clean up function

This commit is contained in:
Andreas Preikschat 2011-03-14 19:59:59 +01:00
parent 33dcac7145
commit 97bec5b93f
7 changed files with 65 additions and 114 deletions

View File

@ -23,6 +23,7 @@
# with this program; if not, write to the Free Software Foundation, Inc., 59 # # with this program; if not, write to the Free Software Foundation, Inc., 59 #
# Temple Place, Suite 330, Boston, MA 02111-1307 USA # # Temple Place, Suite 330, Boston, MA 02111-1307 USA #
############################################################################### ###############################################################################
import re
from PyQt4 import QtGui from PyQt4 import QtGui
@ -244,9 +245,11 @@ def retrieve_windows_encoding(recommendation=None):
return None return None
return filter(lambda item: item[1] == choice[0], encodings)[0][0] return filter(lambda item: item[1] == choice[0], encodings)[0][0]
def add_author_unknown(manager, song): def clean_song(manager, song):
""" """
Add the default author *Author Unknown* to the song. Cleans the search title, rebuilds the search lyrics, adds a default author
if the song does not have one, and performs other clean-ups. This should
always be called when a new song is added.
``manager`` ``manager``
The song's manager. The song's manager.
@ -254,12 +257,30 @@ def add_author_unknown(manager, song):
``song`` ``song``
The song object. The song object.
""" """
name = SongStrings.AuthorUnknown # The song does not have any author, add one.
author = manager.get_object_filtered(Author, Author.display_name == name) if not song.authors:
if author is None: name = SongStrings.AuthorUnknown
author = Author.populate( author = manager.get_object_filtered(
display_name=name, last_name=u'', first_name=u'') Author, Author.display_name == name)
song.authors.append(author) if author is None:
author = Author.populate(
display_name=name, last_name=u'', first_name=u'')
song.authors.append(author)
song.title = song.title.strip() if song.title else u''
if song.alternate_title is None:
song.alternate_title = u''
song.alternate_title = song.alternate_title.strip()
whitespace = re.compile(r'\W+', re.UNICODE)
search_title = (whitespace.sub(u' ', song.title.lower()) +
u'@' + whitespace.sub(u' ', song.alternate_title.lower())).strip()
song.search_title = search_title if search_title else u''
# Remove the "language" attribute from lyrics tag. This is not very
# important, but this keeps the database clean. This can be removed
# when everybody has cleaned his songs.
song.lyrics = song.lyrics.replace(u'<lyrics language="en">', u'<lyrics>')
verses = SongXML().get_verses(song.lyrics)
lyrics = u' '.join([whitespace.sub(u' ', verse[1]) for verse in verses])
song.search_lyrics = lyrics.lower()
from xml import OpenLyrics, SongXML from xml import OpenLyrics, SongXML
from songstab import SongsTab from songstab import SongsTab

View File

@ -94,7 +94,7 @@ import os
from lxml import etree, objectify from lxml import etree, objectify
from openlp.core.ui.wizard import WizardStrings from openlp.core.ui.wizard import WizardStrings
from openlp.plugins.songs.lib import add_author_unknown, VerseType from openlp.plugins.songs.lib import clean_song, VerseType
from openlp.plugins.songs.lib.songimport import SongImport from openlp.plugins.songs.lib.songimport import SongImport
from openlp.plugins.songs.lib.db import Author, Book, Song, Topic from openlp.plugins.songs.lib.db import Author, Book, Song, Topic
from openlp.plugins.songs.lib.xml import SongXML from openlp.plugins.songs.lib.xml import SongXML
@ -229,6 +229,7 @@ class FoilPresenter(object):
self._process_authors(foilpresenterfolie, song) self._process_authors(foilpresenterfolie, song)
self._process_songbooks(foilpresenterfolie, song) self._process_songbooks(foilpresenterfolie, song)
self._process_topics(foilpresenterfolie, song) self._process_topics(foilpresenterfolie, song)
clean_song(self.manager, song)
self.manager.save_object(song) self.manager.save_object(song)
return song.id return song.id
@ -348,8 +349,6 @@ class FoilPresenter(object):
first_name = u' '.join(display_name.split(u' ')[:-1])) first_name = u' '.join(display_name.split(u' ')[:-1]))
self.manager.save_object(author) self.manager.save_object(author)
song.authors.append(author) song.authors.append(author)
if not song.authors:
add_author_unknown(self.manager, song)
def _process_cclinumber(self, foilpresenterfolie, song): def _process_cclinumber(self, foilpresenterfolie, song):
""" """
@ -407,7 +406,6 @@ class FoilPresenter(object):
The song object. The song object.
""" """
sxml = SongXML() sxml = SongXML()
search_text = u''
temp_verse_order = {} temp_verse_order = {}
temp_verse_order_backup = [] temp_verse_order_backup = []
temp_sortnr_backup = 1 temp_sortnr_backup = 1
@ -452,7 +450,6 @@ class FoilPresenter(object):
else: else:
verse_type = u'O' verse_type = u'O'
verse_number = re.compile(u'[a-zA-Z.+-_ ]*').sub(u'', verse_name) verse_number = re.compile(u'[a-zA-Z.+-_ ]*').sub(u'', verse_name)
#verse_part = re.compile(u'[0-9]*').sub(u'', verse_name[1:])
# Foilpresenter allows e. g. "C", but we need "C1". # Foilpresenter allows e. g. "C", but we need "C1".
if not verse_number: if not verse_number:
verse_number = unicode(versenumber[verse_type]) verse_number = unicode(versenumber[verse_type])
@ -470,8 +467,6 @@ class FoilPresenter(object):
temp_verse_order_backup.append(u''.join((verse_type[0], temp_verse_order_backup.append(u''.join((verse_type[0],
verse_number))) verse_number)))
sxml.add_verse_to_lyrics(verse_type, verse_number, text) sxml.add_verse_to_lyrics(verse_type, verse_number, text)
search_text = search_text + text
song.search_lyrics = search_text.lower()
song.lyrics = unicode(sxml.extract_xml(), u'utf-8') song.lyrics = unicode(sxml.extract_xml(), u'utf-8')
# Process verse order # Process verse order
verse_order = [] verse_order = []
@ -487,6 +482,7 @@ class FoilPresenter(object):
numberx = temp_sortnr_liste[int(number)] numberx = temp_sortnr_liste[int(number)]
verse_order.append(temp_verse_order[unicode(numberx)]) verse_order.append(temp_verse_order[unicode(numberx)])
song.verse_order = u' '.join(verse_order) song.verse_order = u' '.join(verse_order)
song.search_lyrics = u''
def _process_songbooks(self, foilpresenterfolie, song): def _process_songbooks(self, foilpresenterfolie, song):
""" """
@ -534,13 +530,10 @@ class FoilPresenter(object):
for titelstring in foilpresenterfolie.titel.titelstring: for titelstring in foilpresenterfolie.titel.titelstring:
if not song.title: if not song.title:
song.title = self._child(titelstring) song.title = self._child(titelstring)
song.search_title = unicode(song.title)
song.alternate_title = u'' song.alternate_title = u''
else: else:
song.alternate_title = self._child(titelstring) song.alternate_title = self._child(titelstring)
song.search_title += u'@' + song.alternate_title song.search_title = u''
song.search_title = re.sub(r'[\'"`,;:(){}?]+', u'',
unicode(song.search_title)).lower().strip()
def _process_topics(self, foilpresenterfolie, song): def _process_topics(self, foilpresenterfolie, song):
""" """
@ -565,10 +558,3 @@ class FoilPresenter(object):
song.topics.append(topic) song.topics.append(topic)
except AttributeError: except AttributeError:
pass pass
def _dump_xml(self, xml):
"""
Debugging aid to dump XML so that we can see what we have.
"""
return etree.tostring(xml, encoding=u'UTF-8',
xml_declaration=True, pretty_print=True)

View File

@ -268,9 +268,8 @@ class SongMediaItem(MediaManagerItem):
Receiver.send_message(u'songs_load_list') Receiver.send_message(u'songs_load_list')
def onExportClick(self): def onExportClick(self):
if not hasattr(self, u'export_wizard'): export_wizard = SongExportForm(self, self.parent)
self.export_wizard = SongExportForm(self, self.parent) export_wizard.exec_()
self.export_wizard.exec_()
def onNewClick(self): def onNewClick(self):
log.debug(u'onNewClick') log.debug(u'onNewClick')
@ -414,7 +413,7 @@ class SongMediaItem(MediaManagerItem):
def serviceLoad(self, item): def serviceLoad(self, item):
""" """
Triggered by a song being loaded by the service item Triggered by a song being loaded by the service manager.
""" """
log.debug(u'serviceLoad') log.debug(u'serviceLoad')
if self.plugin.status != PluginStatus.Active or not item.data_string: if self.plugin.status != PluginStatus.Active or not item.data_string:
@ -435,8 +434,7 @@ class SongMediaItem(MediaManagerItem):
for song in search_results: for song in search_results:
same_authors = True same_authors = True
# If the author counts are different, we do not have to do any # If the author counts are different, we do not have to do any
# further checking. This is also important when a song does not # further checking.
# have any author (because we can not loop over an empty list).
if len(song.authors) == len(author_list): if len(song.authors) == len(author_list):
for author in song.authors: for author in song.authors:
if author.display_name not in author_list: if author.display_name not in author_list:

View File

@ -36,7 +36,7 @@ from sqlalchemy.orm.exc import UnmappedClassError
from openlp.core.lib import translate from openlp.core.lib import translate
from openlp.core.lib.db import BaseModel from openlp.core.lib.db import BaseModel
from openlp.plugins.songs.lib import add_author_unknown from openlp.plugins.songs.lib import clean_song
from openlp.plugins.songs.lib.db import Author, Book, Song, Topic #, MediaFile from openlp.plugins.songs.lib.db import Author, Book, Song, Topic #, MediaFile
from songimport import SongImport from songimport import SongImport
@ -165,12 +165,10 @@ class OpenLPSongImport(SongImport):
old_titles = song.search_title.split(u'@') old_titles = song.search_title.split(u'@')
if len(old_titles) > 1: if len(old_titles) > 1:
new_song.alternate_title = old_titles[1] new_song.alternate_title = old_titles[1]
else: new_song.search_title = u''
new_song.alternate_title = u'' new_song.search_lyrics = u''
new_song.search_title = song.search_title.strip()
new_song.song_number = song.song_number new_song.song_number = song.song_number
new_song.lyrics = song.lyrics new_song.lyrics = song.lyrics
new_song.search_lyrics = song.search_lyrics
new_song.verse_order = song.verse_order new_song.verse_order = song.verse_order
new_song.copyright = song.copyright new_song.copyright = song.copyright
new_song.comments = song.comments new_song.comments = song.comments
@ -179,31 +177,26 @@ class OpenLPSongImport(SongImport):
for author in song.authors: for author in song.authors:
existing_author = self.manager.get_object_filtered( existing_author = self.manager.get_object_filtered(
Author, Author.display_name == author.display_name) Author, Author.display_name == author.display_name)
if existing_author: if existing_author is None:
new_song.authors.append(existing_author) existing_author = Author.populate(
else:
new_song.authors.append(Author.populate(
first_name=author.first_name, first_name=author.first_name,
last_name=author.last_name, last_name=author.last_name,
display_name=author.display_name)) display_name=author.display_name)
if not new_song.authors: new_song.authors.append(existing_author)
add_author_unknown(self.manager, new_song)
if song.book: if song.book:
existing_song_book = self.manager.get_object_filtered( existing_song_book = self.manager.get_object_filtered(
Book, Book.name == song.book.name) Book, Book.name == song.book.name)
if existing_song_book: if existing_song_book is None:
new_song.book = existing_song_book existing_song_book = Book.populate(name=song.book.name,
else:
new_song.book = Book.populate(name=song.book.name,
publisher=song.book.publisher) publisher=song.book.publisher)
new_song.book = existing_song_book
if song.topics: if song.topics:
for topic in song.topics: for topic in song.topics:
existing_topic = self.manager.get_object_filtered( existing_topic = self.manager.get_object_filtered(
Topic, Topic.name == topic.name) Topic, Topic.name == topic.name)
if existing_topic: if existing_topic is None:
new_song.topics.append(existing_topic) existing_topic = Topic.populate(name=topic.name)
else: new_song.topics.append(existing_topic)
new_song.topics.append(Topic.populate(name=topic.name))
# if has_media_files: # if has_media_files:
# if song.media_files: # if song.media_files:
# for media_file in song.media_files: # for media_file in song.media_files:
@ -215,6 +208,7 @@ class OpenLPSongImport(SongImport):
# else: # else:
# new_song.media_files.append(MediaFile.populate( # new_song.media_files.append(MediaFile.populate(
# file_name=media_file.file_name)) # file_name=media_file.file_name))
clean_song(self.manager, new_song)
self.manager.save_object(new_song) self.manager.save_object(new_song)
song_count += 1 song_count += 1
if self.stop_import_flag: if self.stop_import_flag:

View File

@ -29,7 +29,7 @@ import re
from PyQt4 import QtCore from PyQt4 import QtCore
from openlp.core.lib import Receiver, translate from openlp.core.lib import Receiver, translate
from openlp.plugins.songs.lib import add_author_unknown, VerseType from openlp.plugins.songs.lib import clean_song, VerseType
from openlp.plugins.songs.lib.db import Song, Author, Topic, Book, MediaFile from openlp.plugins.songs.lib.db import Song, Author, Topic, Book, MediaFile
from openlp.plugins.songs.lib.ui import SongStrings from openlp.plugins.songs.lib.ui import SongStrings
from openlp.plugins.songs.lib.xml import SongXML from openlp.plugins.songs.lib.xml import SongXML
@ -244,12 +244,6 @@ class SongImport(QtCore.QObject):
else: else:
return True return True
def remove_punctuation(self, text):
"""
Extracts alphanumeric words for searchable fields
"""
return re.sub(r'\W+', u' ', text, re.UNICODE)
def finish(self): def finish(self):
""" """
All fields have been set to this song. Write the song to disk. All fields have been set to this song. Write the song to disk.
@ -258,11 +252,7 @@ class SongImport(QtCore.QObject):
song = Song() song = Song()
song.title = self.title song.title = self.title
song.alternate_title = self.alternate_title song.alternate_title = self.alternate_title
song.search_title = self.remove_punctuation(self.title).lower() \
+ '@' + self.remove_punctuation(self.alternate_title).lower()
song.search_title = song.search_title.strip()
song.song_number = self.song_number song.song_number = self.song_number
song.search_lyrics = u''
verses_changed_to_other = {} verses_changed_to_other = {}
sxml = SongXML() sxml = SongXML()
other_count = 1 other_count = 1
@ -279,8 +269,6 @@ class SongImport(QtCore.QObject):
new_verse_def) new_verse_def)
verse_def = new_verse_def verse_def = new_verse_def
sxml.add_verse_to_lyrics(verse_tag, verse_def[1:], verse_text, lang) sxml.add_verse_to_lyrics(verse_tag, verse_def[1:], verse_text, lang)
song.search_lyrics += u' ' + self.remove_punctuation(verse_text)
song.search_lyrics = song.search_lyrics.lower()
song.lyrics = unicode(sxml.extract_xml(), u'utf-8') song.lyrics = unicode(sxml.extract_xml(), u'utf-8')
if not len(self.verse_order_list) and \ if not len(self.verse_order_list) and \
self.verse_order_list_generated_useful: self.verse_order_list_generated_useful:
@ -302,9 +290,6 @@ class SongImport(QtCore.QObject):
last_name=authortext.split(u' ')[-1], last_name=authortext.split(u' ')[-1],
first_name=u' '.join(authortext.split(u' ')[:-1])) first_name=u' '.join(authortext.split(u' ')[:-1]))
song.authors.append(author) song.authors.append(author)
# No author, add the default author.
if not song.authors:
add_author_unknown(self.manager, song)
for filename in self.media_files: for filename in self.media_files:
media_file = self.manager.get_object_filtered(MediaFile, media_file = self.manager.get_object_filtered(MediaFile,
MediaFile.file_name == filename) MediaFile.file_name == filename)
@ -325,6 +310,7 @@ class SongImport(QtCore.QObject):
if topic is None: if topic is None:
topic = Topic.populate(name=topictext) topic = Topic.populate(name=topictext)
song.topics.append(topic) song.topics.append(topic)
clean_song(self.manager, song)
self.manager.save_object(song) self.manager.save_object(song)
self.set_defaults() self.set_defaults()

View File

@ -66,7 +66,7 @@ import re
from lxml import etree, objectify from lxml import etree, objectify
from openlp.plugins.songs.lib import add_author_unknown, VerseType from openlp.plugins.songs.lib import clean_song, VerseType
from openlp.plugins.songs.lib.db import Author, Book, Song, Topic from openlp.plugins.songs.lib.db import Author, Book, Song, Topic
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@ -236,10 +236,9 @@ class OpenLyrics(object):
datetime.datetime.now().strftime(u'%Y-%m-%dT%H:%M:%S')) datetime.datetime.now().strftime(u'%Y-%m-%dT%H:%M:%S'))
properties = etree.SubElement(song_xml, u'properties') properties = etree.SubElement(song_xml, u'properties')
titles = etree.SubElement(properties, u'titles') titles = etree.SubElement(properties, u'titles')
self._add_text_to_element(u'title', titles, song.title.strip()) self._add_text_to_element(u'title', titles, song.title)
if song.alternate_title: if song.alternate_title:
self._add_text_to_element( self._add_text_to_element(u'title', titles, song.alternate_title)
u'title', titles, song.alternate_title.strip())
if song.comments: if song.comments:
comments = etree.SubElement(properties, u'comments') comments = etree.SubElement(properties, u'comments')
self._add_text_to_element(u'comment', comments, song.comments) self._add_text_to_element(u'comment', comments, song.comments)
@ -312,6 +311,7 @@ class OpenLyrics(object):
self._process_authors(properties, song) self._process_authors(properties, song)
self._process_songbooks(properties, song) self._process_songbooks(properties, song)
self._process_topics(properties, song) self._process_topics(properties, song)
clean_song(self.manager, song)
self.manager.save_object(song) self.manager.save_object(song)
return song.id return song.id
@ -382,8 +382,6 @@ class OpenLyrics(object):
last_name=display_name.split(u' ')[-1], last_name=display_name.split(u' ')[-1],
first_name=u' '.join(display_name.split(u' ')[:-1])) first_name=u' '.join(display_name.split(u' ')[:-1]))
song.authors.append(author) song.authors.append(author)
if not song.authors:
add_author_unknown(self.manager, song)
def _process_cclinumber(self, properties, song): def _process_cclinumber(self, properties, song):
""" """
@ -443,7 +441,6 @@ class OpenLyrics(object):
The song object. The song object.
""" """
sxml = SongXML() sxml = SongXML()
search_text = u''
for verse in lyrics.verse: for verse in lyrics.verse:
text = u'' text = u''
for lines in verse.lines: for lines in verse.lines:
@ -462,8 +459,7 @@ class OpenLyrics(object):
if self._get(verse, u'lang'): if self._get(verse, u'lang'):
lang = self._get(verse, u'lang') lang = self._get(verse, u'lang')
sxml.add_verse_to_lyrics(verse_type, verse_number, text, lang) sxml.add_verse_to_lyrics(verse_type, verse_number, text, lang)
search_text = search_text + text song.search_lyrics = u''
song.search_lyrics = search_text.lower()
song.lyrics = unicode(sxml.extract_xml(), u'utf-8') song.lyrics = unicode(sxml.extract_xml(), u'utf-8')
# Process verse order # Process verse order
if hasattr(properties, u'verseOrder'): if hasattr(properties, u'verseOrder'):
@ -510,13 +506,9 @@ class OpenLyrics(object):
for title in properties.titles.title: for title in properties.titles.title:
if not song.title: if not song.title:
song.title = self._text(title) song.title = self._text(title)
song.search_title = unicode(song.title)
song.alternate_title = u'' song.alternate_title = u''
else: else:
song.alternate_title = self._text(title) song.alternate_title = self._text(title)
song.search_title += u'@' + song.alternate_title
song.search_title = re.sub(r'[\'"`,;:(){}?]+', u'',
unicode(song.search_title)).lower().strip()
def _process_topics(self, properties, song): def _process_topics(self, properties, song):
""" """

View File

@ -25,15 +25,13 @@
############################################################################### ###############################################################################
import logging import logging
import re
from PyQt4 import QtCore, QtGui from PyQt4 import QtCore, QtGui
from openlp.core.lib import Plugin, StringContent, build_icon, translate from openlp.core.lib import Plugin, StringContent, build_icon, translate
from openlp.core.lib.db import Manager from openlp.core.lib.db import Manager
from openlp.core.lib.ui import UiStrings from openlp.core.lib.ui import UiStrings
from openlp.plugins.songs.lib import add_author_unknown, SongMediaItem, \ from openlp.plugins.songs.lib import clean_song, SongMediaItem, SongsTab
SongsTab, SongXML
from openlp.plugins.songs.lib.db import init_schema, Song from openlp.plugins.songs.lib.db import init_schema, Song
from openlp.plugins.songs.lib.importer import SongFormat from openlp.plugins.songs.lib.importer import SongFormat
@ -58,7 +56,6 @@ class SongsPlugin(Plugin):
self.manager = Manager(u'songs', init_schema) self.manager = Manager(u'songs', init_schema)
self.icon_path = u':/plugins/plugin_songs.png' self.icon_path = u':/plugins/plugin_songs.png'
self.icon = build_icon(self.icon_path) self.icon = build_icon(self.icon_path)
self.whitespace = re.compile(r'\W+', re.UNICODE)
def initialise(self): def initialise(self):
log.info(u'Songs Initialising') log.info(u'Songs Initialising')
@ -139,36 +136,14 @@ class SongsPlugin(Plugin):
maxSongs = self.manager.get_object_count(Song) maxSongs = self.manager.get_object_count(Song)
progressDialog = QtGui.QProgressDialog( progressDialog = QtGui.QProgressDialog(
translate('SongsPlugin', 'Reindexing songs...'), UiStrings.Cancel, translate('SongsPlugin', 'Reindexing songs...'), UiStrings.Cancel,
0, maxSongs + 1, self.formparent) 0, maxSongs, self.formparent)
progressDialog.setWindowModality(QtCore.Qt.WindowModal) progressDialog.setWindowModality(QtCore.Qt.WindowModal)
songs = self.manager.get_all_objects(Song) songs = self.manager.get_all_objects(Song)
counter = 0 for number, song in enumerate(songs):
for song in songs: clean_song(self.manager, song)
counter += 1 progressDialog.setValue(number + 1)
# The song does not have any author, add one.
if not song.authors:
add_author_unknown(self.manager, song)
if song.title is None:
song.title = u''
if song.alternate_title is None:
song.alternate_title = u''
song.search_title = self.whitespace.sub(u' ', song.title.lower() +
u' ' + song.alternate_title.lower()).strip()
# Remove the "language" attribute from lyrics tag. This is not very
# important, but this keeps the database clean. This can be removed
# when everybody has run the reindex tool once.
song.lyrics = song.lyrics.replace(
u'<lyrics language="en">', u'<lyrics>')
lyrics = u''
verses = SongXML().get_verses(song.lyrics)
for verse in verses:
lyrics = lyrics + self.whitespace.sub(u' ', verse[1]) + u' '
song.search_lyrics = lyrics.lower()
progressDialog.setValue(counter)
self.manager.save_objects(songs) self.manager.save_objects(songs)
progressDialog.setValue(counter + 1) self.mediaItem.onSearchTextButtonClick()
self.mediaItem.displayResultsSong(
self.manager.get_all_objects(Song, order_by_ref=Song.search_title))
def onSongImportItemClicked(self): def onSongImportItemClicked(self):
if self.mediaItem: if self.mediaItem:
@ -179,10 +154,9 @@ class SongsPlugin(Plugin):
self.mediaItem.onExportClick() self.mediaItem.onExportClick()
def about(self): def about(self):
about_text = translate('SongsPlugin', '<strong>Songs Plugin</strong>' return translate('SongsPlugin', '<strong>Songs Plugin</strong>'
'<br />The songs plugin provides the ability to display and ' '<br />The songs plugin provides the ability to display and '
'manage songs.') 'manage songs.')
return about_text
def usesTheme(self, theme): def usesTheme(self, theme):
""" """