diff --git a/openlp/plugins/songs/lib/doublesfinder.py b/openlp/plugins/songs/lib/doublesfinder.py new file mode 100644 index 000000000..f1098a7c3 --- /dev/null +++ b/openlp/plugins/songs/lib/doublesfinder.py @@ -0,0 +1,114 @@ +# -*- coding: utf-8 -*- +# vim: autoindent shiftwidth=4 expandtab textwidth=120 tabstop=4 softtabstop=4 + +############################################################################### +# OpenLP - Open Source Lyrics Projection # +# --------------------------------------------------------------------------- # +# Copyright (c) 2008-2013 Raoul Snyman # +# Portions copyright (c) 2008-2013 Tim Bentley, Gerald Britton, Jonathan # +# Corwin, Samuel Findlay, Michael Gorven, Scott Guerrieri, Matthias Hub, # +# Meinert Jordan, Armin Köhler, Erik Lundin, Edwin Lunando, Brian T. Meyer. # +# Joshua Miller, Stevan Pettit, Andreas Preikschat, Mattias Põldaru, # +# Christian Richter, Philip Ridout, Simon Scudder, Jeffrey Smith, # +# Maikel Stuivenberg, Martin Thompson, Jon Tibble, Dave Warnock, # +# Frode Woldsund, Martin Zibricky, Patrick Zimmermann # +# --------------------------------------------------------------------------- # +# This program is free software; you can redistribute it and/or modify it # +# under the terms of the GNU General Public License as published by the Free # +# Software Foundation; version 2 of the License. # +# # +# This program is distributed in the hope that it will be useful, but WITHOUT # +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # +# more details. 
class DuplicateSongFinder(object):
    """
    The :class:`DuplicateSongFinder` class decides whether two songs are
    probably duplicates of each other by diffing their ``search_lyrics``.

    The lyrics are compared character by character with
    :class:`difflib.SequenceMatcher`. Short differences surrounded by longer
    passages are treated as typos and ignored. The songs count as probably
    equal when the remaining equal passages are long enough.

    ``minFragmentSize``
        Minimum length a passage must have to count as a neighbour of a typo,
        and for two adjacent equal passages to be merged.

    ``minBlockSize``
        Minimum length of an equal passage for it to be counted, and the
        minimum total of counted passages for two songs to match.

    ``maxTypoSize``
        Maximum length of a difference that is still ignored as a typo.
    """

    def __init__(self):
        self.minFragmentSize = 5
        self.minBlockSize = 70
        self.maxTypoSize = 3

    def songsProbablyEqual(self, song1, song2):
        """
        Compare the lyrics of two songs and report whether they are probably
        the same song.

        ``song1``, ``song2``
            Objects with a ``search_lyrics`` attribute holding the text to
            compare. A value of ``None`` is treated like empty lyrics.

        Returns ``True`` if the songs are probably equal, ``False`` otherwise.
        A song without lyrics never matches anything.
        """
        lyrics1 = song1.search_lyrics or u''
        lyrics2 = song2.search_lyrics or u''
        # On equal length the second song is taken as the "small" one.
        if len(lyrics1) < len(lyrics2):
            small, large = lyrics1, lyrics2
        else:
            small, large = lyrics2, lyrics1
        if not small:
            # Nothing to compare: there cannot be any equal passage.
            return False
        # NOTE(review): SequenceMatcher's automatic junk heuristic is active
        # by default for sequences of 200 or more items, which lyrics usually
        # are - confirm this does not hide matches.
        differ = difflib.SequenceMatcher(a=small, b=large)
        diff_no_typos = self.__removeTypos(differ.get_opcodes())
        # Check 1: enough text in large equal passages.
        if self.__lengthOfEqualBlocks(diff_no_typos) >= self.minBlockSize:
            return True
        # Check 2: the longest equal passage covers more than two thirds of
        # the shorter lyrics. Written without a division so the result does
        # not depend on integer versus float division.
        longest = self.__lengthOfLongestEqualBlock(diff_no_typos)
        return longest * 3 > len(small) * 2

    def __opLength(self, opcode):
        """
        Return the length of an opcode ``(tag, i1, i2, j1, j2)``: the larger
        of the two ranges it spans in the compared sequences.
        """
        return max(opcode[2] - opcode[1], opcode[4] - opcode[3])

    def __removeTypos(self, diff):
        """
        Return a copy of the opcode list ``diff`` with typos removed and the
        equal passages that became adjacent merged into one.

        A typo is a non-equal opcode of at most ``maxTypoSize`` length whose
        neighbours are at least ``minFragmentSize`` long.
        """
        # Work on a copy: get_opcodes() returns the matcher's cached list.
        diff = list(diff)
        min_fragment = self.minFragmentSize
        max_typo = self.maxTypoSize
        length = self.__opLength
        # Remove a typo at the beginning of the string.
        if len(diff) >= 2:
            if diff[0][0] != "equal" and length(diff[0]) <= max_typo and \
                    length(diff[1]) >= min_fragment:
                del diff[0]
        # Remove typos in the middle of the string. Walking backwards keeps
        # the not yet visited indexes valid after a deletion.
        if len(diff) >= 3:
            for index in range(len(diff) - 3, -1, -1):
                if length(diff[index]) >= min_fragment and \
                        diff[index + 1][0] != "equal" and \
                        length(diff[index + 1]) <= max_typo and \
                        length(diff[index + 2]) >= min_fragment:
                    del diff[index + 1]
        # Remove a typo at the end of the string.
        if len(diff) >= 2:
            if length(diff[-2]) >= min_fragment and \
                    diff[-1][0] != "equal" and length(diff[-1]) <= max_typo:
                del diff[-1]
        # Merge equal passages that became neighbours by removing typos.
        for index in range(len(diff) - 2, -1, -1):
            first = diff[index]
            second = diff[index + 1]
            if first[0] == "equal" and length(first) >= min_fragment and \
                    second[0] == "equal" and length(second) >= min_fragment:
                diff[index] = ("equal", first[1], second[2], first[3],
                    second[4])
                del diff[index + 1]
        return diff

    def __lengthOfEqualBlocks(self, diff):
        """
        Return the summed length of all equal opcodes in ``diff`` that are at
        least ``minBlockSize`` long.
        """
        return sum(self.__opLength(element) for element in diff
            if element[0] == "equal" and
            self.__opLength(element) >= self.minBlockSize)

    def __lengthOfLongestEqualBlock(self, diff):
        """
        Return the length of the longest equal opcode in ``diff``, or ``0``
        if there is none.
        """
        longest = 0
        for element in diff:
            if element[0] == "equal":
                longest = max(longest, self.__opLength(element))
        return longest
def onToolsFindDuplicatesTriggered(self):
    """
    Search for duplicates in the song database.

    Every pair of songs is compared once with
    :class:`DuplicateSongFinder`. For each pair judged probably equal an
    information box shows the two positions of the songs in the loaded list.
    Does nothing if there are no songs.
    """
    songs = self.manager.get_all_objects(Song)
    # Take the bounds from the loaded list itself instead of a separate
    # count query, so the indexes below can never run past its end.
    maxSongs = len(songs)
    if maxSongs == 0:
        return
    # NOTE(review): looks like leftover debugging feedback - confirm whether
    # this box should stay.
    QtGui.QMessageBox.information(self.formParent,
        "Find duplicates called", "Called...")
    # The finder is created once and reused for all comparisons.
    doubleFinder = DuplicateSongFinder()
    for outerSongCounter in range(maxSongs - 1):
        outerSong = songs[outerSongCounter]
        for innerSongCounter in range(outerSongCounter + 1, maxSongs):
            if doubleFinder.songsProbablyEqual(outerSong,
                    songs[innerSongCounter]):
                QtGui.QMessageBox.information(self.formParent,
                    "Double found", str(innerSongCounter) + " " +
                    str(outerSongCounter))