forked from openlp/openlp
Added basic duplicate song detection, no removal or fancy GUI yet.
This commit is contained in:
parent
603249b2a3
commit
2dfb7bec9c
114
openlp/plugins/songs/lib/doublesfinder.py
Normal file
114
openlp/plugins/songs/lib/doublesfinder.py
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# vim: autoindent shiftwidth=4 expandtab textwidth=120 tabstop=4 softtabstop=4
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
# OpenLP - Open Source Lyrics Projection #
|
||||||
|
# --------------------------------------------------------------------------- #
|
||||||
|
# Copyright (c) 2008-2013 Raoul Snyman #
|
||||||
|
# Portions copyright (c) 2008-2013 Tim Bentley, Gerald Britton, Jonathan #
|
||||||
|
# Corwin, Samuel Findlay, Michael Gorven, Scott Guerrieri, Matthias Hub, #
|
||||||
|
# Meinert Jordan, Armin Köhler, Erik Lundin, Edwin Lunando, Brian T. Meyer. #
|
||||||
|
# Joshua Miller, Stevan Pettit, Andreas Preikschat, Mattias Põldaru, #
|
||||||
|
# Christian Richter, Philip Ridout, Simon Scudder, Jeffrey Smith, #
|
||||||
|
# Maikel Stuivenberg, Martin Thompson, Jon Tibble, Dave Warnock, #
|
||||||
|
# Frode Woldsund, Martin Zibricky, Patrick Zimmermann #
|
||||||
|
# --------------------------------------------------------------------------- #
|
||||||
|
# This program is free software; you can redistribute it and/or modify it #
|
||||||
|
# under the terms of the GNU General Public License as published by the Free #
|
||||||
|
# Software Foundation; version 2 of the License. #
|
||||||
|
# #
|
||||||
|
# This program is distributed in the hope that it will be useful, but WITHOUT #
|
||||||
|
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
|
||||||
|
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for #
|
||||||
|
# more details. #
|
||||||
|
# #
|
||||||
|
# You should have received a copy of the GNU General Public License along #
|
||||||
|
# with this program; if not, write to the Free Software Foundation, Inc., 59 #
|
||||||
|
# Temple Place, Suite 330, Boston, MA 02111-1307 USA #
|
||||||
|
###############################################################################
|
||||||
|
"""
|
||||||
|
The :mod:`doublesfinder` module provides the functionality for detecting
|
||||||
|
probable duplicate songs in the OpenLP database.
|
||||||
|
"""
|
||||||
|
import logging
|
||||||
|
import difflib
|
||||||
|
|
||||||
|
from openlp.core.lib import translate
|
||||||
|
from openlp.plugins.songs.lib.db import Song
|
||||||
|
from openlp.plugins.songs.lib.ui import SongStrings
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class DuplicateSongFinder(object):
    """
    The :class:`DuplicateSongFinder` class provides functionality to decide
    whether two songs are probably duplicates of each other.

    The decision is based on a fuzzy comparison of the songs' ``search_lyrics``
    text using :class:`difflib.SequenceMatcher`. Short differences that are
    surrounded by sufficiently long fragments are treated as typos and ignored
    before the size of the remaining equal blocks is evaluated. This class only
    detects duplicates; it does not remove anything.
    """

    def __init__(self):
        # Minimum length an opcode next to a short difference must have for that difference to be dropped as a typo,
        # and minimum length of two adjacent equal opcodes for them to be merged.
        self.minFragmentSize = 5
        # Minimum length of an equal block for it to be counted by __lengthOfEqualBlocks.
        self.minBlockSize = 70
        # Maximum length of a non-equal opcode that is still regarded as a typo.
        self.maxTypoSize = 3

    def songsProbablyEqual(self, song1, song2):
        """
        Calculate and return whether two songs are probably equal.

        ``song1``
            The first song to compare. Only its ``search_lyrics`` attribute is read.

        ``song2``
            The second song to compare. Only its ``search_lyrics`` attribute is read.

        Returns ``True`` if, after typos have been removed from the diff, either the equal blocks of at least
        ``minBlockSize`` characters add up to ``minBlockSize`` or more, or the longest equal block covers more than
        two thirds of the shorter lyrics. Returns ``False`` otherwise.
        """
        # Missing lyrics (``None``) are compared as empty text instead of raising a TypeError in len().
        lyrics1 = song1.search_lyrics or u''
        lyrics2 = song2.search_lyrics or u''
        # The shorter text is always passed as sequence "a"; on equal length song2 is "a", song1 is "b".
        if len(lyrics1) < len(lyrics2):
            small, large = lyrics1, lyrics2
        else:
            small, large = lyrics2, lyrics1
        differ = difflib.SequenceMatcher(a=small, b=large)
        diff_no_typos = self.__removeTypos(differ.get_opcodes())
        equal_total = self.__lengthOfEqualBlocks(diff_no_typos)
        longest = self.__lengthOfLongestEqualBlock(diff_no_typos)
        # "longest > len(small) * 2 / 3" written with integers only, so the result does not depend on whether "/" is
        # integer or true division.
        return equal_total >= self.minBlockSize or 3 * longest > 2 * len(small)

    def __opLength(self, opcode):
        """
        Return the length of a diff opcode, i.e. the larger of the two spans it covers in the compared strings.

        ``opcode``
            A ``(tag, i1, i2, j1, j2)`` tuple as returned by ``difflib.SequenceMatcher.get_opcodes()``.
        """
        return max(opcode[2] - opcode[1], opcode[4] - opcode[3])

    def __removeTypos(self, diff):
        """
        Return a copy of the opcode list with typos removed and the fragments around them merged.

        A typo is a non-equal opcode of at most ``maxTypoSize`` characters whose neighbouring opcode(s) are at least
        ``minFragmentSize`` characters long. The passed list is left untouched.

        ``diff``
            A list of ``(tag, i1, i2, j1, j2)`` tuples as returned by ``difflib.SequenceMatcher.get_opcodes()``.
        """
        # Work on a copy: get_opcodes() hands out the list that SequenceMatcher caches internally.
        diff = list(diff)
        # Remove typo at the beginning of the string.
        if len(diff) >= 2 and diff[0][0] != "equal" and self.__opLength(diff[0]) <= self.maxTypoSize and \
                self.__opLength(diff[1]) >= self.minFragmentSize:
            del diff[0]
        # Remove typos in the middle of the string. Walking backwards keeps the indices that are still to be visited
        # valid while entries behind them are deleted.
        for index in range(len(diff) - 3, -1, -1):
            if self.__opLength(diff[index]) >= self.minFragmentSize and \
                    diff[index + 1][0] != "equal" and self.__opLength(diff[index + 1]) <= self.maxTypoSize and \
                    self.__opLength(diff[index + 2]) >= self.minFragmentSize:
                del diff[index + 1]
        # Remove typo at the end of the string.
        if len(diff) >= 2 and self.__opLength(diff[-2]) >= self.minFragmentSize and \
                diff[-1][0] != "equal" and self.__opLength(diff[-1]) <= self.maxTypoSize:
            del diff[-1]
        # Merge equal fragments that have become neighbours into one opcode spanning both of them.
        for index in range(len(diff) - 2, -1, -1):
            current, following = diff[index], diff[index + 1]
            if current[0] == "equal" and self.__opLength(current) >= self.minFragmentSize and \
                    following[0] == "equal" and self.__opLength(following) >= self.minFragmentSize:
                diff[index] = ("equal", current[1], following[2], current[3], following[4])
                del diff[index + 1]
        return diff

    def __lengthOfEqualBlocks(self, diff):
        """
        Return the summed length of all equal opcodes that are at least ``minBlockSize`` characters long.

        ``diff``
            A list of ``(tag, i1, i2, j1, j2)`` opcode tuples.
        """
        return sum(self.__opLength(element) for element in diff
            if element[0] == "equal" and self.__opLength(element) >= self.minBlockSize)

    def __lengthOfLongestEqualBlock(self, diff):
        """
        Return the length of the longest equal opcode, or ``0`` if there is none.

        ``diff``
            A list of ``(tag, i1, i2, j1, j2)`` opcode tuples.
        """
        # The leading 0 supplies the result for a diff without equal opcodes.
        return max([0] + [self.__opLength(element) for element in diff if element[0] == "equal"])
|
@ -45,6 +45,7 @@ from openlp.plugins.songs.lib import clean_song, upgrade, SongMediaItem, \
|
|||||||
from openlp.plugins.songs.lib.db import init_schema, Song
|
from openlp.plugins.songs.lib.db import init_schema, Song
|
||||||
from openlp.plugins.songs.lib.importer import SongFormat
|
from openlp.plugins.songs.lib.importer import SongFormat
|
||||||
from openlp.plugins.songs.lib.olpimport import OpenLPSongImport
|
from openlp.plugins.songs.lib.olpimport import OpenLPSongImport
|
||||||
|
from openlp.plugins.songs.lib.doublesfinder import DuplicateSongFinder
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -77,10 +78,12 @@ class SongsPlugin(Plugin):
|
|||||||
self.songImportItem.setVisible(True)
|
self.songImportItem.setVisible(True)
|
||||||
self.songExportItem.setVisible(True)
|
self.songExportItem.setVisible(True)
|
||||||
self.toolsReindexItem.setVisible(True)
|
self.toolsReindexItem.setVisible(True)
|
||||||
|
self.toolsFindDuplicates.setVisible(True)
|
||||||
action_list = ActionList.get_instance()
|
action_list = ActionList.get_instance()
|
||||||
action_list.add_action(self.songImportItem, UiStrings().Import)
|
action_list.add_action(self.songImportItem, UiStrings().Import)
|
||||||
action_list.add_action(self.songExportItem, UiStrings().Export)
|
action_list.add_action(self.songExportItem, UiStrings().Export)
|
||||||
action_list.add_action(self.toolsReindexItem, UiStrings().Tools)
|
action_list.add_action(self.toolsReindexItem, UiStrings().Tools)
|
||||||
|
action_list.add_action(self.toolsFindDuplicates, UiStrings().Tools)
|
||||||
QtCore.QObject.connect(Receiver.get_receiver(),
|
QtCore.QObject.connect(Receiver.get_receiver(),
|
||||||
QtCore.SIGNAL(u'servicemanager_new_service'),
|
QtCore.SIGNAL(u'servicemanager_new_service'),
|
||||||
self.clearTemporarySongs)
|
self.clearTemporarySongs)
|
||||||
@ -122,7 +125,7 @@ class SongsPlugin(Plugin):
|
|||||||
|
|
||||||
def addToolsMenuItem(self, tools_menu):
|
def addToolsMenuItem(self, tools_menu):
|
||||||
"""
|
"""
|
||||||
Give the alerts plugin the opportunity to add items to the
|
Give the Songs plugin the opportunity to add items to the
|
||||||
**Tools** menu.
|
**Tools** menu.
|
||||||
|
|
||||||
``tools_menu``
|
``tools_menu``
|
||||||
@ -137,6 +140,12 @@ class SongsPlugin(Plugin):
|
|||||||
'Re-index the songs database to improve searching and ordering.'),
|
'Re-index the songs database to improve searching and ordering.'),
|
||||||
visible=False, triggers=self.onToolsReindexItemTriggered)
|
visible=False, triggers=self.onToolsReindexItemTriggered)
|
||||||
tools_menu.addAction(self.toolsReindexItem)
|
tools_menu.addAction(self.toolsReindexItem)
|
||||||
|
self.toolsFindDuplicates = create_action(tools_menu, u'toolsFindDuplicates',
|
||||||
|
text=translate('SongsPlugin', 'Find &duplicate songs'),
|
||||||
|
statustip=translate('SongsPlugin',
|
||||||
|
'Find and remove duplicate songs in the song database.'),
|
||||||
|
visible=False, triggers=self.onToolsFindDuplicatesTriggered)
|
||||||
|
tools_menu.addAction(self.toolsFindDuplicates)
|
||||||
|
|
||||||
def onToolsReindexItemTriggered(self):
|
def onToolsReindexItemTriggered(self):
|
||||||
"""
|
"""
|
||||||
@ -157,6 +166,25 @@ class SongsPlugin(Plugin):
|
|||||||
self.manager.save_objects(songs)
|
self.manager.save_objects(songs)
|
||||||
self.mediaItem.onSearchTextButtonClicked()
|
self.mediaItem.onSearchTextButtonClicked()
|
||||||
|
|
||||||
|
def onToolsFindDuplicatesTriggered(self):
    """
    Search for duplicates in the song database.

    Every pair of songs is compared exactly once with a :class:`DuplicateSongFinder`. Each pair that is probably
    equal is reported in an information message box showing the positions of both songs in the loaded song list.
    Nothing is done when there are no songs.
    """
    # The loop bounds are taken from the list that is actually indexed below, not from a separate count query, so
    # they cannot disagree with it.
    songs = self.manager.get_all_objects(Song)
    if not songs:
        return
    maxSongs = len(songs)
    QtGui.QMessageBox.information(self.formParent,
        "Find duplicates called", "Called...")
    # One finder instance is enough for all comparisons; creating it does not depend on the songs.
    doubleFinder = DuplicateSongFinder()
    for outerSongCounter in range(maxSongs - 1):
        # Start after the outer song so that every pair is visited once and a song is never compared to itself.
        for innerSongCounter in range(outerSongCounter + 1, maxSongs):
            if doubleFinder.songsProbablyEqual(songs[outerSongCounter], songs[innerSongCounter]):
                QtGui.QMessageBox.information(self.formParent,
                    "Double found", str(innerSongCounter) + " " + str(outerSongCounter))
|
||||||
|
|
||||||
def onSongImportItemClicked(self):
|
def onSongImportItemClicked(self):
|
||||||
if self.mediaItem:
|
if self.mediaItem:
|
||||||
self.mediaItem.onImportClick()
|
self.mediaItem.onImportClick()
|
||||||
@ -280,10 +308,12 @@ class SongsPlugin(Plugin):
|
|||||||
self.songImportItem.setVisible(False)
|
self.songImportItem.setVisible(False)
|
||||||
self.songExportItem.setVisible(False)
|
self.songExportItem.setVisible(False)
|
||||||
self.toolsReindexItem.setVisible(False)
|
self.toolsReindexItem.setVisible(False)
|
||||||
|
self.toolsFindDuplicates.setVisible(False)
|
||||||
action_list = ActionList.get_instance()
|
action_list = ActionList.get_instance()
|
||||||
action_list.remove_action(self.songImportItem, UiStrings().Import)
|
action_list.remove_action(self.songImportItem, UiStrings().Import)
|
||||||
action_list.remove_action(self.songExportItem, UiStrings().Export)
|
action_list.remove_action(self.songExportItem, UiStrings().Export)
|
||||||
action_list.remove_action(self.toolsReindexItem, UiStrings().Tools)
|
action_list.remove_action(self.toolsReindexItem, UiStrings().Tools)
|
||||||
|
action_list.remove_action(self.toolsFindDuplicates, UiStrings().Tools)
|
||||||
Plugin.finalise(self)
|
Plugin.finalise(self)
|
||||||
|
|
||||||
def clearTemporarySongs(self):
|
def clearTemporarySongs(self):
|
||||||
|
Loading…
Reference in New Issue
Block a user