forked from openlp/openlp
Remove non-needed test __init__ file. Split up testfunctions.
This commit is contained in:
parent
8c8cd3b867
commit
904620998f
153
openlp/plugins/songs/lib/songcompare.py
Normal file
153
openlp/plugins/songs/lib/songcompare.py
Normal file
@ -0,0 +1,153 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# vim: autoindent shiftwidth=4 expandtab textwidth=120 tabstop=4 softtabstop=4
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
# OpenLP - Open Source Lyrics Projection #
|
||||||
|
# --------------------------------------------------------------------------- #
|
||||||
|
# Copyright (c) 2008-2013 Raoul Snyman #
|
||||||
|
# Portions copyright (c) 2008-2013 Tim Bentley, Gerald Britton, Jonathan #
|
||||||
|
# Corwin, Samuel Findlay, Michael Gorven, Scott Guerrieri, Matthias Hub, #
|
||||||
|
# Meinert Jordan, Armin Köhler, Erik Lundin, Edwin Lunando, Brian T. Meyer. #
|
||||||
|
# Joshua Miller, Stevan Pettit, Andreas Preikschat, Mattias Põldaru, #
|
||||||
|
# Christian Richter, Philip Ridout, Simon Scudder, Jeffrey Smith, #
|
||||||
|
# Maikel Stuivenberg, Martin Thompson, Jon Tibble, Dave Warnock, #
|
||||||
|
# Frode Woldsund, Martin Zibricky, Patrick Zimmermann #
|
||||||
|
# --------------------------------------------------------------------------- #
|
||||||
|
# This program is free software; you can redistribute it and/or modify it #
|
||||||
|
# under the terms of the GNU General Public License as published by the Free #
|
||||||
|
# Software Foundation; version 2 of the License. #
|
||||||
|
# #
|
||||||
|
# This program is distributed in the hope that it will be useful, but WITHOUT #
|
||||||
|
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
|
||||||
|
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for #
|
||||||
|
# more details. #
|
||||||
|
# #
|
||||||
|
# You should have received a copy of the GNU General Public License along #
|
||||||
|
# with this program; if not, write to the Free Software Foundation, Inc., 59 #
|
||||||
|
# Temple Place, Suite 330, Boston, MA 02111-1307 USA #
|
||||||
|
###############################################################################
|
||||||
|
"""
|
||||||
|
The :mod:`songcompare` module provides functionality to search for
|
||||||
|
duplicate songs. It has a single public function, :func:`songs_probably_equal`.
|
||||||
|
|
||||||
|
The algorithm is based on the diff algorithm.
|
||||||
|
First a diffset is calculated for two songs.
|
||||||
|
To compensate for typos all differences that are smaller than a
|
||||||
|
limit (<= max_typo_size) and are surrounded by larger equal blocks
|
||||||
|
(>= min_fragment_size) are removed and the surrounding equal parts are merged.
|
||||||
|
Finally two conditions can qualify a song tuple to be a duplicate:
|
||||||
|
1. There is a block of equal content that is at least min_block_size large.
|
||||||
|
This condition should hit for all larger songs that have a long enough
|
||||||
|
equal part. Even if only one verse is equal this condition should still hit.
|
||||||
|
2. More than two thirds of the smaller song is contained in the larger song as one equal block.
|
||||||
|
This condition should hit if one of the two songs (or both) is small (smaller
|
||||||
|
than the min_block_size), but most of the song is contained in the other song.
|
||||||
|
"""
|
||||||
|
import difflib
|
||||||
|
|
||||||
|
|
||||||
|
# Minimum length a passage bordering a difference must have for that difference to be dropped as a typo;
# also the minimum length of two neighbouring equal passages for them to be merged.
min_fragment_size = 5
# Minimum length of an equal block for it to be counted when summing up the equal blocks of two songs.
min_block_size = 70
# Maximum length of a difference that is still treated as a typo.
max_typo_size = 3
|
||||||
|
|
||||||
|
|
||||||
|
def songs_probably_equal(song1, song2):
    """
    Calculate and return whether two songs are probably equal.

    The ``search_lyrics`` of both songs are diffed and small differences are removed as typos. The songs count
    as equal when the equal blocks of at least ``min_block_size`` add up to at least ``min_block_size``, or when
    the longest equal block is longer than two thirds of the shorter lyrics.

    ``song1``
        The first song to compare. Only its ``search_lyrics`` attribute is read.

    ``song2``
        The second song to compare. Only its ``search_lyrics`` attribute is read.

    Returns ``True`` if the songs are probably equal, otherwise ``False``.
    """
    lyrics1 = song1.search_lyrics
    lyrics2 = song2.search_lyrics
    # The shorter lyrics are always diffed against the longer ones; on equal length song1 is the "large" one.
    if len(lyrics1) < len(lyrics2):
        small, large = lyrics1, lyrics2
    else:
        small, large = lyrics2, lyrics1
    # NOTE(review): SequenceMatcher is used with its default settings; its documented "autojunk" heuristic
    # may influence matching on long lyrics - confirm that this is intended.
    differ = difflib.SequenceMatcher(a=large, b=small)
    diff_no_typos = __remove_typos(differ.get_opcodes())
    # Check 1: enough text is shared in large equal blocks.
    if __length_of_equal_blocks(diff_no_typos) >= min_block_size:
        return True
    # Check 2: the longest equal block exceeds two thirds of the shorter lyrics. This is the cross-multiplied
    # form of ``longest > len(small) * 2 / 3``; it stays in integers, so the result does not depend on
    # whether ``/`` floors (Python 2) or not (Python 3).
    return 3 * __length_of_longest_equal_block(diff_no_typos) > 2 * len(small)
|
||||||
|
|
||||||
|
|
||||||
|
def __op_length(opcode):
    """
    Measure how long a single difference is.

    ``opcode``
        The difference. Items 1 and 2 are read as start and end of the first span, items 3 and 4 as start and
        end of the second span.

    Returns the length of the longer of the two spans.
    """
    first_span = opcode[2] - opcode[1]
    second_span = opcode[4] - opcode[3]
    return max(first_span, second_span)
|
||||||
|
|
||||||
|
|
||||||
|
def __remove_typos(diff):
    """
    Remove typos from a diff set. A typo is a small difference (at most ``max_typo_size`` long) that is bordered
    by large passages (at least ``min_fragment_size`` long). Equal passages that become neighbours through the
    removal are merged into one.

    ``diff``
        The diff set to remove the typos from, a list of ``(tag, i1, i2, j1, j2)`` tuples. The list itself is
        not modified.

    Returns a new list with the typos removed.
    """
    # Work on a copy so that the list handed in by the caller is left untouched.
    ops = list(diff)
    # Remove typo at the beginning of the string.
    if (len(ops) >= 2 and ops[0][0] != "equal" and __op_length(ops[0]) <= max_typo_size
            and __op_length(ops[1]) >= min_fragment_size):
        del ops[0]
    # Remove typos in the middle of the string. Walking backwards keeps the indices that are still to be
    # visited valid while entries are deleted. The range is empty for fewer than three entries.
    for index in range(len(ops) - 3, -1, -1):
        before, candidate, after = ops[index], ops[index + 1], ops[index + 2]
        if (candidate[0] != "equal" and __op_length(candidate) <= max_typo_size
                and __op_length(before) >= min_fragment_size
                and __op_length(after) >= min_fragment_size):
            del ops[index + 1]
    # Remove typo at the end of the string.
    if (len(ops) >= 2 and ops[-1][0] != "equal" and __op_length(ops[-1]) <= max_typo_size
            and __op_length(ops[-2]) >= min_fragment_size):
        del ops[-1]
    # Merge the bordering equal passages that occurred by removing differences.
    for index in range(len(ops) - 2, -1, -1):
        left, right = ops[index], ops[index + 1]
        if (left[0] == "equal" and right[0] == "equal"
                and __op_length(left) >= min_fragment_size
                and __op_length(right) >= min_fragment_size):
            # The merged block starts where the left one starts and ends where the right one ends.
            ops[index:index + 2] = [("equal", left[1], right[2], left[3], right[4])]
    return ops
|
||||||
|
|
||||||
|
|
||||||
|
def __length_of_equal_blocks(diff):
    """
    Sum up the lengths of the equal blocks in a diff set.
    Only blocks of at least ``min_block_size`` contribute to the sum.

    ``diff``
        The diff set to return the length for.

    Returns the summed length, ``0`` if no block qualifies.
    """
    equal_sizes = [__op_length(op) for op in diff if op[0] == "equal"]
    return sum(size for size in equal_sizes if size >= min_block_size)
|
||||||
|
|
||||||
|
|
||||||
|
def __length_of_longest_equal_block(diff):
    """
    Find the largest equal block in a diff set and return its length.

    ``diff``
        The diff set to return the length for.

    Returns the length of the largest equal block, ``0`` if there is none.
    """
    equal_sizes = [__op_length(op) for op in diff if op[0] == "equal"]
    # The leading 0 is the result for a diff set without any equal block.
    return max([0] + equal_sizes)
|
@ -1,8 +0,0 @@
|
|||||||
import sip
|
|
||||||
sip.setapi(u'QDate', 2)
|
|
||||||
sip.setapi(u'QDateTime', 2)
|
|
||||||
sip.setapi(u'QString', 2)
|
|
||||||
sip.setapi(u'QTextStream', 2)
|
|
||||||
sip.setapi(u'QTime', 2)
|
|
||||||
sip.setapi(u'QUrl', 2)
|
|
||||||
sip.setapi(u'QVariant', 2)
|
|
@ -34,64 +34,82 @@ from mock import MagicMock
|
|||||||
from openlp.plugins.songs.lib.songcompare import songs_probably_equal
|
from openlp.plugins.songs.lib.songcompare import songs_probably_equal
|
||||||
|
|
||||||
class TestLib(TestCase):
    """
    Tests for the songs_probably_equal function.
    """
    def setUp(self):
        """
        Create two mocked songs and the lyrics shared by all songs_probably_equal tests.
        """
        self.song1 = MagicMock()
        self.song2 = MagicMock()
        # NOTE(review): the continuation lines of the lyrics are reproduced without leading whitespace;
        # confirm this against the original file.
        self.full_lyrics = u'''amazing grace how sweet the sound that saved a wretch like me i once was lost but now am
found was blind but now i see twas grace that taught my heart to fear and grace my fears relieved how
precious did that grace appear the hour i first believed through many dangers toils and snares i have already
come tis grace that brought me safe thus far and grace will lead me home'''
        self.short_lyrics = u'''twas grace that taught my heart to fear and grace my fears relieved how precious did that
grace appear the hour i first believed'''
        self.error_lyrics = u'''amazing how sweet the trumpet that saved a wrench like me i once was losst but now am
found waf blind but now i see it was grace that taught my heart to fear and grace my fears relieved how
precious did that grace appppppppear the hour i first believedxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx snares i have
already come to this grace that brought me safe so far and grace will lead me home'''
        self.different_lyrics = u'''on a hill far away stood an old rugged cross the emblem of suffering and shame and i love
that old cross where the dearest and best for a world of lost sinners was slain so ill cherish the old rugged
cross till my trophies at last i lay down i will cling to the old rugged cross and exchange it some day for a
crown'''

    def songs_probably_equal_same_song_test(self):
        """
        Compare a song with itself using songs_probably_equal.
        """
        # GIVEN: Two equal songs.
        self.song1.search_lyrics = self.full_lyrics
        self.song2.search_lyrics = self.full_lyrics

        # WHEN: We compare those songs for equality.
        result = songs_probably_equal(self.song1, self.song2)

        # THEN: The result should be True.
        assert result is True, u'The result should be True'

    def songs_probably_equal_short_song_test(self):
        """
        Compare a song with a shorter version of the same song using songs_probably_equal.
        """
        # GIVEN: A song and a short version of the same song.
        self.song1.search_lyrics = self.full_lyrics
        self.song2.search_lyrics = self.short_lyrics

        # WHEN: We compare those songs for equality.
        result = songs_probably_equal(self.song1, self.song2)

        # THEN: The result should be True.
        assert result is True, u'The result should be True'

    def songs_probably_equal_error_song_test(self):
        """
        Compare a song with a very erroneous version of the same song using songs_probably_equal.
        """
        # GIVEN: A song and the same song with lots of errors.
        self.song1.search_lyrics = self.full_lyrics
        self.song2.search_lyrics = self.error_lyrics

        # WHEN: We compare those songs for equality.
        result = songs_probably_equal(self.song1, self.song2)

        # THEN: The result should be True.
        assert result is True, u'The result should be True'

    def songs_probably_equal_different_song_test(self):
        """
        Compare two different songs using songs_probably_equal.
        """
        # GIVEN: Two different songs.
        self.song1.search_lyrics = self.full_lyrics
        self.song2.search_lyrics = self.different_lyrics

        # WHEN: We compare those songs for equality.
        result = songs_probably_equal(self.song1, self.song2)

        # THEN: The result should be False.
        assert result is False, u'The result should be False'
|
||||||
|
Loading…
Reference in New Issue
Block a user