Remove non-needed test __init__ file. Split up testfunctions.

This commit is contained in:
Patrick Zimmermann 2013-02-18 22:42:04 +01:00
parent 8c8cd3b867
commit 904620998f
3 changed files with 205 additions and 42 deletions

View File

@ -0,0 +1,153 @@
# -*- coding: utf-8 -*-
# vim: autoindent shiftwidth=4 expandtab textwidth=120 tabstop=4 softtabstop=4
###############################################################################
# OpenLP - Open Source Lyrics Projection #
# --------------------------------------------------------------------------- #
# Copyright (c) 2008-2013 Raoul Snyman #
# Portions copyright (c) 2008-2013 Tim Bentley, Gerald Britton, Jonathan #
# Corwin, Samuel Findlay, Michael Gorven, Scott Guerrieri, Matthias Hub, #
# Meinert Jordan, Armin Köhler, Erik Lundin, Edwin Lunando, Brian T. Meyer. #
# Joshua Miller, Stevan Pettit, Andreas Preikschat, Mattias Põldaru, #
# Christian Richter, Philip Ridout, Simon Scudder, Jeffrey Smith, #
# Maikel Stuivenberg, Martin Thompson, Jon Tibble, Dave Warnock, #
# Frode Woldsund, Martin Zibricky, Patrick Zimmermann #
# --------------------------------------------------------------------------- #
# This program is free software; you can redistribute it and/or modify it #
# under the terms of the GNU General Public License as published by the Free #
# Software Foundation; version 2 of the License. #
# #
# This program is distributed in the hope that it will be useful, but WITHOUT #
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for #
# more details. #
# #
# You should have received a copy of the GNU General Public License along #
# with this program; if not, write to the Free Software Foundation, Inc., 59 #
# Temple Place, Suite 330, Boston, MA 02111-1307 USA #
###############################################################################
"""
The :mod:`songcompare` module provides functionality to search for
duplicate songs. It has a single public function, :func:`songs_probably_equal`.
The algorithm is based on the diff algorithm.
First a diffset is calculated for two songs.
To compensate for typos, all differences that are no larger than a
limit (<=max_typo_size) and are surrounded by larger equal blocks
(>=min_fragment_size) are removed and the surrounding equal parts are merged.
Finally two conditions can qualify a song tuple to be a duplicate:
1. There is a block of equal content that is at least min_block_size large.
This condition should hit for all larger songs that have a long enough
equal part. Even if only one verse is equal this condition should still hit.
2. More than two thirds of the smaller song is contained in the larger song as one equal block.
This condition should hit if one of the two songs (or both) is small (smaller
than the min_block_size), but most of the song is contained in the other song.
"""
import difflib
# Minimum length the passages next to a small difference must have for that difference to be
# treated as a typo by __remove_typos.
min_fragment_size = 5
# Minimum length an equal block must have to be counted by __length_of_equal_blocks; also the
# total counted length songs_probably_equal requires for its first duplicate check.
min_block_size = 70
# Maximum length of a difference that __remove_typos still treats as a typo.
max_typo_size = 3
def songs_probably_equal(song1, song2):
    """
    Decide whether two songs are likely to be duplicates of each other.

    ``song1``
        The first song to compare. Only its ``search_lyrics`` attribute is read.

    ``song2``
        The second song to compare. Only its ``search_lyrics`` attribute is read.

    Returns ``True`` if the total length of the large equal blocks reaches ``min_block_size`` or if
    the longest equal block covers more than two thirds of the shorter lyrics, ``False`` otherwise.
    """
    lyrics1 = song1.search_lyrics
    lyrics2 = song2.search_lyrics
    # The shorter text is diffed against the longer one. On equal lengths song2 counts as the shorter.
    if len(lyrics1) < len(lyrics2):
        shorter, longer = lyrics1, lyrics2
    else:
        shorter, longer = lyrics2, lyrics1
    # NOTE(review): SequenceMatcher is created with its default ``autojunk`` heuristic, which the
    # difflib documentation says applies to sequences of 200 or more items - confirm this is the
    # intended behaviour for long lyrics.
    matcher = difflib.SequenceMatcher(a=longer, b=shorter)
    cleaned_diff = __remove_typos(matcher.get_opcodes())
    # Check 1: enough equal content in absolute terms.
    if __length_of_equal_blocks(cleaned_diff) >= min_block_size:
        return True
    # Check 2: the longest equal block is large relative to the shorter song.
    return __length_of_longest_equal_block(cleaned_diff) > len(shorter) * 2 / 3
def __op_length(opcode):
    """
    Return the length of a single difference.

    ``opcode``
        The difference, an indexable with at least five entries such as the opcodes produced by
        ``SequenceMatcher.get_opcodes()``. Entries 1 and 2 delimit the span in the first sequence,
        entries 3 and 4 the span in the second one.

    Returns the larger of the two span lengths.
    """
    first_span = opcode[2] - opcode[1]
    second_span = opcode[4] - opcode[3]
    return first_span if first_span >= second_span else second_span
def __remove_typos(diff):
    """
    Remove typos from a diff set and merge the equal passages that become adjacent.

    A typo is a non-equal difference of length at most ``max_typo_size`` that is bordered by
    passages of length at least ``min_fragment_size``. At the very beginning and the very end of
    the diff set only the one existing neighbour is checked.

    ``diff``
        The diff set to remove the typos from, a list of opcodes as returned by
        ``SequenceMatcher.get_opcodes()``. The list passed in is not modified.

    Returns a new list holding the cleaned up diff set.
    """
    # Work on a copy so the list passed in by the caller is left untouched.
    diff = list(diff)
    # Remove typo at the beginning of the string.
    if len(diff) >= 2 and diff[0][0] != "equal" and __op_length(diff[0]) <= max_typo_size and \
            __op_length(diff[1]) >= min_fragment_size:
        del diff[0]
    # Remove typos in the middle of the string. Walking backwards keeps the indices that are still
    # to be visited valid while entries are deleted. The range is empty for fewer than 3 entries.
    for index in range(len(diff) - 3, -1, -1):
        if __op_length(diff[index]) >= min_fragment_size and \
                diff[index + 1][0] != "equal" and __op_length(diff[index + 1]) <= max_typo_size and \
                __op_length(diff[index + 2]) >= min_fragment_size:
            del diff[index + 1]
    # Remove typo at the end of the string.
    if len(diff) >= 2 and __op_length(diff[-2]) >= min_fragment_size and \
            diff[-1][0] != "equal" and __op_length(diff[-1]) <= max_typo_size:
        del diff[-1]
    # Merge the bordering equal passages that occurred by removing differences. The merged opcode
    # starts where the first passage starts and ends where the second passage ends.
    for index in range(len(diff) - 2, -1, -1):
        if diff[index][0] == "equal" and __op_length(diff[index]) >= min_fragment_size and \
                diff[index + 1][0] == "equal" and __op_length(diff[index + 1]) >= min_fragment_size:
            diff[index] = ("equal", diff[index][1], diff[index + 1][2], diff[index][3],
                diff[index + 1][4])
            del diff[index + 1]
    return diff
def __length_of_equal_blocks(diff):
    """
    Sum up the lengths of all sufficiently large equal blocks of a diff set.

    Only opcodes tagged ``"equal"`` whose length is at least ``min_block_size`` contribute.

    ``diff``
        The diff set to return the length for.

    Returns the summed length, ``0`` if no block qualifies.
    """
    return sum(
        __op_length(opcode) for opcode in diff
        if opcode[0] == "equal" and __op_length(opcode) >= min_block_size)
def __length_of_longest_equal_block(diff):
    """
    Determine the length of the largest equal block of a diff set.

    ``diff``
        The diff set to return the length for.

    Returns the length of the longest opcode tagged ``"equal"``, ``0`` if there is none.
    """
    equal_lengths = [__op_length(opcode) for opcode in diff if opcode[0] == "equal"]
    # Seeding with 0 covers a diff set without any equal block.
    return max([0] + equal_lengths)

View File

@ -1,8 +0,0 @@
import sip
# Select version 2 of the sip API for each of the Qt types named below.
sip.setapi(u'QDate', 2)
sip.setapi(u'QDateTime', 2)
sip.setapi(u'QString', 2)
sip.setapi(u'QTextStream', 2)
sip.setapi(u'QTime', 2)
sip.setapi(u'QUrl', 2)
sip.setapi(u'QVariant', 2)

View File

@ -34,64 +34,82 @@ from mock import MagicMock
from openlp.plugins.songs.lib.songcompare import songs_probably_equal
class TestLib(TestCase):
    """
    Tests for the songs_probably_equal function, one test method per scenario.
    """

    def setUp(self):
        """
        Mock up two songs and provide a set of lyrics for the songs_probably_equal tests.
        """
        self.full_lyrics = u'''amazing grace how sweet the sound that saved a wretch like me i once was lost but now am
found was blind but now i see twas grace that taught my heart to fear and grace my fears relieved how
precious did that grace appear the hour i first believed through many dangers toils and snares i have already
come tis grace that brought me safe thus far and grace will lead me home'''
        self.short_lyrics = u'''twas grace that taught my heart to fear and grace my fears relieved how precious did that
grace appear the hour i first believed'''
        self.error_lyrics = u'''amazing how sweet the trumpet that saved a wrench like me i once was losst but now am
found waf blind but now i see it was grace that taught my heart to fear and grace my fears relieved how
precious did that grace appppppppear the hour i first believedxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx snares i have
already come to this grace that brought me safe so far and grace will lead me home'''
        self.different_lyrics = u'''on a hill far away stood an old rugged cross the emblem of suffering and shame and i love
that old cross where the dearest and best for a world of lost sinners was slain so ill cherish the old rugged
cross till my trophies at last i lay down i will cling to the old rugged cross and exchange it some day for a
crown'''
        # Fresh mocks per test; each test assigns the search_lyrics it needs.
        self.song1 = MagicMock()
        self.song2 = MagicMock()

    def songs_probably_equal_same_song_test(self):
        """
        Test the songs_probably_equal function with twice the same song.
        """
        #GIVEN: Two equal songs.
        self.song1.search_lyrics = self.full_lyrics
        self.song2.search_lyrics = self.full_lyrics

        #WHEN: We compare those songs for equality.
        result = songs_probably_equal(self.song1, self.song2)

        #THEN: The result should be True.
        assert result is True, u'The result should be True'

    def songs_probably_equal_short_song_test(self):
        """
        Test the songs_probably_equal function with a song and a shorter version of the same song.
        """
        #GIVEN: A song and a short version of the same song.
        self.song1.search_lyrics = self.full_lyrics
        self.song2.search_lyrics = self.short_lyrics

        #WHEN: We compare those songs for equality.
        result = songs_probably_equal(self.song1, self.song2)

        #THEN: The result should be True.
        assert result is True, u'The result should be True'

    def songs_probably_equal_error_song_test(self):
        """
        Test the songs_probably_equal function with a song and a very erroneous version of the same song.
        """
        #GIVEN: A song and the same song with lots of errors.
        self.song1.search_lyrics = self.full_lyrics
        self.song2.search_lyrics = self.error_lyrics

        #WHEN: We compare those songs for equality.
        result = songs_probably_equal(self.song1, self.song2)

        #THEN: The result should be True.
        assert result is True, u'The result should be True'

    def songs_probably_equal_different_song_test(self):
        """
        Test the songs_probably_equal function with two different songs.
        """
        #GIVEN: Two different songs.
        self.song1.search_lyrics = self.full_lyrics
        self.song2.search_lyrics = self.different_lyrics

        #WHEN: We compare those songs for equality.
        result = songs_probably_equal(self.song1, self.song2)

        #THEN: The result should be False.
        assert result is False, u'The result should be False'