Add a lot more test functions.

This commit is contained in:
Patrick Zimmermann 2013-02-21 00:29:54 +01:00
parent 3c8c136d3d
commit 2cf15f3b69
2 changed files with 161 additions and 25 deletions

View File

@ -69,15 +69,15 @@ def songs_probably_equal(song1, song2):
large = song1.search_lyrics
differ = difflib.SequenceMatcher(a=large, b=small)
diff_tuples = differ.get_opcodes()
diff_no_typos = __remove_typos(diff_tuples)
if __length_of_equal_blocks(diff_no_typos) >= min_block_size or \
__length_of_longest_equal_block(diff_no_typos) > len(small) * 2 / 3:
diff_no_typos = _remove_typos(diff_tuples)
if _length_of_equal_blocks(diff_no_typos) >= min_block_size or \
_length_of_longest_equal_block(diff_no_typos) > len(small) * 2 / 3:
return True
else:
return False
def __op_length(opcode):
def _op_length(opcode):
"""
Return the length of a given difference.
@ -87,7 +87,7 @@ def __op_length(opcode):
return max(opcode[2] - opcode[1], opcode[4] - opcode[3])
def __remove_typos(diff):
def _remove_typos(diff):
"""
Remove typos from a diff set. A typo is a small difference (<= max_typo_size)
surrounded by larger equal passages (>= min_fragment_size).
@ -97,26 +97,26 @@ def __remove_typos(diff):
"""
# Remove typo at beginning of the string.
if len(diff) >= 2:
if diff[0][0] != "equal" and __op_length(diff[0]) <= max_typo_size and \
__op_length(diff[1]) >= min_fragment_size:
if diff[0][0] != "equal" and _op_length(diff[0]) <= max_typo_size and \
_op_length(diff[1]) >= min_fragment_size:
del diff[0]
# Remove typos in the middle of the string.
if len(diff) >= 3:
for index in range(len(diff) - 3, -1, -1):
if __op_length(diff[index]) >= min_fragment_size and \
diff[index + 1][0] != "equal" and __op_length(diff[index + 1]) <= max_typo_size and \
__op_length(diff[index + 2]) >= min_fragment_size:
if _op_length(diff[index]) >= min_fragment_size and \
diff[index + 1][0] != "equal" and _op_length(diff[index + 1]) <= max_typo_size and \
_op_length(diff[index + 2]) >= min_fragment_size:
del diff[index + 1]
# Remove typo at the end of the string.
if len(diff) >= 2:
if __op_length(diff[-2]) >= min_fragment_size and \
diff[-1][0] != "equal" and __op_length(diff[-1]) <= max_typo_size:
if _op_length(diff[-2]) >= min_fragment_size and \
diff[-1][0] != "equal" and _op_length(diff[-1]) <= max_typo_size:
del diff[-1]
# Merge the bordering equal passages that occurred by removing differences.
for index in range(len(diff) - 2, -1, -1):
if diff[index][0] == "equal" and __op_length(diff[index]) >= min_fragment_size and \
diff[index + 1][0] == "equal" and __op_length(diff[index + 1]) >= min_fragment_size:
if diff[index][0] == "equal" and _op_length(diff[index]) >= min_fragment_size and \
diff[index + 1][0] == "equal" and _op_length(diff[index + 1]) >= min_fragment_size:
diff[index] = ("equal", diff[index][1], diff[index + 1][2], diff[index][3],
diff[index + 1][4])
del diff[index + 1]
@ -124,7 +124,7 @@ def __remove_typos(diff):
return diff
def __length_of_equal_blocks(diff):
def _length_of_equal_blocks(diff):
"""
Return the total length of all equal blocks in a diff set.
Blocks smaller than min_block_size are not counted.
@ -134,12 +134,12 @@ def __length_of_equal_blocks(diff):
"""
length = 0
for element in diff:
if element[0] == "equal" and __op_length(element) >= min_block_size:
length += __op_length(element)
if element[0] == "equal" and _op_length(element) >= min_block_size:
length += _op_length(element)
return length
def __length_of_longest_equal_block(diff):
def _length_of_longest_equal_block(diff):
"""
Return the length of the largest equal block in a diff set.
@ -148,6 +148,6 @@ def __length_of_longest_equal_block(diff):
"""
length = 0
for element in diff:
if element[0] == "equal" and __op_length(element) > length:
length = __op_length(element)
if element[0] == "equal" and _op_length(element) > length:
length = _op_length(element)
return length

View File

@ -31,7 +31,8 @@ from unittest import TestCase
from mock import MagicMock
from openlp.plugins.songs.lib.songcompare import songs_probably_equal
from openlp.plugins.songs.lib.songcompare import songs_probably_equal, _remove_typos, _op_length, \
_length_of_equal_blocks, _length_of_longest_equal_block
class TestLib(TestCase):
def setUp(self):
@ -55,6 +56,7 @@ class TestLib(TestCase):
self.song1 = MagicMock()
self.song2 = MagicMock()
def songs_probably_equal_same_song_test(self):
"""
Test the songs_probably_equal function with twice the same song.
@ -67,7 +69,7 @@ class TestLib(TestCase):
result = songs_probably_equal(self.song1, self.song2)
#THEN: The result should be True.
assert result is True, u'The result should be True'
assert result == True, u'The result should be True'
def songs_probably_equal_short_song_test(self):
@ -82,7 +84,7 @@ class TestLib(TestCase):
result = songs_probably_equal(self.song1, self.song2)
#THEN: The result should be True.
assert result is True, u'The result should be True'
assert result == True, u'The result should be True'
def songs_probably_equal_error_song_test(self):
@ -97,7 +99,7 @@ class TestLib(TestCase):
result = songs_probably_equal(self.song1, self.song2)
#THEN: The result should be True.
assert result is True, u'The result should be True'
assert result == True, u'The result should be True'
def songs_probably_equal_different_song_test(self):
@ -112,4 +114,138 @@ class TestLib(TestCase):
result = songs_probably_equal(self.song1, self.song2)
#THEN: The result should be False.
assert result is False, u'The result should be False'
assert result == False, u'The result should be False'
def remove_typos_beginning_test(self):
    """
    Check that _remove_typos drops a small difference that opens the diff.
    """
    #GIVEN: Opcodes starting with a two-character 'replace' followed by an equal run.
    opcodes = [
        ('replace', 0, 2, 0, 1),
        ('equal', 2, 11, 1, 10),
    ]

    #WHEN: The opcodes are passed through _remove_typos.
    cleaned = _remove_typos(opcodes)

    #THEN: Only the equal run should be left over.
    assert len(cleaned) == 1, u'The result should contain only one element.'
    remaining_tag = cleaned[0][0]
    assert remaining_tag == 'equal', u'The result should contain an equal element.'
def remove_typos_beginning_negated_test(self):
    """
    Test the _remove_typos function with a large difference at the beginning.
    """
    #GIVEN: A diffset with a large difference at the beginning.
    diff = [('replace', 0, 20, 0, 1), ('equal', 20, 29, 1, 10)]
    # _remove_typos edits the list it receives and returns that same list, so
    # comparing the result with `diff` itself could never fail. Keep an
    # untouched copy to compare against instead.
    expected = list(diff)

    #WHEN: We remove the typos in there.
    result = _remove_typos(diff)

    #THEN: The diff should not have changed.
    assert result == expected, u'The diff should not have changed.'
def remove_typos_end_test(self):
    """
    Check that _remove_typos drops a small difference that closes the diff.
    """
    #GIVEN: Opcodes made of an equal run followed by a two-character 'replace'.
    opcodes = [
        ('equal', 0, 10, 0, 10),
        ('replace', 10, 12, 10, 11),
    ]

    #WHEN: The opcodes are passed through _remove_typos.
    cleaned = _remove_typos(opcodes)

    #THEN: Only the equal run should be left over.
    assert len(cleaned) == 1, u'The result should contain only one element.'
    remaining_tag = cleaned[0][0]
    assert remaining_tag == 'equal', u'The result should contain an equal element.'
def remove_typos_end_negated_test(self):
    """
    Test the _remove_typos function with a large difference at the end.
    """
    #GIVEN: A diffset with a large difference at the end.
    # The second range of the 'replace' opcode ends at 11 (it previously ended
    # at 1, i.e. before its own start index, which is not a valid opcode).
    diff = [('equal', 0, 10, 0, 10), ('replace', 10, 20, 10, 11)]
    # _remove_typos edits the list it receives and returns that same list, so
    # comparing the result with `diff` itself could never fail. Keep an
    # untouched copy to compare against instead.
    expected = list(diff)

    #WHEN: We remove the typos in there.
    result = _remove_typos(diff)

    #THEN: The diff should not have changed.
    assert result == expected, u'The diff should not have changed.'
def remove_typos_middle_test(self):
    """
    Test the _remove_typos function with a typo in the middle.
    """
    #GIVEN: A diffset with a difference in the middle.
    diff = [('equal', 0, 10, 0, 10), ('replace', 10, 12, 10, 11), ('equal', 12, 22, 11, 21)]

    #WHEN: We remove the typos in there.
    result = _remove_typos(diff)

    #THEN: There should be no typos in the middle anymore. The remaining equals should have been merged.
    # Compare the length by value; `is` tests object identity and must not be
    # used with integer literals.
    assert len(result) == 1, u'The result should contain only one element.'
    assert result[0][0] == 'equal', u'The result should contain an equal element.'
    assert result[0][1] == 0, u'The start indices should be kept.'
    assert result[0][2] == 22, u'The stop indices should be kept.'
    assert result[0][3] == 0, u'The start indices should be kept.'
    assert result[0][4] == 21, u'The stop indices should be kept.'
def remove_typos_middle_negated_test(self):
    """
    Test the _remove_typos function with a large difference in the middle.
    """
    # NOTE: this method used to be named remove_typos_beginning_negated_test,
    # which redefined the earlier test of that name in this class so that only
    # one of the two was ever run.
    #GIVEN: A diffset with a large difference in the middle.
    diff = [('equal', 0, 10, 0, 10), ('replace', 10, 20, 10, 11), ('equal', 20, 30, 11, 21)]
    # _remove_typos edits the list it receives and returns that same list, so
    # comparing the result with `diff` itself could never fail. Keep an
    # untouched copy to compare against instead.
    expected = list(diff)

    #WHEN: We remove the typos in there.
    result = _remove_typos(diff)

    #THEN: The diff should not have changed.
    assert result == expected, u'The diff should not have changed.'
def op_length_test(self):
    """
    Check that _op_length reports the longer of the two ranges of an opcode.
    """
    #GIVEN: An opcode whose first range spans 2 and whose second range spans 10.
    opcode = ('replace', 0, 2, 4, 14)

    #WHEN: The length of the opcode is computed.
    measured = _op_length(opcode)

    #THEN: The larger of the two spans should be reported.
    assert measured == 10, u'The length should be 10.'
def length_of_equal_blocks_test(self):
    """
    Check that _length_of_equal_blocks sums up the sufficiently long equal blocks.
    """
    #GIVEN: Opcodes with two equal blocks of length 100 and one of length 10.
    opcodes = [
        ('equal', 0, 100, 0, 100),
        ('replace', 100, 110, 100, 110),
        ('equal', 110, 120, 110, 120),
        ('replace', 120, 200, 120, 200),
        ('equal', 200, 300, 200, 300),
    ]

    #WHEN: The combined length of the equal blocks is computed.
    total = _length_of_equal_blocks(opcodes)

    #THEN: Only the two long blocks should count; the 10-long one is expected to fall
    #      below min_block_size (presumably 70 -- confirm against songcompare).
    assert total == 200, u'The length should be 200.'
def length_of_longest_equal_block_test(self):
    """
    Check that _length_of_longest_equal_block finds the biggest equal block.
    """
    #GIVEN: Opcodes containing equal blocks of length 100 and 300.
    opcodes = [
        ('equal', 0, 100, 0, 100),
        ('replace', 100, 110, 100, 110),
        ('equal', 200, 500, 200, 500),
    ]

    #WHEN: The longest equal block is measured.
    longest = _length_of_longest_equal_block(opcodes)

    #THEN: The length of the 300-long block should be reported.
    assert longest == 300, u'The length should be 300.'