Simplify (and slightly speed up) song comparison logic by inlining two functions.

This commit is contained in:
Patrick Zimmermann 2013-02-28 23:20:48 +01:00
parent 8f5dd8f649
commit 8de486f869
2 changed files with 20 additions and 65 deletions

View File

@ -70,10 +70,24 @@ def songs_probably_equal(song1, song2):
differ = difflib.SequenceMatcher(a=large, b=small) differ = difflib.SequenceMatcher(a=large, b=small)
diff_tuples = differ.get_opcodes() diff_tuples = differ.get_opcodes()
diff_no_typos = _remove_typos(diff_tuples) diff_no_typos = _remove_typos(diff_tuples)
if _length_of_equal_blocks(diff_no_typos) >= MIN_BLOCK_SIZE or \ # Check 1: Similarity based on the absolute length of equal parts.
_length_of_longest_equal_block(diff_no_typos) > len(small) * 2 / 3: # Calculate the total length of all equal blocks of the set.
# Blocks smaller than min_block_size are not counted.
length_of_equal_blocks = 0
for element in diff_no_typos:
if element[0] == "equal" and _op_length(element) >= MIN_BLOCK_SIZE:
length_of_equal_blocks += _op_length(element)
if length_of_equal_blocks >= MIN_BLOCK_SIZE:
return True return True
else: # Check 2: Similarity based on the relative length of the longest equal block.
# Calculate the length of the largest equal block of the diff set.
length_of_longest_equal_block = 0
for element in diff_no_typos:
if element[0] == "equal" and _op_length(element) > length_of_longest_equal_block:
length_of_longest_equal_block = _op_length(element)
if length_of_equal_blocks >= MIN_BLOCK_SIZE or length_of_longest_equal_block > len(small) * 2 / 3:
return True
# Both checks failed. We assume the songs are not equal.
return False return False
@ -122,32 +136,3 @@ def _remove_typos(diff):
del diff[index + 1] del diff[index + 1]
return diff return diff
def _length_of_equal_blocks(diff):
    """
    Sum up the lengths of the "equal" entries of a diff set.
    Only entries whose length (as reported by ``_op_length``) is at least
    ``MIN_BLOCK_SIZE`` contribute to the sum.

    ``diff``
        The diff set (a sequence of opcode tuples whose first item is the
        operation name) to compute the length for.
    """
    # Lazily compute the length of every entry tagged "equal".
    equal_lengths = (_op_length(opcode) for opcode in diff if opcode[0] == "equal")
    # Entries shorter than the minimum block size are left out of the total.
    return sum(length for length in equal_lengths if length >= MIN_BLOCK_SIZE)
def _length_of_longest_equal_block(diff):
    """
    Find the length of the longest "equal" entry of a diff set.
    If the diff set has no "equal" entry at all, 0 is returned.

    ``diff``
        The diff set (a sequence of opcode tuples whose first item is the
        operation name) to inspect.
    """
    longest = 0
    for opcode in diff:
        # Anything that is not an "equal" entry is irrelevant here.
        if opcode[0] != "equal":
            continue
        longest = max(longest, _op_length(opcode))
    return longest

View File

@ -31,8 +31,7 @@ from unittest import TestCase
from mock import MagicMock from mock import MagicMock
from openlp.plugins.songs.lib.songcompare import songs_probably_equal, _remove_typos, _op_length, \ from openlp.plugins.songs.lib.songcompare import songs_probably_equal, _remove_typos, _op_length
_length_of_equal_blocks, _length_of_longest_equal_block
class TestLib(TestCase): class TestLib(TestCase):
def setUp(self): def setUp(self):
@ -220,32 +219,3 @@ class TestLib(TestCase):
# THEN: The maximum length should be returned. # THEN: The maximum length should be returned.
assert result == 10, u'The length should be 10.' assert result == 10, u'The length should be 10.'
def length_of_equal_blocks_test(self):
    """
    Test the total returned by the _length_of_equal_blocks function.
    """
    # GIVEN: A diff set with three equal entries separated by replace entries.
    opcodes = [
        ('equal', 0, 100, 0, 100),
        ('replace', 100, 110, 100, 110),
        ('equal', 110, 120, 110, 120),
        ('replace', 120, 200, 120, 200),
        ('equal', 200, 300, 200, 300),
    ]
    # WHEN: We calculate the total length of that diff's equal blocks.
    total = _length_of_equal_blocks(opcodes)
    # THEN: The total length should be returned. Note: Equal blocks smaller than 70 are ignored.
    assert total == 200, u'The length should be 200.'
def length_of_longest_equal_block_test(self):
    """
    Test the value returned by the _length_of_longest_equal_block function.
    """
    # GIVEN: A diff set with two equal entries separated by a replace entry.
    opcodes = [
        ('equal', 0, 100, 0, 100),
        ('replace', 100, 110, 100, 110),
        ('equal', 200, 500, 200, 500),
    ]
    # WHEN: We calculate the length of that diff's longest equal block.
    longest = _length_of_longest_equal_block(opcodes)
    # THEN: The length of the longest equal block should be returned.
    assert longest == 300, u'The length should be 300.'