forked from openlp/openlp
Simplify (and slightly speed up) song comparison logic by inlining two functions.
This commit is contained in:
parent
8f5dd8f649
commit
8de486f869
@ -70,10 +70,24 @@ def songs_probably_equal(song1, song2):
|
||||
differ = difflib.SequenceMatcher(a=large, b=small)
|
||||
diff_tuples = differ.get_opcodes()
|
||||
diff_no_typos = _remove_typos(diff_tuples)
|
||||
if _length_of_equal_blocks(diff_no_typos) >= MIN_BLOCK_SIZE or \
|
||||
_length_of_longest_equal_block(diff_no_typos) > len(small) * 2 / 3:
|
||||
# Check 1: Similarity based on the absolute length of equal parts.
|
||||
# Calculate the total length of all equal blocks of the set.
|
||||
# Blocks smaller than min_block_size are not counted.
|
||||
length_of_equal_blocks = 0
|
||||
for element in diff_no_typos:
|
||||
if element[0] == "equal" and _op_length(element) >= MIN_BLOCK_SIZE:
|
||||
length_of_equal_blocks += _op_length(element)
|
||||
if length_of_equal_blocks >= MIN_BLOCK_SIZE:
|
||||
return True
|
||||
else:
|
||||
# Check 2: Similarity based on the relative length of the longest equal block.
|
||||
# Calculate the length of the largest equal block of the diff set.
|
||||
length_of_longest_equal_block = 0
|
||||
for element in diff_no_typos:
|
||||
if element[0] == "equal" and _op_length(element) > length_of_longest_equal_block:
|
||||
length_of_longest_equal_block = _op_length(element)
|
||||
if length_of_equal_blocks >= MIN_BLOCK_SIZE or length_of_longest_equal_block > len(small) * 2 / 3:
|
||||
return True
|
||||
# Both checks failed. We assume the songs are not equal.
|
||||
return False
|
||||
|
||||
|
||||
@ -122,32 +136,3 @@ def _remove_typos(diff):
|
||||
del diff[index + 1]
|
||||
|
||||
return diff
|
||||
|
||||
|
||||
def _length_of_equal_blocks(diff):
    """
    Sum up the lengths of all sufficiently large ``equal`` opcodes in a diff set.

    An ``equal`` opcode only contributes when its length, as reported by ``_op_length``, is at least
    ``MIN_BLOCK_SIZE``. Shorter equal blocks and every other kind of opcode are ignored.

    ``diff``
        The diff set to return the length for.
    """
    # Measure only the "equal" opcodes; the size filter is applied afterwards.
    equal_sizes = (_op_length(opcode) for opcode in diff if opcode[0] == "equal")
    return sum(size for size in equal_sizes if size >= MIN_BLOCK_SIZE)
|
||||
|
||||
|
||||
def _length_of_longest_equal_block(diff):
    """
    Find the length of the longest ``equal`` opcode in a diff set.

    The result is 0 when the diff set contains no ``equal`` opcode.

    ``diff``
        The diff set to return the length for.
    """
    # Seeding the candidates with 0 keeps the result at 0 when nothing matches.
    candidates = [0]
    candidates.extend(_op_length(opcode) for opcode in diff if opcode[0] == "equal")
    return max(candidates)
|
||||
|
@ -31,8 +31,7 @@ from unittest import TestCase
|
||||
|
||||
from mock import MagicMock
|
||||
|
||||
from openlp.plugins.songs.lib.songcompare import songs_probably_equal, _remove_typos, _op_length, \
|
||||
_length_of_equal_blocks, _length_of_longest_equal_block
|
||||
from openlp.plugins.songs.lib.songcompare import songs_probably_equal, _remove_typos, _op_length
|
||||
|
||||
class TestLib(TestCase):
|
||||
def setUp(self):
|
||||
@ -220,32 +219,3 @@ class TestLib(TestCase):
|
||||
|
||||
# THEN: The maximum length should be returned.
|
||||
assert result == 10, u'The length should be 10.'
|
||||
|
||||
|
||||
def length_of_equal_blocks_test(self):
    """
    Test the _length_of_equal_blocks function.
    """
    # GIVEN: A diff.
    opcodes = [
        ('equal', 0, 100, 0, 100),
        ('replace', 100, 110, 100, 110),
        ('equal', 110, 120, 110, 120),
        ('replace', 120, 200, 120, 200),
        ('equal', 200, 300, 200, 300)
    ]

    # WHEN: We calculate the length of that diff's equal blocks.
    total_length = _length_of_equal_blocks(opcodes)

    # THEN: The total length should be returned. Note: Equal blocks smaller than 70 are ignored.
    assert total_length == 200, u'The length should be 200.'
|
||||
|
||||
|
||||
def length_of_longest_equal_block_test(self):
    """
    Test the _length_of_longest_equal_block function.
    """
    # GIVEN: A diff.
    opcodes = [
        ('equal', 0, 100, 0, 100),
        ('replace', 100, 110, 100, 110),
        ('equal', 200, 500, 200, 500)
    ]

    # WHEN: We calculate the length of that diff's longest equal block.
    longest = _length_of_longest_equal_block(opcodes)

    # THEN: The length of the longest equal block should be returned.
    assert longest == 300, u'The length should be 300.'
|
Loading…
Reference in New Issue
Block a user