forked from openlp/openlp
Simplify (and slightly speed up) song comparison logic by inlining two functions.
This commit is contained in:
parent
8f5dd8f649
commit
8de486f869
@ -70,10 +70,24 @@ def songs_probably_equal(song1, song2):
|
|||||||
differ = difflib.SequenceMatcher(a=large, b=small)
|
differ = difflib.SequenceMatcher(a=large, b=small)
|
||||||
diff_tuples = differ.get_opcodes()
|
diff_tuples = differ.get_opcodes()
|
||||||
diff_no_typos = _remove_typos(diff_tuples)
|
diff_no_typos = _remove_typos(diff_tuples)
|
||||||
if _length_of_equal_blocks(diff_no_typos) >= MIN_BLOCK_SIZE or \
|
# Check 1: Similarity based on the absolute length of equal parts.
|
||||||
_length_of_longest_equal_block(diff_no_typos) > len(small) * 2 / 3:
|
# Calculate the total length of all equal blocks of the set.
|
||||||
|
# Blocks smaller than MIN_BLOCK_SIZE are not counted.
|
||||||
|
length_of_equal_blocks = 0
|
||||||
|
for element in diff_no_typos:
|
||||||
|
if element[0] == "equal" and _op_length(element) >= MIN_BLOCK_SIZE:
|
||||||
|
length_of_equal_blocks += _op_length(element)
|
||||||
|
if length_of_equal_blocks >= MIN_BLOCK_SIZE:
|
||||||
return True
|
return True
|
||||||
else:
|
# Check 2: Similarity based on the relative length of the longest equal block.
|
||||||
|
# Calculate the length of the largest equal block of the diff set.
|
||||||
|
length_of_longest_equal_block = 0
|
||||||
|
for element in diff_no_typos:
|
||||||
|
if element[0] == "equal" and _op_length(element) > length_of_longest_equal_block:
|
||||||
|
length_of_longest_equal_block = _op_length(element)
|
||||||
|
if length_of_equal_blocks >= MIN_BLOCK_SIZE or length_of_longest_equal_block > len(small) * 2 / 3:
|
||||||
|
return True
|
||||||
|
# Both checks failed. We assume the songs are not equal.
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
@ -122,32 +136,3 @@ def _remove_typos(diff):
|
|||||||
del diff[index + 1]
|
del diff[index + 1]
|
||||||
|
|
||||||
return diff
|
return diff
|
||||||
|
|
||||||
|
|
||||||
def _length_of_equal_blocks(diff):
|
|
||||||
"""
|
|
||||||
Return the total length of all equal blocks in a diff set.
|
|
||||||
Blocks smaller than min_block_size are not counted.
|
|
||||||
|
|
||||||
``diff``
|
|
||||||
The diff set to return the length for.
|
|
||||||
"""
|
|
||||||
length = 0
|
|
||||||
for element in diff:
|
|
||||||
if element[0] == "equal" and _op_length(element) >= MIN_BLOCK_SIZE:
|
|
||||||
length += _op_length(element)
|
|
||||||
return length
|
|
||||||
|
|
||||||
|
|
||||||
def _length_of_longest_equal_block(diff):
|
|
||||||
"""
|
|
||||||
Return the length of the largest equal block in a diff set.
|
|
||||||
|
|
||||||
``diff``
|
|
||||||
The diff set to return the length for.
|
|
||||||
"""
|
|
||||||
length = 0
|
|
||||||
for element in diff:
|
|
||||||
if element[0] == "equal" and _op_length(element) > length:
|
|
||||||
length = _op_length(element)
|
|
||||||
return length
|
|
||||||
|
@ -31,8 +31,7 @@ from unittest import TestCase
|
|||||||
|
|
||||||
from mock import MagicMock
|
from mock import MagicMock
|
||||||
|
|
||||||
from openlp.plugins.songs.lib.songcompare import songs_probably_equal, _remove_typos, _op_length, \
|
from openlp.plugins.songs.lib.songcompare import songs_probably_equal, _remove_typos, _op_length
|
||||||
_length_of_equal_blocks, _length_of_longest_equal_block
|
|
||||||
|
|
||||||
class TestLib(TestCase):
|
class TestLib(TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
@ -220,32 +219,3 @@ class TestLib(TestCase):
|
|||||||
|
|
||||||
# THEN: The maximum length should be returned.
|
# THEN: The maximum length should be returned.
|
||||||
assert result == 10, u'The length should be 10.'
|
assert result == 10, u'The length should be 10.'
|
||||||
|
|
||||||
|
|
||||||
def length_of_equal_blocks_test(self):
|
|
||||||
"""
|
|
||||||
Test the _length_of_equal_blocks function.
|
|
||||||
"""
|
|
||||||
# GIVEN: A diff.
|
|
||||||
diff = [('equal', 0, 100, 0, 100), ('replace', 100, 110, 100, 110), ('equal', 110, 120, 110, 120), \
|
|
||||||
('replace', 120, 200, 120, 200), ('equal', 200, 300, 200, 300)]
|
|
||||||
|
|
||||||
# WHEN: We calculate the length of that diff's equal blocks.
|
|
||||||
result = _length_of_equal_blocks(diff)
|
|
||||||
|
|
||||||
# THEN: The total length should be returned. Note: Equal blocks smaller than 70 are ignored.
|
|
||||||
assert result == 200, u'The length should be 200.'
|
|
||||||
|
|
||||||
|
|
||||||
def length_of_longest_equal_block_test(self):
|
|
||||||
"""
|
|
||||||
Test the _length_of_longest_equal_block function.
|
|
||||||
"""
|
|
||||||
# GIVEN: A diff.
|
|
||||||
diff = [('equal', 0, 100, 0, 100), ('replace', 100, 110, 100, 110), ('equal', 200, 500, 200, 500)]
|
|
||||||
|
|
||||||
# WHEN: We calculate the length of that diff's longest equal block.
|
|
||||||
result = _length_of_longest_equal_block(diff)
|
|
||||||
|
|
||||||
# THEN: The total correct length should be returned.
|
|
||||||
assert result == 300, u'The length should be 300.'
|
|
Loading…
Reference in New Issue
Block a user