From 8de486f869efb862d3821f17559e1bb7e096d2ca Mon Sep 17 00:00:00 2001 From: Patrick Zimmermann Date: Thu, 28 Feb 2013 23:20:48 +0100 Subject: [PATCH] Simplify (and slightly speed up) song comparison logic by inlining two functions. --- openlp/plugins/songs/lib/songcompare.py | 53 +++++++------------ .../openlp_plugins/songs/test_lib.py | 32 +---------- 2 files changed, 20 insertions(+), 65 deletions(-) diff --git a/openlp/plugins/songs/lib/songcompare.py b/openlp/plugins/songs/lib/songcompare.py index 543156314..a98e61380 100644 --- a/openlp/plugins/songs/lib/songcompare.py +++ b/openlp/plugins/songs/lib/songcompare.py @@ -70,11 +70,25 @@ def songs_probably_equal(song1, song2): differ = difflib.SequenceMatcher(a=large, b=small) diff_tuples = differ.get_opcodes() diff_no_typos = _remove_typos(diff_tuples) - if _length_of_equal_blocks(diff_no_typos) >= MIN_BLOCK_SIZE or \ - _length_of_longest_equal_block(diff_no_typos) > len(small) * 2 / 3: - return True - else: - return False + # Check 1: Similarity based on the absolute length of equal parts. + # Calculate the total length of all equal blocks of the set. + # Blocks smaller than MIN_BLOCK_SIZE are not counted. + length_of_equal_blocks = 0 + for element in diff_no_typos: + if element[0] == "equal" and _op_length(element) >= MIN_BLOCK_SIZE: + length_of_equal_blocks += _op_length(element) + if length_of_equal_blocks >= MIN_BLOCK_SIZE: + return True + # Check 2: Similarity based on the relative length of the longest equal block. + # Calculate the length of the largest equal block of the diff set. + length_of_longest_equal_block = 0 + for element in diff_no_typos: + if element[0] == "equal" and _op_length(element) > length_of_longest_equal_block: + length_of_longest_equal_block = _op_length(element) + if length_of_equal_blocks >= MIN_BLOCK_SIZE or length_of_longest_equal_block > len(small) * 2 / 3: + return True + # Both checks failed. We assume the songs are not equal. 
+ return False def _op_length(opcode): @@ -122,32 +136,3 @@ def _remove_typos(diff): del diff[index + 1] return diff - - -def _length_of_equal_blocks(diff): - """ - Return the total length of all equal blocks in a diff set. - Blocks smaller than min_block_size are not counted. - - ``diff`` - The diff set to return the length for. - """ - length = 0 - for element in diff: - if element[0] == "equal" and _op_length(element) >= MIN_BLOCK_SIZE: - length += _op_length(element) - return length - - -def _length_of_longest_equal_block(diff): - """ - Return the length of the largest equal block in a diff set. - - ``diff`` - The diff set to return the length for. - """ - length = 0 - for element in diff: - if element[0] == "equal" and _op_length(element) > length: - length = _op_length(element) - return length diff --git a/tests/functional/openlp_plugins/songs/test_lib.py b/tests/functional/openlp_plugins/songs/test_lib.py index be82d3db0..b79f51132 100644 --- a/tests/functional/openlp_plugins/songs/test_lib.py +++ b/tests/functional/openlp_plugins/songs/test_lib.py @@ -31,8 +31,7 @@ from unittest import TestCase from mock import MagicMock -from openlp.plugins.songs.lib.songcompare import songs_probably_equal, _remove_typos, _op_length, \ - _length_of_equal_blocks, _length_of_longest_equal_block +from openlp.plugins.songs.lib.songcompare import songs_probably_equal, _remove_typos, _op_length class TestLib(TestCase): def setUp(self): @@ -220,32 +219,3 @@ class TestLib(TestCase): # THEN: The maximum length should be returned. assert result == 10, u'The length should be 10.' - - - def length_of_equal_blocks_test(self): - """ - Test the _length_of_equal_blocks function. - """ - # GIVEN: A diff. - diff = [('equal', 0, 100, 0, 100), ('replace', 100, 110, 100, 110), ('equal', 110, 120, 110, 120), \ - ('replace', 120, 200, 120, 200), ('equal', 200, 300, 200, 300)] - - # WHEN: We calculate the length of that diffs equal blocks. 
- result = _length_of_equal_blocks(diff) - - # THEN: The total length should be returned. Note: Equals smaller 70 are ignored. - assert result == 200, u'The length should be 200.' - - - def length_of_longest_equal_block_test(self): - """ - Test the _length_of_longest_equal_block function. - """ - # GIVEN: A diff. - diff = [('equal', 0, 100, 0, 100), ('replace', 100, 110, 100, 110), ('equal', 200, 500, 200, 500)] - - # WHEN: We calculate the length of that diffs longest equal block. - result = _length_of_longest_equal_block(diff) - - # dTHEN: The total correct length should be returned. - assert result == 300, u'The length should be 300.' \ No newline at end of file