Change duplicate check to pass int-string tuples to workers, to workaround windows issue, see bug #1388850

Fixes: https://launchpad.net/bugs/1388850
This commit is contained in:
Tomas Groth 2014-11-05 09:42:33 +01:00
parent af6a5322e2
commit 77c7da2d20
3 changed files with 33 additions and 31 deletions

View File

@ -48,14 +48,15 @@ log = logging.getLogger(__name__)
def song_generator(songs):
"""
This is a generator function to return tuples of two songs. When completed then all songs have once been returned
combined with any other songs.
This is a generator function to return tuples of tuple with two songs and their position in the song array.
When completed then all songs have once been returned combined with any other songs.
:param songs: All songs in the database.
"""
for outer_song_counter in range(len(songs) - 1):
for inner_song_counter in range(outer_song_counter + 1, len(songs)):
yield (songs[outer_song_counter], songs[inner_song_counter])
yield ((outer_song_counter, songs[outer_song_counter].search_lyrics),
(inner_song_counter, songs[inner_song_counter].search_lyrics))
class DuplicateSongRemovalForm(OpenLPWizard, RegistryProperties):
@ -187,16 +188,17 @@ class DuplicateSongRemovalForm(OpenLPWizard, RegistryProperties):
# Do not accept any further tasks. Also this closes the processes if all tasks are done.
pool.close()
# While the processes are still working, start to look at the results.
for song_tuple in result:
for pos_tuple in result:
self.duplicate_search_progress_bar.setValue(self.duplicate_search_progress_bar.value() + 1)
# The call to process_events() will keep the GUI responsive.
self.application.process_events()
if self.break_search:
pool.terminate()
return
if song_tuple is None:
if pos_tuple is None:
continue
song1, song2 = song_tuple
song1 = songs[pos_tuple[0]]
song2 = songs[pos_tuple[1]]
duplicate_added = self.add_duplicates_to_song_list(song1, song2)
if duplicate_added:
self.found_duplicates_edit.appendPlainText(song1.title + " = " + song2.title)

View File

@ -59,12 +59,14 @@ def songs_probably_equal(song_tupel):
:param song_tupel: A tuple of two songs to compare.
"""
song1, song2 = song_tupel
if len(song1.search_lyrics) < len(song2.search_lyrics):
small = song1.search_lyrics
large = song2.search_lyrics
pos1, lyrics1 = song1
pos2, lyrics2 = song2
if len(lyrics1) < len(lyrics2):
small = lyrics1
large = lyrics2
else:
small = song2.search_lyrics
large = song1.search_lyrics
small = lyrics2
large = lyrics1
differ = difflib.SequenceMatcher(a=large, b=small)
diff_tuples = differ.get_opcodes()
diff_no_typos = _remove_typos(diff_tuples)
@ -77,7 +79,7 @@ def songs_probably_equal(song_tupel):
length_of_equal_blocks += _op_length(element)
if length_of_equal_blocks >= MIN_BLOCK_SIZE:
return song1, song2
return pos1, pos2
# Check 2: Similarity based on the relative length of the longest equal block.
# Calculate the length of the largest equal block of the diff set.
length_of_longest_equal_block = 0
@ -85,7 +87,7 @@ def songs_probably_equal(song_tupel):
if element[0] == "equal" and _op_length(element) > length_of_longest_equal_block:
length_of_longest_equal_block = _op_length(element)
if length_of_longest_equal_block > len(small) * 2 // 3:
return song1, song2
return pos1, pos2
# Both checks failed. We assume the songs are not equal.
return None

View File

@ -58,8 +58,6 @@ class TestLib(TestCase):
i love that old cross where the dearest and best for a world of lost sinners was slain so ill cherish the
old rugged cross till my trophies at last i lay down i will cling to the old rugged cross and exchange it
some day for a crown'''
self.song1 = MagicMock()
self.song2 = MagicMock()
def clean_string_test(self):
"""
@ -92,53 +90,53 @@ class TestLib(TestCase):
Test the songs_probably_equal function with twice the same song.
"""
# GIVEN: Two equal songs.
self.song1.search_lyrics = self.full_lyrics
self.song2.search_lyrics = self.full_lyrics
song_tuple1 = (2, self.full_lyrics)
song_tuple2 = (4, self.full_lyrics)
# WHEN: We compare those songs for equality.
result = songs_probably_equal((self.song1, self.song2))
result = songs_probably_equal((song_tuple1, song_tuple2))
# THEN: The result should be a tuple..
assert result == (self.song1, self.song2), 'The result should be the tuble of songs'
assert result == (2, 4), 'The result should be the tuble of song positions'
def songs_probably_equal_short_song_test(self):
"""
Test the songs_probably_equal function with a song and a shorter version of the same song.
"""
# GIVEN: A song and a short version of the same song.
self.song1.search_lyrics = self.full_lyrics
self.song2.search_lyrics = self.short_lyrics
song_tuple1 = (1, self.full_lyrics)
song_tuple2 = (3, self.short_lyrics)
# WHEN: We compare those songs for equality.
result = songs_probably_equal((self.song1, self.song2))
result = songs_probably_equal((song_tuple1, song_tuple2))
# THEN: The result should be a tuple..
assert result == (self.song1, self.song2), 'The result should be the tuble of songs'
assert result == (1, 3), 'The result should be the tuble of song positions'
def songs_probably_equal_error_song_test(self):
"""
Test the songs_probably_equal function with a song and a very erroneous version of the same song.
"""
# GIVEN: A song and the same song with lots of errors.
self.song1.search_lyrics = self.full_lyrics
self.song2.search_lyrics = self.error_lyrics
song_tuple1 = (4, self.full_lyrics)
song_tuple2 = (7, self.error_lyrics)
# WHEN: We compare those songs for equality.
result = songs_probably_equal((self.song1, self.song2))
result = songs_probably_equal((song_tuple1, song_tuple2))
# THEN: The result should be a tuple of songs..
assert result == (self.song1, self.song2), 'The result should be the tuble of songs'
# THEN: The result should be a tuple of song positions.
assert result == (4, 7), 'The result should be the tuble of song positions'
def songs_probably_equal_different_song_test(self):
"""
Test the songs_probably_equal function with two different songs.
"""
# GIVEN: Two different songs.
self.song1.search_lyrics = self.full_lyrics
self.song2.search_lyrics = self.different_lyrics
song_tuple1 = (5, self.full_lyrics)
song_tuple2 = (8, self.different_lyrics)
# WHEN: We compare those songs for equality.
result = songs_probably_equal((self.song1, self.song2))
result = songs_probably_equal((song_tuple1, song_tuple2))
# THEN: The result should be None.
assert result is None, 'The result should be None'