Change duplicate check to pass int-string tuples to workers, to work around a Windows issue; see bug #1388850

Fixes: https://launchpad.net/bugs/1388850
This commit is contained in:
Tomas Groth 2014-11-05 09:42:33 +01:00
parent af6a5322e2
commit 77c7da2d20
3 changed files with 33 additions and 31 deletions

View File

@ -48,14 +48,15 @@ log = logging.getLogger(__name__)
def song_generator(songs): def song_generator(songs):
""" """
This is a generator function to return tuples of two songs. When completed then all songs have once been returned This is a generator function to return tuples of tuple with two songs and their position in the song array.
combined with any other songs. When completed then all songs have once been returned combined with any other songs.
:param songs: All songs in the database. :param songs: All songs in the database.
""" """
for outer_song_counter in range(len(songs) - 1): for outer_song_counter in range(len(songs) - 1):
for inner_song_counter in range(outer_song_counter + 1, len(songs)): for inner_song_counter in range(outer_song_counter + 1, len(songs)):
yield (songs[outer_song_counter], songs[inner_song_counter]) yield ((outer_song_counter, songs[outer_song_counter].search_lyrics),
(inner_song_counter, songs[inner_song_counter].search_lyrics))
class DuplicateSongRemovalForm(OpenLPWizard, RegistryProperties): class DuplicateSongRemovalForm(OpenLPWizard, RegistryProperties):
@ -187,16 +188,17 @@ class DuplicateSongRemovalForm(OpenLPWizard, RegistryProperties):
# Do not accept any further tasks. Also this closes the processes if all tasks are done. # Do not accept any further tasks. Also this closes the processes if all tasks are done.
pool.close() pool.close()
# While the processes are still working, start to look at the results. # While the processes are still working, start to look at the results.
for song_tuple in result: for pos_tuple in result:
self.duplicate_search_progress_bar.setValue(self.duplicate_search_progress_bar.value() + 1) self.duplicate_search_progress_bar.setValue(self.duplicate_search_progress_bar.value() + 1)
# The call to process_events() will keep the GUI responsive. # The call to process_events() will keep the GUI responsive.
self.application.process_events() self.application.process_events()
if self.break_search: if self.break_search:
pool.terminate() pool.terminate()
return return
if song_tuple is None: if pos_tuple is None:
continue continue
song1, song2 = song_tuple song1 = songs[pos_tuple[0]]
song2 = songs[pos_tuple[1]]
duplicate_added = self.add_duplicates_to_song_list(song1, song2) duplicate_added = self.add_duplicates_to_song_list(song1, song2)
if duplicate_added: if duplicate_added:
self.found_duplicates_edit.appendPlainText(song1.title + " = " + song2.title) self.found_duplicates_edit.appendPlainText(song1.title + " = " + song2.title)

View File

@ -59,12 +59,14 @@ def songs_probably_equal(song_tupel):
:param song_tupel: A tuple of two songs to compare. :param song_tupel: A tuple of two songs to compare.
""" """
song1, song2 = song_tupel song1, song2 = song_tupel
if len(song1.search_lyrics) < len(song2.search_lyrics): pos1, lyrics1 = song1
small = song1.search_lyrics pos2, lyrics2 = song2
large = song2.search_lyrics if len(lyrics1) < len(lyrics2):
small = lyrics1
large = lyrics2
else: else:
small = song2.search_lyrics small = lyrics2
large = song1.search_lyrics large = lyrics1
differ = difflib.SequenceMatcher(a=large, b=small) differ = difflib.SequenceMatcher(a=large, b=small)
diff_tuples = differ.get_opcodes() diff_tuples = differ.get_opcodes()
diff_no_typos = _remove_typos(diff_tuples) diff_no_typos = _remove_typos(diff_tuples)
@ -77,7 +79,7 @@ def songs_probably_equal(song_tupel):
length_of_equal_blocks += _op_length(element) length_of_equal_blocks += _op_length(element)
if length_of_equal_blocks >= MIN_BLOCK_SIZE: if length_of_equal_blocks >= MIN_BLOCK_SIZE:
return song1, song2 return pos1, pos2
# Check 2: Similarity based on the relative length of the longest equal block. # Check 2: Similarity based on the relative length of the longest equal block.
# Calculate the length of the largest equal block of the diff set. # Calculate the length of the largest equal block of the diff set.
length_of_longest_equal_block = 0 length_of_longest_equal_block = 0
@ -85,7 +87,7 @@ def songs_probably_equal(song_tupel):
if element[0] == "equal" and _op_length(element) > length_of_longest_equal_block: if element[0] == "equal" and _op_length(element) > length_of_longest_equal_block:
length_of_longest_equal_block = _op_length(element) length_of_longest_equal_block = _op_length(element)
if length_of_longest_equal_block > len(small) * 2 // 3: if length_of_longest_equal_block > len(small) * 2 // 3:
return song1, song2 return pos1, pos2
# Both checks failed. We assume the songs are not equal. # Both checks failed. We assume the songs are not equal.
return None return None

View File

@ -58,8 +58,6 @@ class TestLib(TestCase):
i love that old cross where the dearest and best for a world of lost sinners was slain so ill cherish the i love that old cross where the dearest and best for a world of lost sinners was slain so ill cherish the
old rugged cross till my trophies at last i lay down i will cling to the old rugged cross and exchange it old rugged cross till my trophies at last i lay down i will cling to the old rugged cross and exchange it
some day for a crown''' some day for a crown'''
self.song1 = MagicMock()
self.song2 = MagicMock()
def clean_string_test(self): def clean_string_test(self):
""" """
@ -92,53 +90,53 @@ class TestLib(TestCase):
Test the songs_probably_equal function with twice the same song. Test the songs_probably_equal function with twice the same song.
""" """
# GIVEN: Two equal songs. # GIVEN: Two equal songs.
self.song1.search_lyrics = self.full_lyrics song_tuple1 = (2, self.full_lyrics)
self.song2.search_lyrics = self.full_lyrics song_tuple2 = (4, self.full_lyrics)
# WHEN: We compare those songs for equality. # WHEN: We compare those songs for equality.
result = songs_probably_equal((self.song1, self.song2)) result = songs_probably_equal((song_tuple1, song_tuple2))
# THEN: The result should be a tuple.. # THEN: The result should be a tuple..
assert result == (self.song1, self.song2), 'The result should be the tuble of songs' assert result == (2, 4), 'The result should be the tuble of song positions'
def songs_probably_equal_short_song_test(self): def songs_probably_equal_short_song_test(self):
""" """
Test the songs_probably_equal function with a song and a shorter version of the same song. Test the songs_probably_equal function with a song and a shorter version of the same song.
""" """
# GIVEN: A song and a short version of the same song. # GIVEN: A song and a short version of the same song.
self.song1.search_lyrics = self.full_lyrics song_tuple1 = (1, self.full_lyrics)
self.song2.search_lyrics = self.short_lyrics song_tuple2 = (3, self.short_lyrics)
# WHEN: We compare those songs for equality. # WHEN: We compare those songs for equality.
result = songs_probably_equal((self.song1, self.song2)) result = songs_probably_equal((song_tuple1, song_tuple2))
# THEN: The result should be a tuple.. # THEN: The result should be a tuple..
assert result == (self.song1, self.song2), 'The result should be the tuble of songs' assert result == (1, 3), 'The result should be the tuble of song positions'
def songs_probably_equal_error_song_test(self): def songs_probably_equal_error_song_test(self):
""" """
Test the songs_probably_equal function with a song and a very erroneous version of the same song. Test the songs_probably_equal function with a song and a very erroneous version of the same song.
""" """
# GIVEN: A song and the same song with lots of errors. # GIVEN: A song and the same song with lots of errors.
self.song1.search_lyrics = self.full_lyrics song_tuple1 = (4, self.full_lyrics)
self.song2.search_lyrics = self.error_lyrics song_tuple2 = (7, self.error_lyrics)
# WHEN: We compare those songs for equality. # WHEN: We compare those songs for equality.
result = songs_probably_equal((self.song1, self.song2)) result = songs_probably_equal((song_tuple1, song_tuple2))
# THEN: The result should be a tuple of songs.. # THEN: The result should be a tuple of song positions.
assert result == (self.song1, self.song2), 'The result should be the tuble of songs' assert result == (4, 7), 'The result should be the tuble of song positions'
def songs_probably_equal_different_song_test(self): def songs_probably_equal_different_song_test(self):
""" """
Test the songs_probably_equal function with two different songs. Test the songs_probably_equal function with two different songs.
""" """
# GIVEN: Two different songs. # GIVEN: Two different songs.
self.song1.search_lyrics = self.full_lyrics song_tuple1 = (5, self.full_lyrics)
self.song2.search_lyrics = self.different_lyrics song_tuple2 = (8, self.different_lyrics)
# WHEN: We compare those songs for equality. # WHEN: We compare those songs for equality.
result = songs_probably_equal((self.song1, self.song2)) result = songs_probably_equal((song_tuple1, song_tuple2))
# THEN: The result should be None. # THEN: The result should be None.
assert result is None, 'The result should be None' assert result is None, 'The result should be None'