diff --git a/openlp/plugins/songs/forms/duplicatesongremovalform.py b/openlp/plugins/songs/forms/duplicatesongremovalform.py index 4bcce1c44..16867ea0b 100644 --- a/openlp/plugins/songs/forms/duplicatesongremovalform.py +++ b/openlp/plugins/songs/forms/duplicatesongremovalform.py @@ -33,6 +33,7 @@ The duplicate song removal logic for OpenLP. import logging import multiprocessing import os +import functools from PyQt4 import QtCore, QtGui @@ -46,17 +47,16 @@ from openlp.plugins.songs.lib.songcompare import songs_probably_equal log = logging.getLogger(__name__) -def song_generator(songs): +def tuple_generator(number_of_songs): """ - This is a generator function to return tuples of tuple with two songs and their position in the song array. - When completed then all songs have once been returned combined with any other songs. + This is a generator function that yields tuples of two song positions. When it completes, every song position has + been yielded exactly once in combination with every other song position. - :param songs: All songs in the database. + :param number_of_songs: Number of songs in the DB. """ - for outer_song_counter in range(len(songs) - 1): - for inner_song_counter in range(outer_song_counter + 1, len(songs)): - yield ((outer_song_counter, songs[outer_song_counter].search_lyrics), - (inner_song_counter, songs[inner_song_counter].search_lyrics)) + for outer_song_counter in range(number_of_songs - 1): + for inner_song_counter in range(outer_song_counter + 1, number_of_songs): + yield (outer_song_counter, inner_song_counter) class DuplicateSongRemovalForm(OpenLPWizard, RegistryProperties): @@ -184,7 +184,9 @@ class DuplicateSongRemovalForm(OpenLPWizard, RegistryProperties): # Create a worker/process pool to check the songs. 
process_number = max(1, multiprocessing.cpu_count() - 1) pool = multiprocessing.Pool(process_number) - result = pool.imap_unordered(songs_probably_equal, song_generator(songs), 30) + # Build a list of all search lyrics; songs are then referenced by their position in this list. + song_lyrics = [song.search_lyrics for song in songs] + result = pool.imap_unordered(functools.partial(songs_probably_equal, song_lyrics), tuple_generator(len(songs)), 30) # Do not accept any further tasks. Also this closes the processes if all tasks are done. pool.close() # While the processes are still working, start to look at the results. diff --git a/openlp/plugins/songs/lib/songcompare.py b/openlp/plugins/songs/lib/songcompare.py index ddd5e4552..9101245f5 100644 --- a/openlp/plugins/songs/lib/songcompare.py +++ b/openlp/plugins/songs/lib/songcompare.py @@ -52,15 +52,15 @@ MIN_BLOCK_SIZE = 70 MAX_TYPO_SIZE = 3 -def songs_probably_equal(song_tupel): +def songs_probably_equal(songs, pos_tupel): """ Calculate and return whether two songs are probably equal. - :param song_tupel: A tuple of two songs to compare. + :param songs: List with the search lyrics of all songs. + :param pos_tupel: A tuple with the positions (in ``songs``) of the two songs to compare. """ - song1, song2 = song_tupel - pos1, lyrics1 = song1 - pos2, lyrics2 = song2 + lyrics1 = songs[pos_tupel[0]] + lyrics2 = songs[pos_tupel[1]] if len(lyrics1) < len(lyrics2): small = lyrics1 large = lyrics2 @@ -79,7 +79,7 @@ def songs_probably_equal(song_tupel): length_of_equal_blocks += _op_length(element) if length_of_equal_blocks >= MIN_BLOCK_SIZE: - return pos1, pos2 + return pos_tupel[0], pos_tupel[1] # Check 2: Similarity based on the relative length of the longest equal block. # Calculate the length of the largest equal block of the diff set. length_of_longest_equal_block = 0 @@ -87,7 +87,7 @@ def songs_probably_equal(song_tupel): if element[0] == "equal" and _op_length(element) > length_of_longest_equal_block: length_of_longest_equal_block = _op_length(element) if length_of_longest_equal_block > len(small) * 2 // 3: - return pos1, pos2 + return pos_tupel[0], pos_tupel[1] # Both checks failed. We assume the songs are not equal. return None