diff --git a/openlp/plugins/songs/forms/duplicatesongremovalform.py b/openlp/plugins/songs/forms/duplicatesongremovalform.py index 2da9113d6..7f65bf3ac 100644 --- a/openlp/plugins/songs/forms/duplicatesongremovalform.py +++ b/openlp/plugins/songs/forms/duplicatesongremovalform.py @@ -31,6 +31,7 @@ The duplicate song removal logic for OpenLP. """ import logging +import multiprocessing import os from PyQt4 import QtCore, QtGui @@ -45,6 +46,17 @@ from openlp.plugins.songs.lib.songcompare import songs_probably_equal log = logging.getLogger(__name__) +class SongIterator(object): + def __init__(self, songs): + self.songs = songs + + def __iter__(self): + for outer_song_counter in range(len(self.songs) - 1): + for inner_song_counter in range(outer_song_counter + 1, len(self.songs)): + yield (self.songs[outer_song_counter], self.songs[inner_song_counter]) + + + class DuplicateSongRemovalForm(OpenLPWizard): """ This is the Duplicate Song Removal Wizard. It provides functionality to search for and remove duplicate songs @@ -167,24 +179,32 @@ class DuplicateSongRemovalForm(OpenLPWizard): max_progress_count = max_songs * (max_songs - 1) // 2 self.duplicate_search_progress_bar.setMaximum(max_progress_count) songs = self.plugin.manager.get_all_objects(Song) - for outer_song_counter in range(max_songs - 1): - for inner_song_counter in range(outer_song_counter + 1, max_songs): - if songs_probably_equal(songs[outer_song_counter], songs[inner_song_counter]): - duplicate_added = self.add_duplicates_to_song_list( - songs[outer_song_counter], songs[inner_song_counter]) - if duplicate_added: - self.found_duplicates_edit.appendPlainText( - songs[outer_song_counter].title + " = " + songs[inner_song_counter].title) - self.duplicate_search_progress_bar.setValue(self.duplicate_search_progress_bar.value() + 1) - # The call to process_events() will keep the GUI responsive. - self.application.process_events() - if self.break_search: - return - self.review_total_count = len(self.duplicate_song_list) - if self.review_total_count == 0: - self.notify_no_duplicates() - else: + # Create a worker/process pool to check the songs. + process_number = max(1, multiprocessing.cpu_count() - 1) + pool = multiprocessing.Pool(process_number) + song_list = SongIterator(songs) + #song_list = [(songs[outer_song_counter], songs[inner_song_counter]) for outer_song_counter in range(max_songs - 1) for inner_song_counter in range(outer_song_counter + 1, max_songs)] + result = pool.imap_unordered(songs_probably_equal, song_list, 30) + # Do not accept any further tasks. Also this closes the processes if all tasks are done. + pool.close() + # While the processes are still working, start to look at the results. + for song_tuple in result: + self.duplicate_search_progress_bar.setValue(self.duplicate_search_progress_bar.value() + 1) + # The call to process_events() will keep the GUI responsive. + self.application.process_events() + if self.break_search: + pool.terminate() + return + if song_tuple is None: + continue + song1, song2 = song_tuple + duplicate_added = self.add_duplicates_to_song_list(song1, song2) + if duplicate_added: + self.found_duplicates_edit.appendPlainText(song1.title + " = " + song2.title) + if self.duplicate_song_list: self.button(QtGui.QWizard.NextButton).show() + else: + self.notify_no_duplicates() finally: self.application.set_normal_cursor() elif page_id == self.review_page_id: diff --git a/openlp/plugins/songs/lib/songcompare.py b/openlp/plugins/songs/lib/songcompare.py index 99a04beb2..09dacd649 100644 --- a/openlp/plugins/songs/lib/songcompare.py +++ b/openlp/plugins/songs/lib/songcompare.py @@ -52,13 +52,15 @@ MIN_BLOCK_SIZE = 70 MAX_TYPO_SIZE = 3 -def songs_probably_equal(song1, song2): +def songs_probably_equal(song1, song2=None): """ Calculate and return whether two songs are probably equal. :param song1: The first song to compare. :param song2: The second song to compare. """ + if song2 is None: + song1, song2 = song1 if len(song1.search_lyrics) < len(song2.search_lyrics): small = song1.search_lyrics large = song2.search_lyrics @@ -75,8 +77,9 @@ def songs_probably_equal(song1, song2): for element in diff_no_typos: if element[0] == "equal" and _op_length(element) >= MIN_BLOCK_SIZE: length_of_equal_blocks += _op_length(element) + if length_of_equal_blocks >= MIN_BLOCK_SIZE: - return True + return song1, song2 # Check 2: Similarity based on the relative length of the longest equal block. # Calculate the length of the largest equal block of the diff set. length_of_longest_equal_block = 0 @@ -84,9 +87,9 @@ def songs_probably_equal(song1, song2): if element[0] == "equal" and _op_length(element) > length_of_longest_equal_block: length_of_longest_equal_block = _op_length(element) if length_of_equal_blocks >= MIN_BLOCK_SIZE or length_of_longest_equal_block > len(small) * 2 // 3: - return True + return song1, song2 # Both checks failed. We assume the songs are not equal. - return False + return None def _op_length(opcode):