use a worker pool to compare songs in the duplicate finder

bzr-revno: 2355
This commit is contained in:
Andreas Preikschat 2014-04-15 22:43:01 +02:00 committed by Raoul Snyman
commit 7a93e821ba
5 changed files with 97 additions and 40 deletions

View File

@ -235,7 +235,8 @@ class Ui_ServiceManager(object):
self.edit_action = create_widget_action(self.menu, text=translate('OpenLP.ServiceManager', '&Edit Item'),
icon=':/general/general_edit.png', triggers=self.remote_edit)
self.rename_action = create_widget_action(self.menu, text=translate('OpenLP.ServiceManager', '&Rename...'),
icon=':/general/general_edit.png', triggers=self.on_service_item_rename)
icon=':/general/general_edit.png',
triggers=self.on_service_item_rename)
self.maintain_action = create_widget_action(self.menu, text=translate('OpenLP.ServiceManager', '&Reorder Item'),
icon=':/general/general_edit.png',
triggers=self.on_service_item_edit_form)
@ -1488,9 +1489,11 @@ class ServiceManager(OpenLPMixin, RegistryMixin, QtGui.QWidget, Ui_ServiceManage
if new_item:
self.add_service_item(new_item, replace=True)
def on_service_item_rename(self):
def on_service_item_rename(self, field=None):
"""
Opens a dialog to rename the service item.
:param field: Not used, but PyQt needs this.
"""
item = self.find_service_item()[0]
if not self.service_items[item]['service_item'].is_capable(ItemCapabilities.CanEditTitle):

View File

@ -31,6 +31,7 @@ The duplicate song removal logic for OpenLP.
"""
import logging
import multiprocessing
import os
from PyQt4 import QtCore, QtGui
@ -45,6 +46,18 @@ from openlp.plugins.songs.lib.songcompare import songs_probably_equal
log = logging.getLogger(__name__)
def song_generator(songs):
"""
This is a generator function to return tuples of two songs. When completed then all songs have once been returned
combined with any other songs.
:param songs: All songs in the database.
"""
for outer_song_counter in range(len(songs) - 1):
for inner_song_counter in range(outer_song_counter + 1, len(songs)):
yield (songs[outer_song_counter], songs[inner_song_counter])
class DuplicateSongRemovalForm(OpenLPWizard, RegistryProperties):
"""
This is the Duplicate Song Removal Wizard. It provides functionality to search for and remove duplicate songs
@ -167,24 +180,31 @@ class DuplicateSongRemovalForm(OpenLPWizard, RegistryProperties):
max_progress_count = max_songs * (max_songs - 1) // 2
self.duplicate_search_progress_bar.setMaximum(max_progress_count)
songs = self.plugin.manager.get_all_objects(Song)
for outer_song_counter in range(max_songs - 1):
for inner_song_counter in range(outer_song_counter + 1, max_songs):
if songs_probably_equal(songs[outer_song_counter], songs[inner_song_counter]):
duplicate_added = self.add_duplicates_to_song_list(
songs[outer_song_counter], songs[inner_song_counter])
if duplicate_added:
self.found_duplicates_edit.appendPlainText(
songs[outer_song_counter].title + " = " + songs[inner_song_counter].title)
self.duplicate_search_progress_bar.setValue(self.duplicate_search_progress_bar.value() + 1)
# The call to process_events() will keep the GUI responsive.
self.application.process_events()
if self.break_search:
return
# Create a worker/process pool to check the songs.
process_number = max(1, multiprocessing.cpu_count() - 1)
pool = multiprocessing.Pool(process_number)
result = pool.imap_unordered(songs_probably_equal, song_generator(songs), 30)
# Do not accept any further tasks. Also this closes the processes if all tasks are done.
pool.close()
# While the processes are still working, start to look at the results.
for song_tuple in result:
self.duplicate_search_progress_bar.setValue(self.duplicate_search_progress_bar.value() + 1)
# The call to process_events() will keep the GUI responsive.
self.application.process_events()
if self.break_search:
pool.terminate()
return
if song_tuple is None:
continue
song1, song2 = song_tuple
duplicate_added = self.add_duplicates_to_song_list(song1, song2)
if duplicate_added:
self.found_duplicates_edit.appendPlainText(song1.title + " = " + song2.title)
self.review_total_count = len(self.duplicate_song_list)
if self.review_total_count == 0:
self.notify_no_duplicates()
else:
if self.duplicate_song_list:
self.button(QtGui.QWizard.NextButton).show()
else:
self.notify_no_duplicates()
finally:
self.application.set_normal_cursor()
elif page_id == self.review_page_id:

View File

@ -52,13 +52,13 @@ MIN_BLOCK_SIZE = 70
MAX_TYPO_SIZE = 3
def songs_probably_equal(song1, song2):
def songs_probably_equal(song_tupel):
"""
Calculate and return whether two songs are probably equal.
:param song1: The first song to compare.
:param song2: The second song to compare.
:param song_tupel: A tuple of two songs to compare.
"""
song1, song2 = song_tupel
if len(song1.search_lyrics) < len(song2.search_lyrics):
small = song1.search_lyrics
large = song2.search_lyrics
@ -75,18 +75,19 @@ def songs_probably_equal(song1, song2):
for element in diff_no_typos:
if element[0] == "equal" and _op_length(element) >= MIN_BLOCK_SIZE:
length_of_equal_blocks += _op_length(element)
if length_of_equal_blocks >= MIN_BLOCK_SIZE:
return True
return song1, song2
# Check 2: Similarity based on the relative length of the longest equal block.
# Calculate the length of the largest equal block of the diff set.
length_of_longest_equal_block = 0
for element in diff_no_typos:
if element[0] == "equal" and _op_length(element) > length_of_longest_equal_block:
length_of_longest_equal_block = _op_length(element)
if length_of_equal_blocks >= MIN_BLOCK_SIZE or length_of_longest_equal_block > len(small) * 2 // 3:
return True
if length_of_longest_equal_block > len(small) * 2 // 3:
return song1, song2
# Both checks failed. We assume the songs are not equal.
return False
return None
def _op_length(opcode):

View File

@ -49,7 +49,7 @@ class TestRenderer(TestCase):
def setUp(self):
"""
Set up the components need for all tests.
Set up the components need for all tests
"""
# Mocked out desktop object
self.desktop = MagicMock()
@ -67,7 +67,7 @@ class TestRenderer(TestCase):
def initial_renderer_test(self):
"""
Test the initial renderer state .
Test the initial renderer state
"""
# GIVEN: A new renderer instance.
renderer = Renderer()
@ -77,7 +77,7 @@ class TestRenderer(TestCase):
def default_screen_layout_test(self):
"""
Test the default layout calculations.
Test the default layout calculations
"""
# GIVEN: A new renderer instance.
renderer = Renderer()
@ -87,3 +87,35 @@ class TestRenderer(TestCase):
self.assertEqual(renderer.height, 768, 'The base renderer should be a live controller')
self.assertEqual(renderer.screen_ratio, 0.75, 'The base renderer should be a live controller')
self.assertEqual(renderer.footer_start, 691, 'The base renderer should be a live controller')
def _get_start_tags_test(self):
"""
Test the _get_start_tags() method
"""
# GIVEN: A new renderer instance. Broken raw_text (missing closing tags).
renderer = Renderer()
given_raw_text = '{st}{r}Text text text'
expected_tuple = ('{st}{r}Text text text{/r}{/st}', '{st}{r}',
'<strong><span style="-webkit-text-fill-color:red">')
# WHEN:
result = renderer._get_start_tags(given_raw_text)
# THEN: Check if the correct tuple is returned.
self.assertEqual(result, expected_tuple), 'A tuple should be returned containing the text with correct ' \
'tags, the opening tags, and the opening html tags.'
def _word_split_test(self):
"""
Test the _word_split() method
"""
# GIVEN: A line of text
renderer = Renderer()
given_line = 'beginning asdf \n end asdf'
expected_words = ['beginning', 'asdf', 'end', 'asdf']
# WHEN: Split the line
result_words = renderer._words_split(given_line)
# THEN: The word lists should be the same.
self.assertListEqual(result_words, expected_words)

View File

@ -96,10 +96,10 @@ class TestLib(TestCase):
self.song2.search_lyrics = self.full_lyrics
# WHEN: We compare those songs for equality.
result = songs_probably_equal(self.song1, self.song2)
result = songs_probably_equal((self.song1, self.song2))
# THEN: The result should be True.
assert result is True, 'The result should be True'
# THEN: The result should be a tuple..
assert result == (self.song1, self.song2), 'The result should be the tuble of songs'
def songs_probably_equal_short_song_test(self):
"""
@ -110,10 +110,10 @@ class TestLib(TestCase):
self.song2.search_lyrics = self.short_lyrics
# WHEN: We compare those songs for equality.
result = songs_probably_equal(self.song1, self.song2)
result = songs_probably_equal((self.song1, self.song2))
# THEN: The result should be True.
assert result is True, 'The result should be True'
# THEN: The result should be a tuple..
assert result == (self.song1, self.song2), 'The result should be the tuble of songs'
def songs_probably_equal_error_song_test(self):
"""
@ -124,10 +124,11 @@ class TestLib(TestCase):
self.song2.search_lyrics = self.error_lyrics
# WHEN: We compare those songs for equality.
result = songs_probably_equal(self.song1, self.song2)
result = songs_probably_equal((self.song1, self.song2))
# THEN: The result should be a tuple of songs..
assert result == (self.song1, self.song2), 'The result should be the tuble of songs'
# THEN: The result should be True.
assert result is True, 'The result should be True'
def songs_probably_equal_different_song_test(self):
"""
@ -138,10 +139,10 @@ class TestLib(TestCase):
self.song2.search_lyrics = self.different_lyrics
# WHEN: We compare those songs for equality.
result = songs_probably_equal(self.song1, self.song2)
result = songs_probably_equal((self.song1, self.song2))
# THEN: The result should be False.
assert result is False, 'The result should be False'
# THEN: The result should be None.
assert result is None, 'The result should be None'
def remove_typos_beginning_test(self):
"""