Re-re-fix duplicate song check on Windows, and now it actually works in builds.

This commit is contained in:
Tomas Groth 2014-11-06 10:34:07 +01:00
parent 992ac3bbb8
commit b6e0036383
3 changed files with 23 additions and 17 deletions

View File

@ -28,7 +28,9 @@
###############################################################################
import sys
import multiprocessing
from openlp.core.common import is_win, is_macosx
from openlp.core import main
@ -36,9 +38,14 @@ if __name__ == '__main__':
"""
Instantiate and run the application.
"""
# Add support for using multiprocessing from frozen Windows executable (built using PyInstaller),
# see https://docs.python.org/3/library/multiprocessing.html#multiprocessing.freeze_support
if is_win():
multiprocessing.freeze_support()
# Mac OS X passes arguments like '-psn_XXXX' to the application. This argument is actually a process serial number.
# However, this causes a conflict with other OpenLP arguments. Since we do not use this argument we can delete it
# to avoid any potential conflicts.
if sys.platform.startswith('darwin'):
#if sys.platform.startswith('darwin'):
if is_macosx():
sys.argv = [x for x in sys.argv if not x.startswith('-psn')]
main()

View File

@ -33,7 +33,6 @@ The duplicate song removal logic for OpenLP.
import logging
import multiprocessing
import os
import functools
from PyQt4 import QtCore, QtGui
@ -47,16 +46,17 @@ from openlp.plugins.songs.lib.songcompare import songs_probably_equal
log = logging.getLogger(__name__)
def tuple_generator(number_of_songs):
def song_generator(songs):
"""
This is a generator function to return tuples of two songs position. When completed then all songs position have
once been returned combined with any other songs position.
This is a generator function that yields pairs of songs, each song given as a tuple of its position in the song
array and its search lyrics. When completed, every song has been yielded once in combination with every other song.
:param number_of_songs: Number of songs in the DB.
:param songs: All songs in the database.
"""
for outer_song_counter in range(number_of_songs - 1):
for inner_song_counter in range(outer_song_counter + 1, number_of_songs):
yield (outer_song_counter, inner_song_counter)
for outer_song_counter in range(len(songs) - 1):
for inner_song_counter in range(outer_song_counter + 1, len(songs)):
yield ((outer_song_counter, songs[outer_song_counter].search_lyrics),
(inner_song_counter, songs[inner_song_counter].search_lyrics))
class DuplicateSongRemovalForm(OpenLPWizard, RegistryProperties):
@ -184,9 +184,7 @@ class DuplicateSongRemovalForm(OpenLPWizard, RegistryProperties):
# Create a worker/process pool to check the songs.
process_number = max(1, multiprocessing.cpu_count() - 1)
pool = multiprocessing.Pool(process_number)
# Create array with all lyrics
song_lyrics = [song.search_lyrics for song in songs]
result = pool.imap_unordered(functools.partial(songs_probably_equal, song_lyrics), tuple_generator(len(songs)), 30)
result = pool.imap_unordered(songs_probably_equal, song_generator(songs), 30)
# Do not accept any further tasks. Also this closes the processes if all tasks are done.
pool.close()
# While the processes are still working, start to look at the results.

View File

@ -52,14 +52,15 @@ MIN_BLOCK_SIZE = 70
MAX_TYPO_SIZE = 3
def songs_probably_equal(songs, pos_tupel):
def songs_probably_equal(song_tupel):
"""
Calculate and return whether two songs are probably equal.
:param song_tupel: A tuple of two songs to compare, each given as a (position, lyrics) tuple.
"""
lyrics1 = songs[pos_tupel[0]]
lyrics2 = songs[pos_tupel[1]]
song1, song2 = song_tupel
pos1, lyrics1 = song1
pos2, lyrics2 = song2
if len(lyrics1) < len(lyrics2):
small = lyrics1
large = lyrics2
@ -78,7 +79,7 @@ def songs_probably_equal(songs, pos_tupel):
length_of_equal_blocks += _op_length(element)
if length_of_equal_blocks >= MIN_BLOCK_SIZE:
return pos_tupel[0], pos_tupel[1]
return pos1, pos2
# Check 2: Similarity based on the relative length of the longest equal block.
# Calculate the length of the largest equal block of the diff set.
length_of_longest_equal_block = 0
@ -86,7 +87,7 @@ def songs_probably_equal(songs, pos_tupel):
if element[0] == "equal" and _op_length(element) > length_of_longest_equal_block:
length_of_longest_equal_block = _op_length(element)
if length_of_longest_equal_block > len(small) * 2 // 3:
return pos_tupel[0], pos_tupel[1]
return pos1, pos2
# Both checks failed. We assume the songs are not equal.
return None