forked from openlp/openlp
Second attempt to fix duplicate-song-detection on Windows
This commit is contained in:
parent
02a159bf65
commit
992ac3bbb8
@ -33,6 +33,7 @@ The duplicate song removal logic for OpenLP.
|
||||
import logging
|
||||
import multiprocessing
|
||||
import os
|
||||
import functools
|
||||
|
||||
from PyQt4 import QtCore, QtGui
|
||||
|
||||
@ -46,17 +47,16 @@ from openlp.plugins.songs.lib.songcompare import songs_probably_equal
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def tuple_generator(number_of_songs):
    """
    Generate every unordered pair of song positions exactly once.

    Yields ``(outer, inner)`` tuples with ``outer < inner``, so each song position is combined once with every
    other song position. Nothing is yielded when there are fewer than two songs.

    :param number_of_songs: Number of songs in the DB.
    """
    for outer_song_counter in range(number_of_songs - 1):
        for inner_song_counter in range(outer_song_counter + 1, number_of_songs):
            yield (outer_song_counter, inner_song_counter)


def song_generator(songs):
    """
    Generate every unordered pair of songs exactly once, together with their positions.

    Each yielded item is a tuple of two ``(position, search_lyrics)`` tuples, where ``position`` is the index of
    the song in ``songs``. This is the pre-commit variant of :func:`tuple_generator`; it ships the lyrics with
    every pair instead of only the two positions.

    :param songs: All songs in the database.
    """
    # Reuse the position generator so both variants enumerate pairs in the same order.
    for outer_song_counter, inner_song_counter in tuple_generator(len(songs)):
        yield ((outer_song_counter, songs[outer_song_counter].search_lyrics),
               (inner_song_counter, songs[inner_song_counter].search_lyrics))
|
||||
|
||||
|
||||
class DuplicateSongRemovalForm(OpenLPWizard, RegistryProperties):
|
||||
@ -184,7 +184,9 @@ class DuplicateSongRemovalForm(OpenLPWizard, RegistryProperties):
|
||||
# Create a worker/process pool to check the songs.
|
||||
process_number = max(1, multiprocessing.cpu_count() - 1)
|
||||
pool = multiprocessing.Pool(process_number)
|
||||
result = pool.imap_unordered(songs_probably_equal, song_generator(songs), 30)
|
||||
# Create array with all lyrics
|
||||
song_lyrics = [song.search_lyrics for song in songs]
|
||||
result = pool.imap_unordered(functools.partial(songs_probably_equal, song_lyrics), tuple_generator(len(songs)), 30)
|
||||
# Do not accept any further tasks. Also this closes the processes if all tasks are done.
|
||||
pool.close()
|
||||
# While the processes are still working, start to look at the results.
|
||||
|
@ -52,15 +52,14 @@ MIN_BLOCK_SIZE = 70
|
||||
MAX_TYPO_SIZE = 3
|
||||
|
||||
|
||||
def songs_probably_equal(song_tupel):
|
||||
def songs_probably_equal(songs, pos_tupel):
|
||||
"""
|
||||
Calculate and return whether two songs are probably equal.
|
||||
|
||||
:param song_tupel: A tuple of two songs to compare.
|
||||
"""
|
||||
song1, song2 = song_tupel
|
||||
pos1, lyrics1 = song1
|
||||
pos2, lyrics2 = song2
|
||||
lyrics1 = songs[pos_tupel[0]]
|
||||
lyrics2 = songs[pos_tupel[1]]
|
||||
if len(lyrics1) < len(lyrics2):
|
||||
small = lyrics1
|
||||
large = lyrics2
|
||||
@ -79,7 +78,7 @@ def songs_probably_equal(song_tupel):
|
||||
length_of_equal_blocks += _op_length(element)
|
||||
|
||||
if length_of_equal_blocks >= MIN_BLOCK_SIZE:
|
||||
return pos1, pos2
|
||||
return pos_tupel[0], pos_tupel[1]
|
||||
# Check 2: Similarity based on the relative length of the longest equal block.
|
||||
# Calculate the length of the largest equal block of the diff set.
|
||||
length_of_longest_equal_block = 0
|
||||
@ -87,7 +86,7 @@ def songs_probably_equal(song_tupel):
|
||||
if element[0] == "equal" and _op_length(element) > length_of_longest_equal_block:
|
||||
length_of_longest_equal_block = _op_length(element)
|
||||
if length_of_longest_equal_block > len(small) * 2 // 3:
|
||||
return pos1, pos2
|
||||
return pos_tupel[0], pos_tupel[1]
|
||||
# Both checks failed. We assume the songs are not equal.
|
||||
return None
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user