forked from openlp/openlp
Second attempt to fix duplicate song detection on Windows
This commit is contained in:
parent
02a159bf65
commit
992ac3bbb8
@ -33,6 +33,7 @@ The duplicate song removal logic for OpenLP.
|
|||||||
import logging
|
import logging
|
||||||
import multiprocessing
|
import multiprocessing
|
||||||
import os
|
import os
|
||||||
|
import functools
|
||||||
|
|
||||||
from PyQt4 import QtCore, QtGui
|
from PyQt4 import QtCore, QtGui
|
||||||
|
|
||||||
@ -46,17 +47,16 @@ from openlp.plugins.songs.lib.songcompare import songs_probably_equal
|
|||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def song_generator(songs):
|
def tuple_generator(number_of_songs):
|
||||||
"""
|
"""
|
||||||
This is a generator function that yields pairs of (position, lyrics) tuples for two songs in the song array.
|
This is a generator function that yields tuples of two song positions. When it completes, every song position has
|
||||||
When it completes, every song has been returned exactly once in combination with every other song.
|
been returned exactly once in combination with every other song position.
|
||||||
|
|
||||||
:param songs: All songs in the database.
|
:param number_of_songs: Number of songs in the DB.
|
||||||
"""
|
"""
|
||||||
for outer_song_counter in range(len(songs) - 1):
|
for outer_song_counter in range(number_of_songs - 1):
|
||||||
for inner_song_counter in range(outer_song_counter + 1, len(songs)):
|
for inner_song_counter in range(outer_song_counter + 1, number_of_songs):
|
||||||
yield ((outer_song_counter, songs[outer_song_counter].search_lyrics),
|
yield (outer_song_counter, inner_song_counter)
|
||||||
(inner_song_counter, songs[inner_song_counter].search_lyrics))
|
|
||||||
|
|
||||||
|
|
||||||
class DuplicateSongRemovalForm(OpenLPWizard, RegistryProperties):
|
class DuplicateSongRemovalForm(OpenLPWizard, RegistryProperties):
|
||||||
@ -184,7 +184,9 @@ class DuplicateSongRemovalForm(OpenLPWizard, RegistryProperties):
|
|||||||
# Create a worker/process pool to check the songs.
|
# Create a worker/process pool to check the songs.
|
||||||
process_number = max(1, multiprocessing.cpu_count() - 1)
|
process_number = max(1, multiprocessing.cpu_count() - 1)
|
||||||
pool = multiprocessing.Pool(process_number)
|
pool = multiprocessing.Pool(process_number)
|
||||||
result = pool.imap_unordered(songs_probably_equal, song_generator(songs), 30)
|
# Create array with all lyrics
|
||||||
|
song_lyrics = [song.search_lyrics for song in songs]
|
||||||
|
result = pool.imap_unordered(functools.partial(songs_probably_equal, song_lyrics), tuple_generator(len(songs)), 30)
|
||||||
# Do not accept any further tasks. Also this closes the processes if all tasks are done.
|
# Do not accept any further tasks. Also this closes the processes if all tasks are done.
|
||||||
pool.close()
|
pool.close()
|
||||||
# While the processes are still working, start to look at the results.
|
# While the processes are still working, start to look at the results.
|
||||||
|
@ -52,15 +52,14 @@ MIN_BLOCK_SIZE = 70
|
|||||||
MAX_TYPO_SIZE = 3
|
MAX_TYPO_SIZE = 3
|
||||||
|
|
||||||
|
|
||||||
def songs_probably_equal(song_tupel):
|
def songs_probably_equal(songs, pos_tupel):
|
||||||
"""
|
"""
|
||||||
Calculate and return whether two songs are probably equal.
|
Calculate and return whether two songs are probably equal.
|
||||||
|
|
||||||
:param song_tupel: A tuple of two songs to compare.
|
:param songs: A list containing the lyrics of all songs.
:param pos_tupel: A tuple of the two positions in ``songs`` whose lyrics are compared.
|
||||||
"""
|
"""
|
||||||
song1, song2 = song_tupel
|
lyrics1 = songs[pos_tupel[0]]
|
||||||
pos1, lyrics1 = song1
|
lyrics2 = songs[pos_tupel[1]]
|
||||||
pos2, lyrics2 = song2
|
|
||||||
if len(lyrics1) < len(lyrics2):
|
if len(lyrics1) < len(lyrics2):
|
||||||
small = lyrics1
|
small = lyrics1
|
||||||
large = lyrics2
|
large = lyrics2
|
||||||
@ -79,7 +78,7 @@ def songs_probably_equal(song_tupel):
|
|||||||
length_of_equal_blocks += _op_length(element)
|
length_of_equal_blocks += _op_length(element)
|
||||||
|
|
||||||
if length_of_equal_blocks >= MIN_BLOCK_SIZE:
|
if length_of_equal_blocks >= MIN_BLOCK_SIZE:
|
||||||
return pos1, pos2
|
return pos_tupel[0], pos_tupel[1]
|
||||||
# Check 2: Similarity based on the relative length of the longest equal block.
|
# Check 2: Similarity based on the relative length of the longest equal block.
|
||||||
# Calculate the length of the largest equal block of the diff set.
|
# Calculate the length of the largest equal block of the diff set.
|
||||||
length_of_longest_equal_block = 0
|
length_of_longest_equal_block = 0
|
||||||
@ -87,7 +86,7 @@ def songs_probably_equal(song_tupel):
|
|||||||
if element[0] == "equal" and _op_length(element) > length_of_longest_equal_block:
|
if element[0] == "equal" and _op_length(element) > length_of_longest_equal_block:
|
||||||
length_of_longest_equal_block = _op_length(element)
|
length_of_longest_equal_block = _op_length(element)
|
||||||
if length_of_longest_equal_block > len(small) * 2 // 3:
|
if length_of_longest_equal_block > len(small) * 2 // 3:
|
||||||
return pos1, pos2
|
return pos_tupel[0], pos_tupel[1]
|
||||||
# Both checks failed. We assume the songs are not equal.
|
# Both checks failed. We assume the songs are not equal.
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user