openlp/openlp/plugins/songs/lib/songimport.py

352 lines
14 KiB
Python
Raw Normal View History

2010-04-01 21:36:03 +00:00
# -*- coding: utf-8 -*-
# vim: autoindent shiftwidth=4 expandtab textwidth=80 tabstop=4 softtabstop=4
###############################################################################
# OpenLP - Open Source Lyrics Projection #
# --------------------------------------------------------------------------- #
2010-12-26 11:04:47 +00:00
# Copyright (c) 2008-2011 Raoul Snyman #
# Portions copyright (c) 2008-2011 Tim Bentley, Jonathan Corwin, Michael #
# Gorven, Scott Guerrieri, Meinert Jordan, Armin Köhler, Andreas Preikschat, #
# Christian Richter, Philip Ridout, Maikel Stuivenberg, Martin Thompson, Jon #
# Tibble, Carsten Tinggaard, Frode Woldsund #
2010-04-01 21:36:03 +00:00
# --------------------------------------------------------------------------- #
# This program is free software; you can redistribute it and/or modify it #
# under the terms of the GNU General Public License as published by the Free #
# Software Foundation; version 2 of the License. #
# #
# This program is distributed in the hope that it will be useful, but WITHOUT #
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for #
# more details. #
# #
# You should have received a copy of the GNU General Public License along #
# with this program; if not, write to the Free Software Foundation, Inc., 59 #
# Temple Place, Suite 330, Boston, MA 02111-1307 USA #
###############################################################################
2010-08-27 15:25:29 +00:00
import logging
2010-06-19 21:54:53 +00:00
import re
2010-08-27 15:25:29 +00:00
from PyQt4 import QtCore
2010-04-19 18:43:20 +00:00
2010-08-27 15:25:29 +00:00
from openlp.core.lib import Receiver, translate
2011-03-14 18:59:59 +00:00
from openlp.plugins.songs.lib import clean_song, VerseType
from openlp.plugins.songs.lib.db import Song, Author, Topic, Book, MediaFile
2011-02-13 15:17:42 +00:00
from openlp.plugins.songs.lib.ui import SongStrings
2011-01-09 16:52:31 +00:00
from openlp.plugins.songs.lib.xml import SongXML
2010-06-06 07:28:07 +00:00
2010-08-27 15:25:29 +00:00
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
class SongImport(QtCore.QObject):
    """
    Helper class for importing a song from a third party source into OpenLP.

    This class just takes the raw strings, and will work out for itself
    whether the authors etc. already exist and add them or refer to them
    as necessary.
    """
    def __init__(self, manager, **kwargs):
        """
        Initialise and create defaults for properties.

        ``manager``
            An instance of a SongManager, through which all database access is
            performed.

        ``filename`` / ``filenames``
            Keyword argument naming the import source. ``filename`` takes
            precedence when both are given. A ``KeyError`` is raised when
            neither is supplied.
        """
        QtCore.QObject.__init__(self)
        self.manager = manager
        if u'filename' in kwargs:
            self.import_source = kwargs[u'filename']
        elif u'filenames' in kwargs:
            self.import_source = kwargs[u'filenames']
        else:
            raise KeyError(u'Keyword arguments "filename[s]" not supplied.')
        log.debug(self.import_source)
        self.import_wizard = None
        self.song = None
        self.stop_import_flag = False
        self.set_defaults()
        # Let the application-wide receiver tell us when to abort the import.
        QtCore.QObject.connect(Receiver.get_receiver(),
            QtCore.SIGNAL(u'openlp_stop_wizard'), self.stop_import)

    def set_defaults(self):
        """
        Create defaults for properties - call this before each song
        if importing many songs at once to ensure a clean beginning.
        """
        self.title = u''
        self.song_number = u''
        self.alternate_title = u''
        self.copyright = u''
        self.comments = u''
        self.theme_name = u''
        self.ccli_number = u''
        self.authors = []
        self.topics = []
        self.media_files = []
        self.song_book_name = u''
        self.song_book_pub = u''
        self.verse_order_list_generated_useful = False
        self.verse_order_list_generated = []
        self.verse_order_list = []
        self.verses = []
        self.verse_counts = {}
        self.copyright_string = unicode(translate(
            'SongsPlugin.SongImport', 'copyright'))

    def stop_import(self):
        """
        Sets the flag for importers to stop their import.
        """
        log.debug(u'Stopping songs import')
        self.stop_import_flag = True

    def register(self, import_wizard):
        """
        Remember the import wizard this importer belongs to.

        ``import_wizard``
            The wizard object, stored as ``self.import_wizard``.
        """
        self.import_wizard = import_wizard

    def tidy_text(self, text):
        """
        Get rid of some dodgy unicode and formatting characters we're not
        interested in. Some can be converted to ascii.

        ``text``
            The text to clean up.

        Returns the cleaned text.
        """
        replacements = (
            (u'\u2018', u'\''),
            (u'\u2019', u'\''),
            (u'\u201c', u'"'),
            (u'\u201d', u'"'),
            (u'\u2026', u'...'),
            (u'\u2013', u'-'),
            (u'\u2014', u'-'),
        )
        for fancy, plain in replacements:
            text = text.replace(fancy, plain)
        # Collapse runs of spaces, tabs and vertical tabs into one space.
        text = re.sub(r'[ \t\v]+', u' ', text)
        # Normalise line endings to \n and drop a space on either side.
        text = re.sub(r' ?(\r\n?|\n) ?', u'\n', text)
        # Runs of five newlines, or form feeds, become a single form feed.
        text = re.sub(r' ?(\n{5}|\f)+ ?', u'\f', text)
        return text

    def process_song_text(self, text):
        """
        Split the text on blank lines and process every non-empty chunk with
        ``process_verse_text``.

        ``text``
            The complete song text.
        """
        for verse_text in text.split(u'\n\n'):
            verse_text = verse_text.strip()
            if verse_text != u'':
                self.process_verse_text(verse_text)

    def process_verse_text(self, text):
        """
        Work out what a chunk of text is and store it accordingly.

        A chunk containing the copyright word or symbol is treated as credits:
        lines before the first copyright line are parsed as authors, that line
        and every following line are added to the copyright. A single line
        chunk is parsed as authors. Anything else is added as a verse, and its
        first line becomes the title if no title has been set yet.

        ``text``
            The chunk of text, lines split by \\n.
        """
        # The haystack is lower-cased below, so the needle must be as well,
        # otherwise a translation containing capitals could never match.
        copyright_word = self.copyright_string.lower()
        copyright_symbol = unicode(SongStrings.CopyrightSymbol)

        def mentions_copyright(candidate):
            return copyright_word in candidate.lower() or \
                copyright_symbol in candidate

        lines = text.split(u'\n')
        if mentions_copyright(text):
            copyright_found = False
            for line in lines:
                if copyright_found or mentions_copyright(line):
                    copyright_found = True
                    self.add_copyright(line)
                else:
                    self.parse_author(line)
            return
        if len(lines) == 1:
            self.parse_author(lines[0])
            return
        if not self.title:
            self.title = lines[0]
        self.add_verse(text)

    def add_copyright(self, copyright):
        """
        Build the copyright field. Text which is already part of the field is
        not added again; new text is appended, separated by a space.

        ``copyright``
            The copyright text to add.
        """
        if copyright in self.copyright:
            return
        if self.copyright != u'':
            self.copyright += u' '
        self.copyright += copyright

    def parse_author(self, text):
        """
        Add the author(s). OpenLP stores them individually so split by comma
        and '&'. A single-word name followed by another name takes the last
        word of that following name, so 'Mr & Mrs Smith' becomes 'Mr Smith'
        and 'Mrs Smith'. A trailing full stop is removed.

        ``text``
            The text containing one or more author names.
        """
        for group in text.split(u','):
            names = group.split(u'&')
            last_index = len(names) - 1
            for index, raw_name in enumerate(names):
                name = raw_name.strip()
                if u' ' not in name and index < last_index:
                    # Borrow the surname from the next name in the group.
                    surname = names[index + 1].strip().split(u' ')[-1]
                    name = name + u' ' + surname
                if name.endswith(u'.'):
                    name = name[:-1]
                if name:
                    self.add_author(name)

    def add_author(self, author):
        """
        Add an author to the list, unless it is already in there.

        ``author``
            The author's name.
        """
        if author not in self.authors:
            self.authors.append(author)

    def add_media_file(self, filename):
        """
        Add a media file to the list, unless it is already in there.

        ``filename``
            The name of the media file.
        """
        if filename not in self.media_files:
            self.media_files.append(filename)

    def add_verse(self, verse_text, verse_def=u'v', lang=None):
        """
        Add a verse. This is the whole verse, lines split by \\n. It will also
        attempt to detect duplicates. In this case it will just add to the verse
        order.

        ``verse_text``
            The text of the verse.

        ``verse_def``
            The verse tag can be v1/c1/b etc, or 'v' and 'c' (will count the
            verses/choruses itself) or None, where it will assume verse.

        ``lang``
            The language code (ISO-639) of the verse, for example *en* or *de*.
        """
        if not verse_def:
            # None (or an empty tag) means a plain verse, as documented.
            verse_def = u'v'
        stripped_text = verse_text.strip()
        for old_verse_def, old_verse, old_lang in self.verses:
            if old_verse.strip() == stripped_text:
                # Known verse: only record it again in the generated order.
                self.verse_order_list_generated.append(old_verse_def)
                self.verse_order_list_generated_useful = True
                return
        tag = verse_def[0]
        self.verse_counts[tag] = self.verse_counts.get(tag, 0) + 1
        if len(verse_def) == 1:
            # Bare tag: number it with the running count for this tag.
            verse_def += unicode(self.verse_counts[tag])
        elif int(verse_def[1:]) > self.verse_counts[tag]:
            # Explicit number ahead of our count: continue from that number.
            self.verse_counts[tag] = int(verse_def[1:])
        self.verses.append([verse_def, verse_text.rstrip(), lang])
        self.verse_order_list_generated.append(verse_def)

    def repeat_verse(self):
        """
        Repeat the previous verse in the verse order. Does nothing when no
        verse has been added yet.
        """
        if not self.verse_order_list_generated:
            log.debug(u'No previous verse to repeat')
            return
        self.verse_order_list_generated.append(
            self.verse_order_list_generated[-1])
        self.verse_order_list_generated_useful = True

    def check_complete(self):
        """
        Check the mandatory fields are entered (i.e. title and a verse)
        Author not checked here, if no author then "Author unknown" is
        automatically added.

        Returns ``True`` when the title is not empty and there is at least
        one verse, otherwise ``False``.
        """
        return self.title != u'' and len(self.verses) != 0

    def finish(self):
        """
        All fields have been set to this song. Write the song to disk.

        Builds a ``Song`` from the collected values, looks up or creates the
        related authors, media files, song book and topics through the
        manager, saves the song and resets the importer with
        ``set_defaults``.
        """
        log.info(u'committing song %s to database', self.title)
        song = Song()
        song.title = self.title
        song.alternate_title = self.alternate_title
        # Values will be set when cleaning the song.
        song.search_title = u''
        song.search_lyrics = u''
        song.verse_order = u''
        song.song_number = self.song_number
        # Maps an unknown verse definition to the "other" definition used
        # instead, so the verse order can be rewritten afterwards.
        verses_changed_to_other = {}
        sxml = SongXML()
        other_count = 1
        for verse_def, verse_text, lang in self.verses:
            if verse_def[0].lower() in VerseType.Tags:
                verse_tag = verse_def[0].lower()
            else:
                new_verse_def = u'%s%d' % (VerseType.Tags[VerseType.Other],
                    other_count)
                verses_changed_to_other[verse_def] = new_verse_def
                other_count += 1
                verse_tag = VerseType.Tags[VerseType.Other]
                log.info(u'Versetype %s changing to %s', verse_def,
                    new_verse_def)
                verse_def = new_verse_def
            sxml.add_verse_to_lyrics(verse_tag, verse_def[1:], verse_text, lang)
        song.lyrics = unicode(sxml.extract_xml(), u'utf-8')
        # Fall back to the generated order only if it recorded a repeat.
        if not self.verse_order_list and \
            self.verse_order_list_generated_useful:
            self.verse_order_list = self.verse_order_list_generated
        self.verse_order_list = [
            verses_changed_to_other.get(current_verse_def, current_verse_def)
            for current_verse_def in self.verse_order_list]
        song.verse_order = u' '.join(self.verse_order_list)
        song.copyright = self.copyright
        song.comments = self.comments
        song.theme_name = self.theme_name
        song.ccli_number = self.ccli_number
        for authortext in self.authors:
            author = self.manager.get_object_filtered(Author,
                Author.display_name == authortext)
            if not author:
                # Last word is the last name, everything before it the first.
                name_parts = authortext.split(u' ')
                author = Author.populate(display_name=authortext,
                    last_name=name_parts[-1],
                    first_name=u' '.join(name_parts[:-1]))
            song.authors.append(author)
        for filename in self.media_files:
            media_file = self.manager.get_object_filtered(MediaFile,
                MediaFile.file_name == filename)
            if not media_file:
                song.media_files.append(MediaFile.populate(file_name=filename))
        if self.song_book_name:
            song_book = self.manager.get_object_filtered(Book,
                Book.name == self.song_book_name)
            if song_book is None:
                song_book = Book.populate(name=self.song_book_name,
                    publisher=self.song_book_pub)
            song.book = song_book
        for topictext in self.topics:
            if len(topictext) == 0:
                continue
            topic = self.manager.get_object_filtered(Topic,
                Topic.name == topictext)
            if topic is None:
                topic = Topic.populate(name=topictext)
            song.topics.append(topic)
        clean_song(self.manager, song)
        self.manager.save_object(song)
        self.set_defaults()

    def print_song(self):
        """
        For debugging. Prints the collected song values to standard output.
        """
        # Single-argument print(...) behaves the same as the print statement
        # on Python 2.
        print(u'========================================'
            + u'========================================')
        print(u'TITLE: ' + self.title)
        print(u'ALT TITLE: ' + self.alternate_title)
        for verse_def, verse_text, lang in self.verses:
            print(u'VERSE ' + verse_def + u': ' + verse_text)
        print(u'ORDER: ' + u' '.join(self.verse_order_list))
        print(u'GENERATED ORDER: ' + u' '.join(self.verse_order_list_generated))
        for author in self.authors:
            print(u'AUTHOR: ' + author)
        if self.copyright:
            print(u'COPYRIGHT: ' + self.copyright)
        if self.song_book_name:
            print(u'BOOK: ' + self.song_book_name)
        if self.song_book_pub:
            print(u'BOOK PUBLISHER: ' + self.song_book_pub)
        if self.song_number:
            print(u'NUMBER: ' + self.song_number)
        for topictext in self.topics:
            print(u'TOPIC: ' + topictext)
        if self.comments:
            print(u'COMMENTS: ' + self.comments)
        if self.theme_name:
            print(u'THEME: ' + self.theme_name)
        if self.ccli_number:
            print(u'CCLI: ' + self.ccli_number)