openlp/openlp/plugins/songs/lib/songimport.py

369 lines
14 KiB
Python
Raw Normal View History

2010-04-01 21:36:03 +00:00
# -*- coding: utf-8 -*-
# vim: autoindent shiftwidth=4 expandtab textwidth=80 tabstop=4 softtabstop=4
###############################################################################
# OpenLP - Open Source Lyrics Projection #
# --------------------------------------------------------------------------- #
2010-12-26 11:04:47 +00:00
# Copyright (c) 2008-2011 Raoul Snyman #
# Portions copyright (c) 2008-2011 Tim Bentley, Jonathan Corwin, Michael #
2010-07-24 22:10:47 +00:00
# Gorven, Scott Guerrieri, Meinert Jordan, Andreas Preikschat, Christian #
# Richter, Philip Ridout, Maikel Stuivenberg, Martin Thompson, Jon Tibble, #
# Carsten Tinggaard, Frode Woldsund #
2010-04-01 21:36:03 +00:00
# --------------------------------------------------------------------------- #
# This program is free software; you can redistribute it and/or modify it #
# under the terms of the GNU General Public License as published by the Free #
# Software Foundation; version 2 of the License. #
# #
# This program is distributed in the hope that it will be useful, but WITHOUT #
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for #
# more details. #
# #
# You should have received a copy of the GNU General Public License along #
# with this program; if not, write to the Free Software Foundation, Inc., 59 #
# Temple Place, Suite 330, Boston, MA 02111-1307 USA #
###############################################################################
2010-08-27 15:25:29 +00:00
import logging
2010-06-19 21:54:53 +00:00
import re
2010-08-27 15:25:29 +00:00
from PyQt4 import QtCore
2010-04-19 18:43:20 +00:00
2010-08-27 15:25:29 +00:00
from openlp.core.lib import Receiver, translate
from openlp.plugins.songs.lib import VerseType
from openlp.plugins.songs.lib.db import Song, Author, Topic, Book, MediaFile
2011-01-09 16:52:31 +00:00
from openlp.plugins.songs.lib.xml import SongXML
2010-06-06 07:28:07 +00:00
2010-08-27 15:25:29 +00:00
log = logging.getLogger(__name__)
class SongImport(QtCore.QObject):
    """
    Helper class for importing a song from a third party source into OpenLP.

    The importer only has to hand over the raw strings; this class works out
    for itself whether the authors etc. already exist and either adds them or
    refers to the existing records as necessary.
    """
    def __init__(self, manager):
        """
        Initialise the importer and create defaults for all song properties.

        ``manager``
            An instance of a SongManager, through which all database access
            is performed.
        """
        # NOTE(review): QtCore.QObject.__init__() is not invoked here -
        # confirm that this is intentional.
        self.manager = manager
        self.stop_import_flag = False
        self.set_defaults()
        # Let the wizard's stop signal raise our stop flag.
        receiver = Receiver.get_receiver()
        stop_signal = QtCore.SIGNAL(u'openlp_stop_wizard')
        QtCore.QObject.connect(receiver, stop_signal, self.stop_import)
2010-09-06 19:56:20 +00:00
def set_defaults(self):
    """
    Reset every per-song property to its empty default. Call this before
    each song when importing many songs at once, so that every song starts
    from a clean state.
    """
    # Plain text fields all start out as empty unicode strings.
    for text_field in ('title', 'song_number', 'alternate_title',
        'copyright', 'comments', 'theme_name', 'ccli_number',
        'song_book_name', 'song_book_pub'):
        setattr(self, text_field, u'')
    # Collections are created afresh for every song.
    self.authors = []
    self.topics = []
    self.media_files = []
    self.verse_order_list = []
    self.verses = []
    # Verse counters, keyed by the first character of the verse tag.
    self.versecounts = {}
    # Translated markers that process_verse_text() searches for.
    self.copyright_string = unicode(translate(
        'SongsPlugin.SongImport', 'copyright'))
    self.copyright_symbol = unicode(translate(
        'SongsPlugin.SongImport', '\xa9'))
2010-08-27 15:25:29 +00:00
def stop_import(self):
    """
    Raise the flag that tells importers to stop their import.

    This only sets ``stop_import_flag``; nothing is interrupted here.
    """
    log.debug(u'Stopping songs import')
    self.stop_import_flag = True
2010-04-12 07:22:56 +00:00
def register(self, import_wizard):
    """
    Remember the import wizard that is driving this importer.

    ``import_wizard``
        The wizard object; it is stored as ``self.import_wizard``.
    """
    self.import_wizard = import_wizard
2010-04-12 07:22:56 +00:00
@staticmethod
def process_songs_text(manager, text):
    """
    Split a block of raw text into songs and return the completed ones.

    The text is tidied first and then cut at form feeds. Each non-blank
    chunk is fed to the current song; as soon as that song is complete it is
    collected and a fresh one is started.

    ``manager``
        The song manager handed to every ``SongImport`` that is created.

    ``text``
        The raw text, possibly containing several songs.
    """
    completed = []
    current = SongImport(manager)
    for chunk in SongImport.tidy_text(text).split(u'\f'):
        stripped = chunk.strip()
        if not stripped:
            continue
        current.process_song_text(stripped)
        if current.check_complete():
            completed.append(current)
            current = SongImport(manager)
    if current.check_complete():
        completed.append(current)
    return completed
@staticmethod
def tidy_text(text):
    """
    Replace typographic unicode characters with plain equivalents and
    normalise whitespace.

    Curly quotes, the ellipsis and en/em dashes become their ASCII
    counterparts. Runs of spaces/tabs collapse to one space, every line
    ending becomes a bare ``\\n`` without surrounding spaces, and five
    consecutive newlines (or form feeds) become a single form feed.

    ``text``
        The raw text to clean up.
    """
    ascii_equivalents = (
        (u'\u2018', u'\''),
        (u'\u2019', u'\''),
        (u'\u201c', u'"'),
        (u'\u201d', u'"'),
        (u'\u2026', u'...'),
        (u'\u2013', u'-'),
        (u'\u2014', u'-'),
    )
    for fancy, plain in ascii_equivalents:
        text = text.replace(fancy, plain)
    # Order matters: spaces first, then line endings, then song breaks.
    whitespace_rules = (
        (r'[ \t\v]+', u' '),
        (r' ?(\r\n?|\n) ?', u'\n'),
        (r' ?(\n{5}|\f)+ ?', u'\f'),
    )
    for pattern, replacement in whitespace_rules:
        text = re.sub(pattern, replacement, text)
    return text
def process_song_text(self, text):
    """
    Process the text of one song, one blank-line separated block at a time.

    ``text``
        The song text; blocks are separated by an empty line. Blocks that
        contain only whitespace are skipped.
    """
    for block in text.split(u'\n\n'):
        stripped = block.strip()
        if stripped != u'':
            self.process_verse_text(stripped)
def process_verse_text(self, text):
    """
    Classify one block of text as copyright/author information or a verse.

    * If the block mentions the copyright word or symbol, every line from
      the first such line onwards is added to the copyright, and the lines
      before it are parsed as authors.
    * Otherwise a single line block is parsed as authors.
    * Anything else is a verse; its first line becomes the title when no
      title has been set yet.

    ``text``
        The block of text, lines separated by ``\\n``.
    """
    def has_copyright_marker(candidate):
        lowered = candidate.lower()
        return (self.copyright_string in lowered or
            self.copyright_symbol in lowered)

    lines = text.split(u'\n')
    if has_copyright_marker(text):
        in_copyright = False
        for line in lines:
            # Once the copyright has started, all later lines belong to it.
            in_copyright = in_copyright or has_copyright_marker(line)
            if in_copyright:
                self.add_copyright(line)
            else:
                self.parse_author(line)
    elif len(lines) == 1:
        self.parse_author(lines[0])
    else:
        if not self.title:
            self.title = lines[0]
        self.add_verse(text)
2010-06-06 07:28:07 +00:00
2010-04-01 21:36:03 +00:00
def add_copyright(self, copyright):
    """
    Append a piece of text to the copyright field.

    Text that already occurs in the copyright field is ignored; otherwise it
    is appended, separated from the existing text by a single space.

    ``copyright``
        The copyright text to add.
    """
    if copyright in self.copyright:
        return
    prefix = self.copyright
    if prefix != u'':
        prefix += ' '
    self.copyright = prefix + copyright
def parse_author(self, text):
    """
    Split a line of text into individual authors and add each of them.

    OpenLP stores authors individually, so the text is split at commas and
    at '&' (the word 'and' is not treated as a separator). A name without a
    space that is followed by another name borrows the last word of that
    next name, so 'Mr & Mrs Smith' becomes 'Mr Smith' and 'Mrs Smith'. One
    trailing full stop is removed and empty names are dropped.

    ``text``
        The raw author text.
    """
    for group in text.split(u','):
        names = group.split(u'&')
        last_index = len(names) - 1
        for index, raw_name in enumerate(names):
            name = raw_name.strip()
            if u' ' not in name and index < last_index:
                # Single word such as 'Mr': borrow the following surname.
                surname = names[index + 1].strip().split(u' ')[-1]
                name = name + u' ' + surname
            if name.endswith(u'.'):
                name = name[:-1]
            if name:
                self.add_author(name)
def add_author(self, author):
    """
    Add an author to the list, unless that author is already in it.

    ``author``
        The author's name.
    """
    if author not in self.authors:
        self.authors.append(author)
2010-06-06 07:28:07 +00:00
def add_media_file(self, filename):
    """
    Add a media file to the list, unless it is already in it.

    ``filename``
        The name of the media file.
    """
    if filename not in self.media_files:
        self.media_files.append(filename)
2010-09-09 19:34:45 +00:00
def add_verse(self, verse, versetag=u'V'):
    """
    Add a verse to the song and to the verse order.

    If a verse with the same text (ignoring surrounding whitespace) has
    already been added, no new verse is stored; the existing verse's tag is
    simply appended to the verse order again.

    After a new verse whose tag starts with 'V' has been added, chorus 'C1'
    is appended to the verse order as well if it is already in the order.

    ``verse``
        The whole verse, lines separated by ``\\n``.

    ``versetag``
        Either a full tag such as 'V1', 'C1' or 'B2', or just the type
        letter ('V', 'C', ...), in which case the verses of that type are
        counted automatically. ``None`` or an empty tag is treated as 'V'.
        A ``ValueError`` is raised if the part after the first character is
        not a number.
    """
    if not versetag:
        # The tag is documented as optional; assume an ordinary verse.
        versetag = u'V'
    wanted = verse.strip()
    for oldversetag, oldverse in self.verses:
        if oldverse.strip() == wanted:
            self.verse_order_list.append(oldversetag)
            return
    kind = versetag[0]
    self.versecounts[kind] = self.versecounts.get(kind, 0) + 1
    if len(versetag) == 1:
        # Only the type was given: number the verse ourselves.
        versetag += u'%d' % self.versecounts[kind]
    else:
        # An explicit number may push the counter forward.
        number = int(versetag[1:])
        if number > self.versecounts[kind]:
            self.versecounts[kind] = number
    self.verses.append([versetag, verse.rstrip()])
    self.verse_order_list.append(versetag)
    if versetag.startswith(u'V') and self.contains_verse(u'C1'):
        self.verse_order_list.append(u'C1')
2010-04-01 21:36:03 +00:00
def repeat_verse(self):
    """
    Append the last entry of the verse order to the verse order once more.

    Raises ``IndexError`` when the verse order is still empty.
    """
    order = self.verse_order_list
    order.append(order[-1])
2010-04-02 23:24:51 +00:00
def contains_verse(self, versetag):
    """
    Tell whether a verse tag occurs in the verse order.

    ``versetag``
        The tag to look for, e.g. 'C1'.
    """
    order = self.verse_order_list
    return versetag in order
2010-04-12 07:22:56 +00:00
2010-04-01 21:36:03 +00:00
def check_complete(self):
    """
    Check that the mandatory fields are entered, i.e. a title and at least
    one verse. Returns ``True`` when both are present.

    The author is not checked here; finish() adds "Author unknown" when no
    author has been given.
    """
    return not (self.title == u'' or len(self.verses) == 0)
2010-06-06 07:28:07 +00:00
def remove_punctuation(self, text):
    """
    Extract the alphanumeric words of a text for the searchable fields.

    Every run of non-word characters is replaced by a single space. Word
    characters are determined with unicode awareness, so accented and other
    non-ASCII letters are kept.

    ``text``
        The text to clean up.
    """
    # The flag has to go to re.compile(): the fourth positional argument of
    # re.sub() is ``count``, so passing re.UNICODE there only limited the
    # number of replacements and never enabled unicode matching.
    return re.compile(r'\W+', re.UNICODE).sub(u' ', text)
2010-06-06 07:28:07 +00:00
2010-04-01 21:36:03 +00:00
def finish(self):
    """
    All fields of this song have been set: write the song away.

    When no author has been given, the translated "Author unknown" entry is
    added first.
    """
    if not self.authors:
        unknown_author = unicode(translate('SongsPlugin.SongImport',
            'Author unknown'))
        self.authors.append(unknown_author)
    self.commit_song()
2010-06-06 07:28:07 +00:00
2010-04-02 14:10:10 +00:00
def commit_song(self):
    """
    Build a ``Song`` from the collected fields and save it via the manager.

    Verses whose tag does not start with C, V, B, I, P or E are stored as
    type "Other" and renumbered O1, O2, ...; the verse order is updated to
    use the new tags. Authors, the song book and topics are looked up
    through the manager and created when they are not found. A media file
    is only attached when no record with that file name is found. After
    saving, the importer is reset with set_defaults().
    """
    log.info(u'commiting song %s to database', self.title)
    song = Song()
    song.title = self.title
    song.alternate_title = self.alternate_title
    searchable_title = self.remove_punctuation(self.title).lower()
    searchable_alternate = \
        self.remove_punctuation(self.alternate_title).lower()
    song.search_title = searchable_title + '@' + searchable_alternate
    song.song_number = self.song_number
    # Verse types that can be derived from the first letter of the tag.
    known_types = {
        u'C': VerseType.Chorus,
        u'V': VerseType.Verse,
        u'B': VerseType.Bridge,
        u'I': VerseType.Intro,
        u'P': VerseType.PreChorus,
        u'E': VerseType.Ending,
    }
    renamed_tags = {}
    sxml = SongXML()
    next_other = 1
    lyric_parts = []
    for versetag, versetext in self.verses:
        letter = versetag[0]
        if letter in known_types:
            versetype = VerseType.to_string(known_types[letter])
        else:
            # Unknown type: file it under "Other" with a running number.
            newversetag = u'O%d' % next_other
            renamed_tags[versetag] = newversetag
            next_other += 1
            versetype = VerseType.to_string(VerseType.Other)
            log.info(u'Versetype %s changing to %s' , versetag, newversetag)
            versetag = newversetag
        sxml.add_verse_to_lyrics(versetype, versetag[1:], versetext)
        lyric_parts.append(u' ' + self.remove_punctuation(versetext))
    song.search_lyrics = u''.join(lyric_parts).lower()
    song.lyrics = unicode(sxml.extract_xml(), u'utf-8')
    # Replace renamed tags in place so the verse order matches the lyrics.
    self.verse_order_list[:] = [renamed_tags.get(tag, tag)
        for tag in self.verse_order_list]
    song.verse_order = u' '.join(self.verse_order_list)
    song.copyright = self.copyright
    song.comments = self.comments
    song.theme_name = self.theme_name
    song.ccli_number = self.ccli_number
    for authortext in self.authors:
        author = self.manager.get_object_filtered(Author,
            Author.display_name == authortext)
        if not author:
            # The last word is taken as the last name, the rest as first.
            name_parts = authortext.split(u' ')
            author = Author.populate(display_name=authortext,
                last_name=name_parts[-1],
                first_name=u' '.join(name_parts[:-1]))
        song.authors.append(author)
    for filename in self.media_files:
        media_file = self.manager.get_object_filtered(MediaFile,
            MediaFile.file_name == filename)
        if not media_file:
            song.media_files.append(MediaFile.populate(file_name=filename))
    if self.song_book_name:
        song_book = self.manager.get_object_filtered(Book,
            Book.name == self.song_book_name)
        if song_book is None:
            song_book = Book.populate(name=self.song_book_name,
                publisher=self.song_book_pub)
        song.book = song_book
    for topictext in self.topics:
        if len(topictext) == 0:
            continue
        topic = self.manager.get_object_filtered(Topic,
            Topic.name == topictext)
        if topic is None:
            topic = Topic.populate(name=topictext)
        song.topics.append(topic)
    self.manager.save_object(song)
    self.set_defaults()
2010-06-06 07:28:07 +00:00
2010-04-12 07:22:56 +00:00
def print_song(self):
    """
    Print all fields of the song to standard output. For debugging.

    The title, alternate title, verses, verse order, authors and topics are
    always printed; the remaining fields only when they are not empty.
    """
    print(u'========================================'
        + u'========================================')
    print(u'TITLE: ' + self.title)
    print(u'ALT TITLE: ' + self.alternate_title)
    for versetag, versetext in self.verses:
        print(u'VERSE ' + versetag + u': ' + versetext)
    print(u'ORDER: ' + u' '.join(self.verse_order_list))
    for author in self.authors:
        print(u'AUTHOR: ' + author)
    optional_before_topics = (
        (u'COPYRIGHT: ', self.copyright),
        (u'BOOK: ', self.song_book_name),
        (u'BOOK PUBLISHER: ', self.song_book_pub),
        (u'NUMBER: ', self.song_number),
    )
    for label, value in optional_before_topics:
        if value:
            print(label + value)
    for topictext in self.topics:
        print(u'TOPIC: ' + topictext)
    optional_after_topics = (
        (u'COMMENTS: ', self.comments),
        (u'THEME: ', self.theme_name),
        (u'CCLI: ', self.ccli_number),
    )
    for label, value in optional_after_topics:
        if value:
            print(label + value)