openlp/openlp/plugins/songs/lib/importers/opensong.py

298 lines
13 KiB
Python
Raw Normal View History

2010-06-15 20:03:47 +00:00
# -*- coding: utf-8 -*-
2013-01-06 17:25:49 +00:00
# vim: autoindent shiftwidth=4 expandtab textwidth=120 tabstop=4 softtabstop=4
2010-06-15 20:03:47 +00:00
###############################################################################
# OpenLP - Open Source Lyrics Projection #
# --------------------------------------------------------------------------- #
2015-12-31 22:46:06 +00:00
# Copyright (c) 2008-2016 OpenLP Developers #
2010-06-15 20:03:47 +00:00
# --------------------------------------------------------------------------- #
# This program is free software; you can redistribute it and/or modify it #
# under the terms of the GNU General Public License as published by the Free #
# Software Foundation; version 2 of the License. #
# #
# This program is distributed in the hope that it will be useful, but WITHOUT #
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for #
# more details. #
# #
# You should have received a copy of the GNU General Public License along #
# with this program; if not, write to the Free Software Foundation, Inc., 59 #
# Temple Place, Suite 330, Boston, MA 02111-1307 USA #
###############################################################################
2010-07-19 20:43:02 +00:00
import logging
2011-04-17 15:47:02 +00:00
import re
2010-06-15 21:25:50 +00:00
from lxml import objectify
from lxml.etree import Error, LxmlError
2010-06-15 20:03:47 +00:00
2013-10-13 20:36:42 +00:00
from openlp.core.common import translate
2011-04-25 22:15:38 +00:00
from openlp.plugins.songs.lib import VerseType
2014-07-04 09:31:06 +00:00
from openlp.plugins.songs.lib.importers.songimport import SongImport
2011-04-18 16:46:22 +00:00
from openlp.plugins.songs.lib.ui import SongStrings
2010-06-30 20:05:43 +00:00
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
2013-10-13 20:36:42 +00:00
2010-08-28 23:09:05 +00:00
class OpenSongImport(SongImport):
    """
    Import songs exported from OpenSong

    The format is described loosely on the `OpenSong File Format Specification
    <http://www.opensong.org/d/manual/song_file_format_specification>`_ page on the OpenSong web site. However, it
    doesn't describe the <lyrics> section, so here's an attempt:

    If the first character of a line is a space, then the rest of that line is lyrics. If it is not a space the
    following applies.

    Verses can be expressed in one of 2 ways, either in complete verses, or by line grouping, i.e. grouping all
    line 1's of a verse together, all line 2's of a verse together, and so on.

    An example of complete verses::

        <lyrics>
        [v1]
         List of words
         Another Line

        [v2]
         Some words for the 2nd verse
         etc...
        </lyrics>

    The 'v' in the verse specifiers above can be left out, it is implied.

    An example of line grouping::

        <lyrics>
        [V]
        1List of words
        2Some words for the 2nd Verse

        1Another Line
        2etc...
        </lyrics>

    Either or both forms can be used in one song. When line grouping is used, this importer only recognises the
    verse number when it is the first character of the line, and only a single digit is read. Additionally, the
    [v1] labels can have either upper or lower case Vs.

    Other labels can be used also:

    C
        Chorus

    B
        Bridge

    All verses are imported and tagged appropriately.

    Guitar chords can be provided "above" the lyrics (the line is preceded by a period "."), and one or more "_" can
    be used to signify long-drawn-out words. Chords and "_" are removed by this importer. For example::

        . A7        Bm
        1 Some____ Words

    Lines that contain only whitespace are ignored, as are lines starting with "---" or "-!!" (page and column
    breaks).

    | indicates a line break, and || a new slide. A line consisting solely of | becomes an empty line::

        Slide 1 Line 1|Slide 1 Line 2||Slide 2 Line 1|Slide 2 Line 2

    Lines beginning with ; are comments

    The <presentation> tag is used to populate the OpenLP verse display order field. The Author and Copyright tags
    are also imported to the appropriate places.
    """

    def __init__(self, manager, **kwargs):
        """
        Initialise the class.

        :param manager: Passed straight through to the ``SongImport`` initialiser.
        :param kwargs: Extra keyword arguments, also passed through to the ``SongImport`` initialiser.
        """
        super(OpenSongImport, self).__init__(manager, **kwargs)

    def do_import(self):
        """
        Import every file named in ``self.import_source``.

        Nothing is imported unless ``self.import_source`` is a list. The loop stops early as soon as
        ``self.stop_import_flag`` is set.
        """
        if not isinstance(self.import_source, list):
            return
        self.import_wizard.progress_bar.setMaximum(len(self.import_source))
        for filename in self.import_source:
            if self.stop_import_flag:
                return
            # The context manager closes the file even when importing this song raises.
            with open(filename, 'rb') as song_file:
                self.do_import_file(song_file)

    def do_import_file(self, file):
        """
        Process the OpenSong file - pass in a file-like object, not a file path.

        :param file: An open file-like object holding the OpenSong XML. Its ``name`` attribute is used when an error
            is reported.
        """
        self.set_defaults()
        try:
            tree = objectify.parse(file)
        except (Error, LxmlError):
            self.log_error(file.name, SongStrings.XMLSyntaxError)
            log.exception('Error parsing XML')
            return
        root = tree.getroot()
        if root.tag != 'song':
            self.log_error(file.name, str(
                translate('SongsPlugin.OpenSongImport', 'Invalid OpenSong song file. Missing song tag.')))
            return
        fields = dir(root)
        self._import_metadata(root, fields)
        self._import_topics(root, fields)
        lyrics = str(root.lyrics) if 'lyrics' in fields else ''
        verses, verse_order = self._parse_lyrics(lyrics)
        self._add_parsed_verses(verses, verse_order)
        self._import_presentation_order(root, fields, verses)
        if not self.finish():
            self.log_error(file.name)

    def _import_metadata(self, root, fields):
        """
        Copy the simple song properties (title, authors, copyright, ...) from the XML onto this importer.

        :param root: The root <song> element.
        :param fields: The names available on ``root`` (the result of ``dir(root)``).
        """
        # Maps an OpenSong element name either to the name of the attribute that receives its text, or to a callable
        # that is handed the text.
        decode = {
            'copyright': self.add_copyright,
            'ccli': 'ccli_number',
            'author': self.parse_author,
            'title': 'title',
            'aka': 'alternate_title',
            'hymn_number': self.parse_song_book_name_and_number,
            'user1': self.add_comment,
            'user2': self.add_comment,
            'user3': self.add_comment
        }
        for attr, fn_or_string in decode.items():
            if attr not in fields:
                continue
            ustring = str(getattr(root, attr))
            if not isinstance(fn_or_string, str):
                fn_or_string(ustring)
            elif attr == 'ccli':
                # Keep only the digits of the CCLI number; store None when there are none.
                digits = ''.join(re.findall(r'\d+', ustring))
                setattr(self, fn_or_string, int(digits) if digits else None)
            else:
                setattr(self, fn_or_string, ustring)

    def _import_topics(self, root, fields):
        """
        Merge the sub-themes of the <theme> and <alttheme> elements into ``self.topics``.

        :param root: The root <song> element.
        :param fields: The names available on ``root`` (the result of ``dir(root)``).
        """
        # Themes look like "God: Awe/Wonder", but we just want "Awe" and "Wonder". We use a set to ensure each topic
        # is only added once, in case it is already there, which is actually quite likely if the alttheme is set
        topics = set(self.topics)
        for field in ('theme', 'alttheme'):
            if field in fields:
                theme = str(getattr(root, field))
                subthemes = theme[theme.find(':') + 1:].split('/')
                topics.update(topic.strip() for topic in subthemes)
        self.topics = sorted(topics)

    @staticmethod
    def _parse_verse_marker(marker_line):
        """
        Work out the verse tag and verse number named by a ``[...]`` marker line.

        :param marker_line: A lyrics line starting with ``[``.
        :return: A tuple ``(verse_tag, verse_num)``; ``verse_tag`` is an entry of ``VerseType.tags`` and
            ``verse_num`` is a string of digits.
        """
        # Drop the square brackets. When the closing bracket is missing use the rest of the line, rather than letting
        # find()'s -1 silently cut off the last character.
        right_bracket = marker_line.find(']')
        if right_bracket == -1:
            content = marker_line[1:].strip()
        else:
            content = marker_line[1:right_bracket]
        content = content.lower()
        # Have we got any digits? If so, the verse number is the first run of digits (openlp does not have the concept
        # of part verses, so anything after the integer is ignored, including floats)
        match = re.match(r'(\D*)(\d+)', content)
        if match is not None:
            verse_tag, verse_num = match.groups()
        else:
            # Otherwise we assume number 1 and take the whole content as the verse tag
            verse_tag, verse_num = content, '1'
        # NOTE(review): index 0 is presumably the plain verse type (VerseType.Verse) - confirm
        verse_index = VerseType.from_loose_input(verse_tag) if verse_tag else 0
        return VerseType.tags[verse_index], verse_num

    def _parse_lyrics(self, lyrics):
        """
        Split the text of the <lyrics> element into verses.

        :param lyrics: The raw lyrics text.
        :return: A tuple ``(verses, verse_order)``. ``verses`` maps verse tag -> verse number -> instance number ->
            list of lyric lines. ``verse_order`` holds ``[verse_tag, verse_num, inst]`` lists in the order in which
            the sections first appeared.
        """
        # Data storage while importing
        verses = {}
        # Keep track of verses appearance order
        verse_order = []
        # Default verse, used for lines that appear before any marker
        verse_tag = VerseType.tags[VerseType.Verse]
        verse_num = '1'
        # For the case where song has several sections with same marker
        inst = 1
        for this_line in lyrics.split('\n'):
            # Skip blank lines, comments (;), guitar chords (.) and page and column breaks (--- and -!!)
            if not this_line.strip() or this_line.startswith((';', '.', '---', '-!!')):
                continue
            # Verse/chorus/etc. marker
            if this_line.startswith('['):
                verse_tag, verse_num = self._parse_verse_marker(this_line)
                inst = 1
                if [verse_tag, verse_num, inst] in verse_order and verse_num in verses.get(verse_tag, {}):
                    inst = len(verses[verse_tag][verse_num]) + 1
                continue
            # Number at start of line.. it's verse number. isdecimal() is used (not isdigit()) because the number is
            # later converted with int(), which rejects digit-like characters such as superscripts.
            if this_line[0].isdecimal():
                verse_num = this_line[0]
                this_line = this_line[1:].strip()
            instances = verses.setdefault(verse_tag, {}).setdefault(verse_num, {})
            if inst not in instances:
                instances[inst] = []
                verse_order.append([verse_tag, verse_num, inst])
            # Tidy text and remove the ____s from extended words
            this_line = self.tidy_text(this_line)
            this_line = this_line.replace('_', '')
            this_line = this_line.replace('||', '\n[---]\n')
            this_line = this_line.strip()
            # If the line consists solely of a '|', then just use the implicit newline
            # Otherwise, add a newline for each '|'
            if this_line == '|':
                this_line = ''
            else:
                this_line = this_line.replace('|', '\n')
            instances[inst].append(this_line)
        return verses, verse_order

    def _add_parsed_verses(self, verses, verse_order):
        """
        Add the parsed verses to the song, joining repeated sections with a ``[---]`` separator.

        :param verses: The nested verse dictionary built by ``_parse_lyrics``.
        :param verse_order: The order-of-appearance list built by ``_parse_lyrics``.
        """
        verse_joints = {}
        for verse_tag, verse_num, inst in verse_order:
            lines = '\n'.join(verses[verse_tag][verse_num][inst])
            # verse_num only ever consists of decimal digits here, so it can be used as is.
            verse_def = '%s%s' % (verse_tag, verse_num)
            if verse_def in verse_joints:
                verse_joints[verse_def] = '%s\n[---]\n%s' % (verse_joints[verse_def], lines)
            else:
                verse_joints[verse_def] = lines
        # Parsing the dictionary produces the elements in a non-intuitive order. While it "works", it's not a
        # natural layout should the user come back to edit the song. Instead we sort by the verse type, so that we
        # get all the verses in order (v1, v2, ...), then the chorus(es), bridge(s), pre-chorus(es) etc. We use a
        # tuple for the key, since tuples naturally sort in this manner.
        verse_defs = sorted(verse_joints,
                            key=lambda verse_def: (VerseType.from_tag(verse_def[0]), int(verse_def[1:])))
        for verse_def in verse_defs:
            self.add_verse(verse_joints[verse_def], verse_def)
        if not self.verses:
            self.add_verse('')

    def _import_presentation_order(self, root, fields, verses):
        """
        Fill ``self.verse_order_list`` from the <presentation> element, if there is one.

        :param root: The root <song> element.
        :param fields: The names available on ``root`` (the result of ``dir(root)``).
        :param verses: The nested verse dictionary built by ``_parse_lyrics``; entries of the presentation order
            that do not refer to one of these verses are dropped.
        """
        if 'presentation' not in fields or not root.presentation:
            return
        # We make all the tags in the lyrics lower case, so match that here and then split into a list on the
        # whitespace.
        for verse_def in str(root.presentation).lower().split():
            match = re.match(r'(\D*)(\d+.*)', verse_def)
            if match is not None:
                verse_tag, verse_num = match.groups()
                if not verse_tag:
                    verse_tag = VerseType.tags[VerseType.Verse]
            else:
                # Assume it's no.1 if there are no digits
                verse_tag, verse_num = verse_def, '1'
            verse_index = VerseType.from_loose_input(verse_tag)
            verse_tag = VerseType.tags[verse_index]
            verse_def = '%s%s' % (verse_tag, verse_num)
            if verse_num in verses.get(verse_tag, {}):
                self.verse_order_list.append(verse_def)
            else:
                log.info('Got order %s but not in verse tags, dropping this item from presentation order',
                         verse_def)