igitar/src/igitar/base.py

274 lines
9.0 KiB
Python

from hyphen import Hyphenator
from igitar.constants import BRIDGE_MARKER, CHORD_WORD, CHORUS_MARKER, DIRECTIVE, END_OF, KNOWN_DIRECTIVES, START_OF, \
VERSE_MARKER
# Hyphenator instances keyed by language code; Word.parse creates the entries on first use.
HYPHEN_CACHE = {}

# Hand-written syllable splits, consulted by whole word before any hyphenator is asked.
# Needed because words with a 2-letter ending syllable are not recognised by PyHyphen.
SYLLABLE_EXCEPTIONS = {'outer': ['out', 'er']}
class MismatchedVerseType(Exception):
    """Error signalling that a verse type found on a line is not the one that was expected."""

    def __init__(self, line_number, expected_type):
        """Build the error message.

        :param line_number: line number to report in the message
        :param expected_type: the verse type that was expected on that line
        """
        message = 'Mismatched verse type on line {}, expected {}'.format(line_number, expected_type)
        super().__init__(message)
class Directive(object):
    """A directive line, reduced to its canonical name and its value.

    ``directive`` and ``info`` both stay ``None`` until a line is parsed whose name is
    found in one of the ``KNOWN_DIRECTIVES`` entries.
    """

    def __init__(self, line=None):
        """Create the directive, parsing ``line`` straight away when one is given."""
        self.directive = None
        self.info = None
        if line:
            self.parse(line)

    def parse(self, line):
        """Fill ``directive`` and ``info`` from a directive line.

        The object is updated in place and nothing useful is returned. A line that the
        ``DIRECTIVE`` pattern does not match leaves the object untouched.

        :param line: the raw line of text to inspect
        """
        found = DIRECTIVE.match(line)
        if not found:
            return None
        name = found.group(1)
        # Each KNOWN_DIRECTIVES entry is indexed and searched below; presumably a sequence of
        # aliases with the canonical name first -- verify against igitar.constants.
        for aliases in KNOWN_DIRECTIVES:
            if name not in aliases:
                continue
            self.directive = aliases[0]
            self.info = found.group(2)

    @staticmethod
    def is_directive(line):
        """Tell whether ``line`` is matched by the ``DIRECTIVE`` pattern."""
        found = DIRECTIVE.match(line)
        return found is not None
class Syllable(object):
    """A fragment of a word together with the chord attached to it, if any."""

    def __init__(self, syllable, chord=None):
        """Store the syllable text and its chord.

        :param syllable: the text of the syllable
        :param chord: the chord attached to this syllable, ``None`` when there is none
        """
        self.syllable, self.chord = syllable, chord
class Word(object):
    """A single word of a lyric line, broken into syllables that may carry chords.

    ``syllables`` is a list of ``Syllable`` objects. Chords left over once every syllable has
    been placed are appended as extra syllables with empty text.
    """

    def __init__(self, word=None):
        """Create the word, parsing ``word`` straight away when it is non-empty."""
        self.syllables = []
        if word:
            self.parse(word)

    @staticmethod
    def _split_chords(word):
        """Split ``word`` into its text pieces and the chord attached to each piece.

        :param word: the raw word, possibly containing inline chords
        :return: ``(word_parts, chords)``, two lists of equal length; ``chords[i]`` is the chord
            attached to ``word_parts[i]``, and the first chord is always ``''``
        """
        word_parts = []
        chords = ['']
        match = CHORD_WORD.match(word)
        while match:
            word_parts.append(match.group(1))
            chords.append(match.group(2))
            # match() is anchored at the start of the string, so the matched text is a prefix.
            # Remove that first occurrence only: an unlimited replace() would also delete any
            # identical "text[chord]" segment later in the word (e.g. "la[G]la[G]").
            word = word.replace(match.group(0), '', 1)
            match = CHORD_WORD.match(word)
        # If there are any left over portions, just add them as the rest of the word
        word_parts.append(word)
        return word_parts, chords

    @staticmethod
    def _syllabify(whole_word):
        """Return the list of syllables of ``whole_word`` (the word with its chords removed).

        ``SYLLABLE_EXCEPTIONS`` wins over the hyphenators. Otherwise the en_US hyphenator is
        used, unless it finds no hyphenation pairs for the word and the en_GB one does. A word
        that yields no syllables at all is returned as a single syllable.
        """
        if whole_word in SYLLABLE_EXCEPTIONS:
            # words with a 2-letter ending syllable currently do not get recognised by PyHyphen
            sylls = SYLLABLE_EXCEPTIONS[whole_word]
        else:
            # Both hyphenators are created on first use and then kept for the process lifetime
            for language in ('en_US', 'en_GB'):
                if language not in HYPHEN_CACHE:
                    HYPHEN_CACHE[language] = Hyphenator(language)
            hyphenator = HYPHEN_CACHE['en_US']
            # Do a fallback for en_GB
            if not hyphenator.pairs(whole_word) and HYPHEN_CACHE['en_GB'].pairs(whole_word):
                hyphenator = HYPHEN_CACHE['en_GB']
            sylls = hyphenator.syllables(whole_word)
        return sylls if sylls else [whole_word]

    def parse(self, word):
        """Parse a word into syllables with chords.

        1. Split word by chords
        2. Rejoin word, split into syllables
        3. Track down syllable before chord
        4. Add chord to syllable

        Replaces ``self.syllables`` with the result.

        :param word: the raw word, possibly containing inline chords
        """
        word_parts, chords = self._split_chords(word)
        whole_word = ''.join(word_parts)
        self.syllables = []
        for syll in self._syllabify(whole_word):
            syllable = Syllable(syll)
            matched_at = None
            for idx, part in enumerate(word_parts):
                if part.startswith(syll):
                    # This syllable opens a text piece, so it takes that piece's chord
                    syllable.chord = chords[idx]
                    matched_at = idx
                    break
            self.syllables.append(syllable)
            if matched_at is not None:
                # Drop everything up to and including the piece that was just used
                word_parts = word_parts[matched_at + 1:]
                chords = chords[matched_at + 1:]
        # Process any left over chords, they're trailing chords
        for chord in chords:
            self.syllables.append(Syllable('', chord))
class Line(object):
    """One line of lyrics, stored in ``words`` as a list of ``Word`` objects.

    The line is split on single spaces. Chords sitting at the very end of the last word are
    detached and become words of their own, after the last word.
    """

    def __init__(self, line=None):
        """Split ``line`` into words; an empty or missing line gives an empty word list."""
        if not line:
            self.words = []
            return
        words = line.split(' ')
        # Split trailing chords into their own "words"
        last_word, trailing_chords = self._split_trailing_chords(words[-1])
        words[-1] = last_word
        words.extend(trailing_chords)
        self.words = [Word(word) for word in words]

    @staticmethod
    def _split_trailing_chords(word):
        """Peel the ``[chord]`` groups off the end of ``word``.

        Only groups at the very end are removed. A chord inside the word is kept even when it
        is identical to a trailing one, and a stray ``]`` without a matching ``[`` is left alone.

        :param word: the last word of a line
        :return: ``(remaining_word, chords)`` with the chords in their original left-to-right order
        """
        chords = []
        while word.endswith(']'):
            start = word.rfind('[')
            # Stop when there is no opening bracket, or when the candidate group holds an extra
            # "]" before its final one and so is not a well-formed chord.
            if start == -1 or word.find(']', start) != len(word) - 1:
                break
            chords.append(word[start:])
            word = word[:start]
        # Groups were collected right-to-left; restore reading order
        chords.reverse()
        return word, chords
class Verse(object):
    """A block of lyric lines with a type and a title.

    ``type_`` is the type string the verse was created with, ``title`` defaults to that type
    in title case, and ``lines`` is a list of ``Line`` objects.
    """

    def __init__(self, type_, title=None, content=None):
        """Create a verse.

        :param type_: the verse type string
        :param title: display title; falls back to ``type_.title()`` when empty
        :param content: optional text, one lyric line per text line
        """
        self.type_ = type_
        self.title = title or type_.title()
        self.lines = [Line(line) for line in content.splitlines()] if content else []

    def add_line(self, line):
        """Parse ``line`` into a ``Line`` and append it to this verse."""
        self.lines.append(Line(line))

    @classmethod
    def parse(cls, line):
        """Parse the line into a verse type

        ``line`` must be a start-of-verse line (see ``is_start_of_verse``); any other line
        raises ``AttributeError`` because there is no match to read from.

        :param line: the start-of-verse line
        :return: a new, empty verse
        """
        match = START_OF.match(line)
        verse = cls(match.group(1))
        # Group 3 is read as the optional title, so only do so when the pattern has that group
        if len(match.groups()) >= 3:
            verse.title = match.group(3) or match.group(1).title()
        return verse

    @staticmethod
    def is_start_of_verse(line):
        """Tell whether ``line`` is matched by the ``START_OF`` pattern."""
        return START_OF.match(line) is not None

    @staticmethod
    def is_end_of_verse(line, type_=None):
        """Tell whether ``line`` ends a verse, optionally of one specific type.

        :param line: the line to inspect
        :param type_: when given, the line must end a verse of exactly this type
        :return: ``False`` for a line that is not an end-of-verse line at all, whether or not
            ``type_`` is given
        """
        match = END_OF.match(line)
        if match is None:
            return False
        if not type_:
            return True
        return match.group(1) == type_

    @staticmethod
    def _name_from_marker(pattern, line):
        """Return group 2 of ``pattern`` matched against ``line``, or ``None`` if unavailable."""
        match = pattern.match(line)
        if match and len(match.groups()) > 1:
            return match.group(2)
        return None

    @staticmethod
    def is_verse_marker(line):
        """Tell whether ``line``, ignoring surrounding whitespace, starts with ``{verse``."""
        return line.strip().startswith('{verse')

    @staticmethod
    def get_verse_from_marker(line):
        """Return the name captured by ``VERSE_MARKER`` in ``line``, or ``None``."""
        return Verse._name_from_marker(VERSE_MARKER, line)

    @staticmethod
    def is_chorus_marker(line):
        """Tell whether ``line``, ignoring surrounding whitespace, starts with ``{chorus``."""
        return line.strip().startswith('{chorus')

    @staticmethod
    def get_chorus_from_marker(line):
        """Return the name captured by ``CHORUS_MARKER`` in ``line``, or ``None``."""
        return Verse._name_from_marker(CHORUS_MARKER, line)

    @staticmethod
    def is_bridge_marker(line):
        """Tell whether ``line``, ignoring surrounding whitespace, starts with ``{bridge``."""
        return line.strip().startswith('{bridge')

    @staticmethod
    def get_bridge_from_marker(line):
        """Return the name captured by ``BRIDGE_MARKER`` in ``line``, or ``None``."""
        return Verse._name_from_marker(BRIDGE_MARKER, line)
class Metadata(object):
    """The directives of a song, indexed by directive name."""

    def __init__(self):
        """Start with no directives."""
        self._directives = {}

    def add(self, directive):
        """Store ``directive`` under its ``directive`` name, replacing any earlier one of that name."""
        name = directive.directive
        self._directives[name] = directive

    def get(self, key):
        """Return the ``info`` of the directive stored under ``key``, or ``None`` if there is none."""
        entry = self._directives.get(key)
        if not entry:
            return None
        return entry.info
class Song(object):
    """A song: its metadata, the verses it defines and the order in which they are played.

    ``metadata`` is a ``Metadata`` object, ``verses`` lists every verse that was closed by an
    end-of-verse line, and ``verse_order`` lists verses in playing order (a verse appears once
    where it is defined and again for each marker line that refers back to it).
    """

    def __init__(self, filename=None):
        """Create a song, loading and parsing ``filename`` when one is given.

        :param filename: optional path of the song file to read
        """
        self.filename = filename
        # Set up empty containers so a song made without a file is still usable before parse()
        self.metadata = Metadata()
        self.verses = []
        self.verse_order = []
        if self.filename:
            with open(filename) as song_file:
                self.parse(song_file.read())

    def _repeat_verse(self, type_, name):
        """Append to ``verse_order`` the latest closed verse of ``type_``, titled ``name`` if given."""
        for verse in reversed(self.verses):
            if verse.type_ != type_:
                continue
            if not name or verse.title == name:
                self.verse_order.append(verse)
                break

    def parse(self, text):
        """Parse the text of a song, replacing anything parsed before.

        :param text: the complete text of the song
        :raises MismatchedVerseType: when an end-of-verse line does not close the verse that is
            currently open, or appears while no verse is open (the expected type is then
            reported as ``None``); the line number in the message is 1-based
        """
        self.metadata = Metadata()
        self.verses = []
        self.verse_order = []
        # The verse currently being filled; None while outside any verse
        current_verse = None
        for line_number, line in enumerate(text.splitlines(), start=1):
            if Directive.is_directive(line):
                self.metadata.add(Directive(line))
            elif Verse.is_start_of_verse(line):
                current_verse = Verse.parse(line)
                self.verse_order.append(current_verse)
            elif Verse.is_end_of_verse(line):
                expected_type = current_verse.type_ if current_verse is not None else None
                if current_verse is None or not Verse.is_end_of_verse(line, expected_type):
                    raise MismatchedVerseType(line_number, expected_type)
                self.verses.append(current_verse)
                current_verse = None
            elif current_verse is not None:
                # Inside a verse every remaining kind of line is a lyric line
                current_verse.add_line(line.strip())
            elif Verse.is_verse_marker(line):
                self._repeat_verse('verse', Verse.get_verse_from_marker(line))
            elif Verse.is_chorus_marker(line):
                self._repeat_verse('chorus', Verse.get_chorus_from_marker(line))
            elif Verse.is_bridge_marker(line):
                self._repeat_verse('bridge', Verse.get_bridge_from_marker(line))