"""Parse ChordPro song files into songs, verses, lines, words and syllables."""
import re

from hyphen import Hyphenator

from chordpro.constants import KNOWN_DIRECTIVES, KNOWN_VERSE_TYPES

# ``{name: value}`` -- group 1 is the directive name, group 2 its value.
DIRECTIVE = re.compile(r'\{(.*?): *(.*?)\}')
# ``{start_of_<type>}`` or ``{start_of_<type>: title}`` -- group 1 is the
# verse type, group 3 the optional title.
START_OF = re.compile(r'\{start_of_(' + '|'.join(KNOWN_VERSE_TYPES) + r')(: *(.*?))?\}')
# ``{end_of_<type>}`` -- group 1 is the verse type.
END_OF = re.compile(r'\{end_of_(' + '|'.join(KNOWN_VERSE_TYPES) + r')\}')
# Leading text (group 1) followed by a bracketed chord (group 2).
CHORD_WORD = re.compile(r'(.*?)\[(.*?)\]')
# Hyphenator instances keyed by language code, created on first use.
HYPHEN_CACHE = {}
# Words with a 2-letter ending syllable currently do not get recognised by
# PyHyphen, so their syllables are listed by hand.
SYLLABLE_EXCEPTIONS = {
    'outer': ['out', 'er']
}


def _get_hyphenator(language):
    """Return the cached Hyphenator for ``language``, creating it if needed."""
    if language not in HYPHEN_CACHE:
        HYPHEN_CACHE[language] = Hyphenator(language)
    return HYPHEN_CACHE[language]


def _split_syllables(whole_word):
    """Split a chord-free word into a non-empty list of syllables.

    Hand-written exceptions win; otherwise the en_US dictionary is used,
    falling back to en_GB when en_US finds no hyphenation points. A word that
    cannot be split is returned as a single syllable.
    """
    if not whole_word:
        # Nothing to hyphenate (e.g. the word was only a chord).
        return [whole_word]
    if whole_word in SYLLABLE_EXCEPTIONS:
        return list(SYLLABLE_EXCEPTIONS[whole_word])
    hyphenator = _get_hyphenator('en_US')
    if not hyphenator.pairs(whole_word):
        # Do a fallback for en_GB, loaded only when it is actually needed.
        british = _get_hyphenator('en_GB')
        if british.pairs(whole_word):
            hyphenator = british
    return hyphenator.syllables(whole_word) or [whole_word]


class MismatchedVerseType(Exception):
    """Raised when an ``end_of_*`` marker does not close the open verse."""

    def __init__(self, line_number, expected_type):
        super().__init__('Mismatched verse type on line {}, expected {}'.format(line_number, expected_type))
        self.line_number = line_number
        self.expected_type = expected_type


class Directive:
    """A single ``{name: value}`` directive.

    ``directive`` holds the name and ``info`` the value; both stay ``None``
    when no line was given or the line is not a known directive.
    """

    def __init__(self, line=None):
        self.directive = None
        self.info = None
        if line:
            self.parse(line)

    @staticmethod
    def _match_known(line):
        """Return the regex match if ``line`` holds a known directive, else None."""
        match = DIRECTIVE.match(line)
        if not match:
            return None
        name = match.group(1)
        # Each KNOWN_DIRECTIVES entry is tested with ``in``; presumably an
        # entry is a collection of aliases -- verify against chordpro.constants.
        if any(name in known_directive for known_directive in KNOWN_DIRECTIVES):
            return match
        return None

    def parse(self, line):
        """Fill ``directive`` and ``info`` from a directive line.

        Lines that are not a known directive leave the object unchanged.
        Always returns ``None``.
        """
        match = self._match_known(line)
        if match is None:
            return
        self.directive = match.group(1)
        self.info = match.group(2)

    @staticmethod
    def is_directive(line):
        """Check if a line in a file contains a known directive."""
        return Directive._match_known(line) is not None


class Syllable:
    """A syllable of text with the chord attached to it, if any."""

    def __init__(self, syllable, chord=None):
        self.syllable = syllable
        self.chord = chord


class Word:
    """A word split into ``Syllable`` objects carrying their chords."""

    def __init__(self, word=None):
        self.syllables = []
        if word:
            self.parse(word)

    def parse(self, word):
        """Parse a word into syllables with chords.

        1. Split the word at its ``[chord]`` markers
        2. Rejoin the text and split it into syllables
        3. Find the syllable that starts where each chord was written
        4. Add the chord to that syllable

        A chord written in the middle of a syllable, or after the last
        character of the word, is not attached to any syllable.
        """
        text_parts = []
        # Due to the regex, chords are always one behind the text that follows
        # them, so start with an empty item to bump them along.
        chords = ['']
        match = CHORD_WORD.match(word)
        while match:
            text_parts.append(match.group(1))
            chords.append(match.group(2))
            # Drop only the matched prefix; str.replace would also delete any
            # later repetition of the same text.
            word = word[match.end():]
            match = CHORD_WORD.match(word)
        # Any left over portion is the rest of the word.
        text_parts.append(word)
        whole_word = ''.join(text_parts)

        # Map each character offset in the bare word to the chord written
        # there. When several parts share an offset (empty text between
        # chords) the last chord wins.
        chord_at = {}
        offset = 0
        for part, chord in zip(text_parts, chords):
            chord_at[offset] = chord
            offset += len(part)

        self.syllables = []
        cursor = 0
        for syll in _split_syllables(whole_word):
            # Locate the syllable by position so that repeated syllables are
            # not all matched against the first occurrence.
            start = whole_word.find(syll, cursor)
            if start < 0:
                start = cursor
            self.syllables.append(Syllable(syll, chord_at.get(start)))
            cursor = start + len(syll)


class Line:
    """A line of lyrics, split on single spaces into ``Word`` objects."""

    def __init__(self, line=None):
        if line:
            self.words = [Word(word) for word in line.split(' ')]
        else:
            self.words = []


class Verse:
    """A typed section of a song with a title and its lines."""

    def __init__(self, type_, title=None, content=None):
        self.type_ = type_
        # Default the title to the capitalised verse type.
        self.title = title or type_.title()
        self.lines = [Line(line) for line in content.splitlines()] if content else []

    def add_line(self, line):
        """Append a line of lyrics to the verse."""
        self.lines.append(Line(line))

    @classmethod
    def parse(cls, line):
        """Create an empty verse from a ``{start_of_<type>}`` line.

        The optional ``: title`` part becomes the verse title; without it the
        title defaults to the capitalised verse type.

        Raises:
            ValueError: if ``line`` is not a start-of-verse marker.
        """
        match = START_OF.match(line)
        if match is None:
            raise ValueError('Not a start of verse line: {!r}'.format(line))
        # group(3) is None when no title was written; the constructor then
        # falls back to the default title instead of storing None.
        return cls(match.group(1), title=match.group(3))

    @staticmethod
    def is_start_of_verse(line):
        """Check if a line is a ``{start_of_<type>}`` marker."""
        return START_OF.match(line) is not None

    @staticmethod
    def is_end_of_verse(line, type_=None):
        """Check if a line is an ``{end_of_<type>}`` marker.

        With ``type_`` given, the marker must also be for that verse type.
        """
        match = END_OF.match(line)
        if match is None:
            return False
        if not type_:
            return True
        return match.group(1) == type_


class Metadata:
    """Directives of a song, keyed by directive name."""

    def __init__(self):
        self._directives = {}

    def add(self, directive):
        """Store a directive, replacing any earlier one with the same name."""
        self._directives[directive.directive] = directive

    def get(self, key):
        """Return the value of the directive named ``key``, or None if absent."""
        directive = self._directives.get(key)
        return directive.info if directive else None


class Song:
    """A song: its metadata directives and its verses."""

    def __init__(self, filename=None):
        self.filename = filename
        # Always defined, so a Song created without a file is still usable.
        self.metadata = Metadata()
        self.verses = []
        if self.filename:
            self.parse(self.filename)

    def parse(self, filename):
        """Read ``filename`` and rebuild ``metadata`` and ``verses`` from it.

        Known directives go to ``metadata``. Lines between a start-of-verse
        and its end-of-verse marker are stripped and added to that verse; a
        verse is stored once its end marker is read. Other lines are ignored.

        Raises:
            MismatchedVerseType: if an end-of-verse marker does not match the
                open verse, or appears while no verse is open (the expected
                type is then reported as None). Line numbers are 1-based.
        """
        self.metadata = Metadata()
        self.verses = []
        with open(filename) as song_file:
            current_verse = None
            for line_number, line in enumerate(song_file, start=1):
                if Directive.is_directive(line):
                    self.metadata.add(Directive(line))
                elif Verse.is_start_of_verse(line):
                    current_verse = Verse.parse(line)
                elif Verse.is_end_of_verse(line):
                    if current_verse is None:
                        raise MismatchedVerseType(line_number, None)
                    if not Verse.is_end_of_verse(line, current_verse.type_):
                        raise MismatchedVerseType(line_number, current_verse.type_)
                    self.verses.append(current_verse)
                    current_verse = None
                elif current_verse is not None:
                    current_verse.add_line(line.strip())