Change song lyrics selector to fetch link via class name

Also get the copyrights from the lyrics page, where the list has a class name.
This commit is contained in:
Daniel Martin 2021-01-30 06:01:42 +00:00 committed by Raoul Snyman
parent c7112a1f78
commit 81dc52110d
2 changed files with 9 additions and 9 deletions

View File

@ -182,7 +182,7 @@ class SongSelectImport(object):
except (TypeError, URLError) as error: except (TypeError, URLError) as error:
log.exception('Could not get song from SongSelect, {error}'.format(error=error)) log.exception('Could not get song from SongSelect, {error}'.format(error=error))
return None return None
lyrics_link = song_page.find('a', title='View song lyrics')['href'] lyrics_link = song_page.find('section', 'page-section').find('a')['href']
if callback: if callback:
callback() callback()
try: try:
@ -192,15 +192,13 @@ class SongSelectImport(object):
return None return None
if callback: if callback:
callback() callback()
copyright_elements = []
theme_elements = [] theme_elements = []
copyrights_regex = re.compile(r'\bCopyrights\b') # Themes regex only works if the ccli site is in english.
themes_regex = re.compile(r'\bThemes\b') themes_regex = re.compile(r'\bThemes\b')
for ul in song_page.find_all('ul', 'song-meta-list'): for ul in song_page.find_all('ul', 'song-meta-list'):
if ul.find('li', string=copyrights_regex):
copyright_elements.extend(ul.find_all('li')[1:])
if ul.find('li', string=themes_regex): if ul.find('li', string=themes_regex):
theme_elements.extend(ul.find_all('li')[1:]) theme_elements.extend(ul.find_all('li')[1:])
copyright_elements = lyrics_page.find('ul', 'copyright').find_all('li')
author_elements = song_page.find('div', 'content-title').find('ul', 'authors').find_all('li') author_elements = song_page.find('div', 'content-title').find('ul', 'authors').find_all('li')
song['title'] = unescape(song_page.find('div', 'content-title').find('h1').string.strip()) song['title'] = unescape(song_page.find('div', 'content-title').find('h1').string.strip())
song['authors'] = [unescape(li.find('a').string).strip() for li in author_elements] song['authors'] = [unescape(li.find('a').string).strip() for li in author_elements]

View File

@ -314,13 +314,12 @@ class TestSongSelectImport(TestCase, TestMixin):
</ul> </ul>
</div> </div>
<div class="song-content-data"><ul><li><strong>1234_cclinumber_5678</strong></li></ul></div> <div class="song-content-data"><ul><li><strong>1234_cclinumber_5678</strong></li></ul></div>
<a title="View song lyrics" href="pretend link"></a> <section class="page-section">
<a title="View song lyrics" href="pretend link"></a>
</section>
<ul class="song-meta-list"> <ul class="song-meta-list">
<li>Themes</li><li><a>theme1</a></li><li><a>theme2</a></li> <li>Themes</li><li><a>theme1</a></li><li><a>theme2</a></li>
</ul> </ul>
<ul class="song-meta-list">
<li>Copyrights</li><li>Copy thing</li><li>Copy thing 2</li>
</ul>
</body></html> </body></html>
''' '''
fake_lyrics_page = '''<!DOCTYPE html><html><body> fake_lyrics_page = '''<!DOCTYPE html><html><body>
@ -330,6 +329,9 @@ class TestSongSelectImport(TestCase, TestMixin):
<h3>Verse 2</h3> <h3>Verse 2</h3>
<p>verse thing 2</p> <p>verse thing 2</p>
</div> </div>
<ul class="copyright">
<li>Copy thing</li><li>Copy thing 2</li>
</ul>
</body></html> </body></html>
''' '''
mocked_get_page.side_effect = [fake_song_page, fake_lyrics_page] mocked_get_page.side_effect = [fake_song_page, fake_lyrics_page]