diff --git a/openlp/plugins/songs/lib/__init__.py b/openlp/plugins/songs/lib/__init__.py index c12350bfd..332fe3634 100644 --- a/openlp/plugins/songs/lib/__init__.py +++ b/openlp/plugins/songs/lib/__init__.py @@ -32,9 +32,8 @@ from PyQt5 import QtWidgets from openlp.core.common import AppLocation from openlp.core.lib import translate from openlp.core.utils import CONTROL_CHARS -from openlp.plugins.songs.lib.db import MediaFile, Song -from .db import Author -from .ui import SongStrings +from openlp.plugins.songs.lib.db import Author, MediaFile, Song, Topic +from openlp.plugins.songs.lib.ui import SongStrings log = logging.getLogger(__name__) diff --git a/openlp/plugins/songs/lib/db.py b/openlp/plugins/songs/lib/db.py index 53dbdf695..b52cb8304 100644 --- a/openlp/plugins/songs/lib/db.py +++ b/openlp/plugins/songs/lib/db.py @@ -135,7 +135,7 @@ class Song(BaseModel): def add_author(self, author, author_type=None): """ - Add an author to the song if it not yet exists + Add an author to the song if it doesn't exist yet :param author: Author object :param author_type: AuthorType constant or None @@ -162,7 +162,7 @@ class Song(BaseModel): def add_songbook_entry(self, songbook, entry): """ - Add a Songbook Entry to the song if it not yet exists + Add a Songbook Entry to the song if it doesn't exist yet :param songbook_name: Name of the Songbook. :param entry: Entry in the Songbook (usually a number) diff --git a/openlp/plugins/songs/lib/songselect.py b/openlp/plugins/songs/lib/songselect.py index ca417b504..eb9bdede4 100644 --- a/openlp/plugins/songs/lib/songselect.py +++ b/openlp/plugins/songs/lib/songselect.py @@ -24,6 +24,8 @@ The :mod:`~openlp.plugins.songs.lib.songselect` module contains the SongSelect i """ import logging import sys +import random +import re from http.cookiejar import CookieJar from urllib.parse import urlencode from urllib.request import HTTPCookieProcessor, URLError, build_opener @@ -32,14 +34,21 @@ from html import unescape from bs4 import BeautifulSoup, NavigableString -from openlp.plugins.songs.lib import Song, VerseType, clean_song, Author +from openlp.plugins.songs.lib import Song, Author, Topic, VerseType, clean_song from openlp.plugins.songs.lib.openlyricsxml import SongXML -USER_AGENT = 'Mozilla/5.0 (Linux; U; Android 4.0.3; en-us; GT-I9000 ' \ - 'Build/IML74K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 ' \ - 'Mobile Safari/534.30' -BASE_URL = 'https://mobile.songselect.com' -LOGIN_URL = BASE_URL + '/account/login' +USER_AGENTS = [ + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) ' + 'Chrome/52.0.2743.116 Safari/537.36', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36', + 'Mozilla/5.0 (X11; Linux x86_64; rv:47.0) Gecko/20100101 Firefox/47.0', + 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:47.0) Gecko/20100101 Firefox/47.0' +] +BASE_URL = 'https://songselect.ccli.com' +LOGIN_PAGE = 'https://profile.ccli.com/account/signin?appContext=SongSelect&returnUrl=' \ + 'https%3a%2f%2fsongselect.ccli.com%2f' +LOGIN_URL = 'https://profile.ccli.com/' LOGOUT_URL = BASE_URL + '/account/logout' SEARCH_URL = BASE_URL + '/search/results' @@ -60,7 +69,7 @@ class SongSelectImport(object): self.db_manager = db_manager self.html_parser = HTMLParser() self.opener = build_opener(HTTPCookieProcessor(CookieJar())) - self.opener.addheaders = [('User-Agent', USER_AGENT)] + self.opener.addheaders = [('User-Agent', random.choice(USER_AGENTS))] self.run_search = True def login(self, username, password, callback=None): @@ -76,27 +85,27 @@ class SongSelectImport(object): if callback: callback() try: - login_page = BeautifulSoup(self.opener.open(LOGIN_URL).read(), 'lxml') - except (TypeError, URLError) as e: - log.exception('Could not login to SongSelect, %s', e) + login_page = BeautifulSoup(self.opener.open(LOGIN_PAGE).read(), 'lxml') + except (TypeError, URLError) as error: + log.exception('Could not login to SongSelect, %s', error) return False if callback: callback() token_input = login_page.find('input', attrs={'name': '__RequestVerificationToken'}) data = urlencode({ '__RequestVerificationToken': token_input['value'], - 'UserName': username, - 'Password': password, + 'emailAddress': username, + 'password': password, 'RememberMe': 'false' }) try: posted_page = BeautifulSoup(self.opener.open(LOGIN_URL, data.encode('utf-8')).read(), 'lxml') - except (TypeError, URLError) as e: - log.exception('Could not login to SongSelect, %s', e) + except (TypeError, URLError) as error: + log.exception('Could not login to SongSelect, %s', error) return False if callback: callback() - return not posted_page.find('input', attrs={'name': '__RequestVerificationToken'}) + return posted_page.find('input', id='SearchText') is not None def logout(self): """ @@ -104,8 +113,8 @@ class SongSelectImport(object): """ try: self.opener.open(LOGOUT_URL) - except (TypeError, URLError) as e: - log.exception('Could not log of SongSelect, %s', e) + except (TypeError, URLError) as error: + log.exception('Could not log of SongSelect, %s', error) def search(self, search_text, max_results, callback=None): """ @@ -117,7 +126,15 @@ class SongSelectImport(object): :return: List of songs """ self.run_search = True - params = {'allowredirect': 'false', 'SearchTerm': search_text} + params = { + 'SongContent': '', + 'PrimaryLanguage': '', + 'Keys': '', + 'Themes': '', + 'List': '', + 'Sort': '', + 'SearchText': search_text + } current_page = 1 songs = [] while self.run_search: @@ -125,7 +142,7 @@ class SongSelectImport(object): params['page'] = current_page try: results_page = BeautifulSoup(self.opener.open(SEARCH_URL + '?' + urlencode(params)).read(), 'lxml') - search_results = results_page.find_all('li', 'result pane') + search_results = results_page.find_all('div', 'song-result') except (TypeError, URLError) as e: log.exception('Could not search SongSelect, %s', e) search_results = None @@ -133,9 +150,9 @@ class SongSelectImport(object): break for result in search_results: song = { - 'title': unescape(result.find('h3').string), - 'authors': [unescape(author.string) for author in result.find_all('li')], - 'link': BASE_URL + result.find('a')['href'] + 'title': unescape(result.find('p', 'song-result-title').find('a').string).strip(), + 'authors': unescape(result.find('p', 'song-result-subtitle').string).strip().split(', '), + 'link': BASE_URL + result.find('p', 'song-result-title').find('a')['href'] } if callback: callback(song) @@ -163,27 +180,37 @@ class SongSelectImport(object): if callback: callback() try: - lyrics_page = BeautifulSoup(self.opener.open(song['link'] + '/lyrics').read(), 'lxml') + lyrics_page = BeautifulSoup(self.opener.open(song['link'] + '/viewlyrics').read(), 'lxml') except (TypeError, URLError): log.exception('Could not get lyrics from SongSelect') return None if callback: callback() - song['copyright'] = '/'.join([li.string for li in song_page.find('ul', 'copyright').find_all('li')]) - song['copyright'] = unescape(song['copyright']) - song['ccli_number'] = song_page.find('ul', 'info').find('li').string.split(':')[1].strip() + copyright_elements = [] + theme_elements = [] + copyrights_regex = re.compile(r'\bCopyrights\b') + themes_regex = re.compile(r'\bThemes\b') + for ul in song_page.find_all('ul', 'song-meta-list'): + if ul.find('li', string=copyrights_regex): + copyright_elements.extend(ul.find_all('li')[1:]) + if ul.find('li', string=themes_regex): + theme_elements.extend(ul.find_all('li')[1:]) + song['copyright'] = '/'.join([unescape(li.string).strip() for li in copyright_elements]) + song['topics'] = [unescape(li.string).strip() for li in theme_elements] + song['ccli_number'] = song_page.find('div', 'song-content-data').find('ul').find('li')\ + .find('strong').string.strip() song['verses'] = [] - verses = lyrics_page.find('section', 'lyrics').find_all('p') - verse_labels = lyrics_page.find('section', 'lyrics').find_all('h3') - for counter in range(len(verses)): - verse = {'label': verse_labels[counter].string, 'lyrics': ''} - for v in verses[counter].contents: + verses = lyrics_page.find('div', 'song-viewer lyrics').find_all('p') + verse_labels = lyrics_page.find('div', 'song-viewer lyrics').find_all('h3') + for verse, label in zip(verses, verse_labels): + song_verse = {'label': unescape(label.string).strip(), 'lyrics': ''} + for v in verse.contents: if isinstance(v, NavigableString): - verse['lyrics'] = verse['lyrics'] + v.string + song_verse['lyrics'] += unescape(v.string).strip() else: - verse['lyrics'] += '\n' - verse['lyrics'] = verse['lyrics'].strip(' \n\r\t') - song['verses'].append(unescape(verse)) + song_verse['lyrics'] += '\n' + song_verse['lyrics'] = song_verse['lyrics'].strip() + song['verses'].append(song_verse) for counter, author in enumerate(song['authors']): song['authors'][counter] = unescape(author) return song @@ -199,7 +226,11 @@ class SongSelectImport(object): song_xml = SongXML() verse_order = [] for verse in song['verses']: - verse_type, verse_number = verse['label'].split(' ')[:2] + if ' ' in verse['label']: + verse_type, verse_number = verse['label'].split(' ', 1) + else: + verse_type = verse['label'] + verse_number = 1 verse_type = VerseType.from_loose_input(verse_type) verse_number = int(verse_number) song_xml.add_verse_to_lyrics(VerseType.tags[verse_type], verse_number, verse['lyrics']) @@ -220,6 +251,11 @@ class SongSelectImport(object): last_name = name_parts[1] author = Author.populate(first_name=first_name, last_name=last_name, display_name=author_name) db_song.add_author(author) + for topic_name in song.get('topics', []): + topic = self.db_manager.get_object_filtered(Topic, Topic.name == topic_name) + if not topic: + topic = Topic.populate(name=topic_name) + db_song.add_topic(topic) self.db_manager.save_object(db_song) return db_song diff --git a/tests/functional/openlp_plugins/songs/test_songselect.py b/tests/functional/openlp_plugins/songs/test_songselect.py index 5a94ee1ac..f0fcbdf51 100644 --- a/tests/functional/openlp_plugins/songs/test_songselect.py +++ b/tests/functional/openlp_plugins/songs/test_songselect.py @@ -71,7 +71,7 @@ class TestSongSelectImport(TestCase, TestMixin): mocked_opener = MagicMock() mocked_build_opener.return_value = mocked_opener mocked_login_page = MagicMock() - mocked_login_page.find.return_value = {'value': 'blah'} + mocked_login_page.find.side_effect = [{'value': 'blah'}, None] MockedBeautifulSoup.return_value = mocked_login_page mock_callback = MagicMock() importer = SongSelectImport(None) @@ -112,7 +112,7 @@ class TestSongSelectImport(TestCase, TestMixin): mocked_opener = MagicMock() mocked_build_opener.return_value = mocked_opener mocked_login_page = MagicMock() - mocked_login_page.find.side_effect = [{'value': 'blah'}, None] + mocked_login_page.find.side_effect = [{'value': 'blah'}, MagicMock()] MockedBeautifulSoup.return_value = mocked_login_page mock_callback = MagicMock() importer = SongSelectImport(None) @@ -165,7 +165,7 @@ class TestSongSelectImport(TestCase, TestMixin): self.assertEqual(0, mock_callback.call_count, 'callback should not have been called') self.assertEqual(1, mocked_opener.open.call_count, 'open should have been called once') self.assertEqual(1, mocked_results_page.find_all.call_count, 'find_all should have been called once') - mocked_results_page.find_all.assert_called_with('li', 'result pane') + mocked_results_page.find_all.assert_called_with('div', 'song-result') self.assertEqual([], results, 'The search method should have returned an empty list') @patch('openlp.plugins.songs.lib.songselect.build_opener') @@ -177,12 +177,18 @@ class TestSongSelectImport(TestCase, TestMixin): # GIVEN: A bunch of mocked out stuff and an importer object # first search result mocked_result1 = MagicMock() - mocked_result1.find.side_effect = [MagicMock(string='Title 1'), {'href': '/url1'}] - mocked_result1.find_all.return_value = [MagicMock(string='Author 1-1'), MagicMock(string='Author 1-2')] + mocked_result1.find.side_effect = [ + MagicMock(find=MagicMock(return_value=MagicMock(string='Title 1'))), + MagicMock(string='James, John'), + MagicMock(find=MagicMock(return_value={'href': '/url1'})) + ] # second search result mocked_result2 = MagicMock() - mocked_result2.find.side_effect = [MagicMock(string='Title 2'), {'href': '/url2'}] - mocked_result2.find_all.return_value = [MagicMock(string='Author 2-1'), MagicMock(string='Author 2-2')] + mocked_result2.find.side_effect = [ + MagicMock(find=MagicMock(return_value=MagicMock(string='Title 2'))), + MagicMock(string='Philip'), + MagicMock(find=MagicMock(return_value={'href': '/url2'})) + ] # rest of the stuff mocked_opener = MagicMock() mocked_build_opener.return_value = mocked_opener @@ -196,13 +202,14 @@ class TestSongSelectImport(TestCase, TestMixin): results = importer.search('text', 1000, mock_callback) # THEN: callback was never called, open was called once, find_all was called once, an empty list returned + self.maxDiff = None self.assertEqual(2, mock_callback.call_count, 'callback should have been called twice') self.assertEqual(2, mocked_opener.open.call_count, 'open should have been called twice') self.assertEqual(2, mocked_results_page.find_all.call_count, 'find_all should have been called twice') - mocked_results_page.find_all.assert_called_with('li', 'result pane') + mocked_results_page.find_all.assert_called_with('div', 'song-result') expected_list = [ - {'title': 'Title 1', 'authors': ['Author 1-1', 'Author 1-2'], 'link': BASE_URL + '/url1'}, - {'title': 'Title 2', 'authors': ['Author 2-1', 'Author 2-2'], 'link': BASE_URL + '/url2'} + {'title': 'Title 1', 'authors': ['James', 'John'], 'link': BASE_URL + '/url1'}, + {'title': 'Title 2', 'authors': ['Philip'], 'link': BASE_URL + '/url2'} ] self.assertListEqual(expected_list, results, 'The search method should have returned two songs') @@ -215,16 +222,25 @@ class TestSongSelectImport(TestCase, TestMixin): # GIVEN: A bunch of mocked out stuff and an importer object # first search result mocked_result1 = MagicMock() - mocked_result1.find.side_effect = [MagicMock(string='Title 1'), {'href': '/url1'}] - mocked_result1.find_all.return_value = [MagicMock(string='Author 1-1'), MagicMock(string='Author 1-2')] + mocked_result1.find.side_effect = [ + MagicMock(find=MagicMock(return_value=MagicMock(string='Title 1'))), + MagicMock(string='James, John'), + MagicMock(find=MagicMock(return_value={'href': '/url1'})) + ] # second search result mocked_result2 = MagicMock() - mocked_result2.find.side_effect = [MagicMock(string='Title 2'), {'href': '/url2'}] - mocked_result2.find_all.return_value = [MagicMock(string='Author 2-1'), MagicMock(string='Author 2-2')] + mocked_result2.find.side_effect = [ + MagicMock(find=MagicMock(return_value=MagicMock(string='Title 2'))), + MagicMock(string='Philip'), + MagicMock(find=MagicMock(return_value={'href': '/url2'})) + ] # third search result mocked_result3 = MagicMock() - mocked_result3.find.side_effect = [MagicMock(string='Title 3'), {'href': '/url3'}] - mocked_result3.find_all.return_value = [MagicMock(string='Author 3-1'), MagicMock(string='Author 3-2')] + mocked_result3.find.side_effect = [ + MagicMock(find=MagicMock(return_value=MagicMock(string='Title 3'))), + MagicMock(string='Luke, Matthew'), + MagicMock(find=MagicMock(return_value={'href': '/url3'})) + ] # rest of the stuff mocked_opener = MagicMock() mocked_build_opener.return_value = mocked_opener @@ -241,9 +257,9 @@ class TestSongSelectImport(TestCase, TestMixin): self.assertEqual(2, mock_callback.call_count, 'callback should have been called twice') self.assertEqual(2, mocked_opener.open.call_count, 'open should have been called twice') self.assertEqual(2, mocked_results_page.find_all.call_count, 'find_all should have been called twice') - mocked_results_page.find_all.assert_called_with('li', 'result pane') - expected_list = [{'title': 'Title 1', 'authors': ['Author 1-1', 'Author 1-2'], 'link': BASE_URL + '/url1'}, - {'title': 'Title 2', 'authors': ['Author 2-1', 'Author 2-2'], 'link': BASE_URL + '/url2'}] + mocked_results_page.find_all.assert_called_with('div', 'song-result') + expected_list = [{'title': 'Title 1', 'authors': ['James', 'John'], 'link': BASE_URL + '/url1'}, + {'title': 'Title 2', 'authors': ['Philip'], 'link': BASE_URL + '/url2'}] self.assertListEqual(expected_list, results, 'The search method should have returned two songs') @patch('openlp.plugins.songs.lib.songselect.build_opener') @@ -337,7 +353,7 @@ class TestSongSelectImport(TestCase, TestMixin): self.assertIsNotNone(result, 'The get_song() method should have returned a song dictionary') self.assertEqual(2, mocked_lyrics_page.find.call_count, 'The find() method should have been called twice') self.assertEqual(2, mocked_find_all.call_count, 'The find_all() method should have been called twice') - self.assertEqual([call('section', 'lyrics'), call('section', 'lyrics')], + self.assertEqual([call('div', 'song-viewer lyrics'), call('div', 'song-viewer lyrics')], mocked_lyrics_page.find.call_args_list, 'The find() method should have been called with the right arguments') self.assertEqual([call('p'), call('h3')], mocked_find_all.call_args_list, @@ -419,8 +435,9 @@ class TestSongSelectImport(TestCase, TestMixin): self.assertEqual(1, len(result.authors_songs), 'There should only be one author') @patch('openlp.plugins.songs.lib.songselect.clean_song') + @patch('openlp.plugins.songs.lib.songselect.Topic') @patch('openlp.plugins.songs.lib.songselect.Author') - def save_song_unknown_author_test(self, MockedAuthor, mocked_clean_song): + def save_song_unknown_author_test(self, MockedAuthor, MockedTopic, mocked_clean_song): """ Test that saving a song with an author name of only one word performs the correct actions """ @@ -437,6 +454,7 @@ class TestSongSelectImport(TestCase, TestMixin): 'ccli_number': '123456' } MockedAuthor.display_name.__eq__.return_value = False + MockedTopic.name.__eq__.return_value = False mocked_db_manager = MagicMock() mocked_db_manager.get_object_filtered.return_value = None importer = SongSelectImport(mocked_db_manager)