Fix bug #1608194: Adapt the importer to the new CCLI SongSelect website

Fixes: https://launchpad.net/bugs/1608194
This commit is contained in:
Raoul Snyman 2016-08-11 16:18:49 +02:00
parent 989356c24e
commit ae93a6e33d
2 changed files with 70 additions and 34 deletions

View File

@ -32,9 +32,8 @@ from PyQt5 import QtWidgets
from openlp.core.common import AppLocation from openlp.core.common import AppLocation
from openlp.core.lib import translate from openlp.core.lib import translate
from openlp.core.utils import CONTROL_CHARS from openlp.core.utils import CONTROL_CHARS
from openlp.plugins.songs.lib.db import MediaFile, Song from openlp.plugins.songs.lib.db import Author, MediaFile, Song, Topic
from .db import Author from openlp.plugins.songs.lib.ui import SongStrings
from .ui import SongStrings
log = logging.getLogger(__name__) log = logging.getLogger(__name__)

View File

@ -24,6 +24,8 @@ The :mod:`~openlp.plugins.songs.lib.songselect` module contains the SongSelect i
""" """
import logging import logging
import sys import sys
import random
import re
from http.cookiejar import CookieJar from http.cookiejar import CookieJar
from urllib.parse import urlencode from urllib.parse import urlencode
from urllib.request import HTTPCookieProcessor, URLError, build_opener from urllib.request import HTTPCookieProcessor, URLError, build_opener
@ -32,14 +34,19 @@ from html import unescape
from bs4 import BeautifulSoup, NavigableString from bs4 import BeautifulSoup, NavigableString
from openlp.plugins.songs.lib import Song, VerseType, clean_song, Author from openlp.plugins.songs.lib import Song, Author, Topic, VerseType, clean_song
from openlp.plugins.songs.lib.openlyricsxml import SongXML from openlp.plugins.songs.lib.openlyricsxml import SongXML
USER_AGENT = 'Mozilla/5.0 (Linux; U; Android 4.0.3; en-us; GT-I9000 ' \ USER_AGENTS = [
'Build/IML74K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 ' \ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36',
'Mobile Safari/534.30' 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36',
BASE_URL = 'https://mobile.songselect.com' 'Mozilla/5.0 (X11; Linux x86_64; rv:47.0) Gecko/20100101 Firefox/47.0',
LOGIN_URL = BASE_URL + '/account/login' 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:47.0) Gecko/20100101 Firefox/47.0'
]
BASE_URL = 'https://songselect.ccli.com'
LOGIN_PAGE = 'https://profile.ccli.com/account/signin?appContext=SongSelect&returnUrl=https%3a%2f%2fsongselect.ccli.com%2f'
LOGIN_URL = 'https://profile.ccli.com/'
LOGOUT_URL = BASE_URL + '/account/logout' LOGOUT_URL = BASE_URL + '/account/logout'
SEARCH_URL = BASE_URL + '/search/results' SEARCH_URL = BASE_URL + '/search/results'
@ -60,7 +67,7 @@ class SongSelectImport(object):
self.db_manager = db_manager self.db_manager = db_manager
self.html_parser = HTMLParser() self.html_parser = HTMLParser()
self.opener = build_opener(HTTPCookieProcessor(CookieJar())) self.opener = build_opener(HTTPCookieProcessor(CookieJar()))
self.opener.addheaders = [('User-Agent', USER_AGENT)] self.opener.addheaders = [('User-Agent', random.choice(USER_AGENTS))]
self.run_search = True self.run_search = True
def login(self, username, password, callback=None): def login(self, username, password, callback=None):
@ -76,7 +83,9 @@ class SongSelectImport(object):
if callback: if callback:
callback() callback()
try: try:
login_page = BeautifulSoup(self.opener.open(LOGIN_URL).read(), 'lxml') login_page_contents = self.opener.open(LOGIN_PAGE).read()
log.debug('Login page:\n%s', login_page_contents)
login_page = BeautifulSoup(login_page_contents, 'lxml')
except (TypeError, URLError) as e: except (TypeError, URLError) as e:
log.exception('Could not login to SongSelect, %s', e) log.exception('Could not login to SongSelect, %s', e)
return False return False
@ -85,18 +94,20 @@ class SongSelectImport(object):
token_input = login_page.find('input', attrs={'name': '__RequestVerificationToken'}) token_input = login_page.find('input', attrs={'name': '__RequestVerificationToken'})
data = urlencode({ data = urlencode({
'__RequestVerificationToken': token_input['value'], '__RequestVerificationToken': token_input['value'],
'UserName': username, 'emailAddress': username,
'Password': password, 'password': password,
'RememberMe': 'false' 'RememberMe': 'false'
}) })
try: try:
posted_page = BeautifulSoup(self.opener.open(LOGIN_URL, data.encode('utf-8')).read(), 'lxml') posted_page_contents = self.opener.open(LOGIN_URL, data.encode('utf-8')).read()
log.debug('Posted page:\n%s', posted_page_contents)
posted_page = BeautifulSoup(posted_page_contents, 'lxml')
except (TypeError, URLError) as e: except (TypeError, URLError) as e:
log.exception('Could not login to SongSelect, %s', e) log.exception('Could not login to SongSelect, %s', e)
return False return False
if callback: if callback:
callback() callback()
return not posted_page.find('input', attrs={'name': '__RequestVerificationToken'}) return posted_page.find('input', id='SearchText') is not None
def logout(self): def logout(self):
""" """
@ -117,7 +128,15 @@ class SongSelectImport(object):
:return: List of songs :return: List of songs
""" """
self.run_search = True self.run_search = True
params = {'allowredirect': 'false', 'SearchTerm': search_text} params = {
'SongContent': '',
'PrimaryLanguage': '',
'Keys': '',
'Themes': '',
'List': '',
'Sort': '',
'SearchText': search_text
}
current_page = 1 current_page = 1
songs = [] songs = []
while self.run_search: while self.run_search:
@ -125,7 +144,7 @@ class SongSelectImport(object):
params['page'] = current_page params['page'] = current_page
try: try:
results_page = BeautifulSoup(self.opener.open(SEARCH_URL + '?' + urlencode(params)).read(), 'lxml') results_page = BeautifulSoup(self.opener.open(SEARCH_URL + '?' + urlencode(params)).read(), 'lxml')
search_results = results_page.find_all('li', 'result pane') search_results = results_page.find_all('div', 'song-result')
except (TypeError, URLError) as e: except (TypeError, URLError) as e:
log.exception('Could not search SongSelect, %s', e) log.exception('Could not search SongSelect, %s', e)
search_results = None search_results = None
@ -133,9 +152,9 @@ class SongSelectImport(object):
break break
for result in search_results: for result in search_results:
song = { song = {
'title': unescape(result.find('h3').string), 'title': unescape(result.find('p', 'song-result-title').find('a').string).strip(),
'authors': [unescape(author.string) for author in result.find_all('li')], 'authors': unescape(result.find('p', 'song-result-subtitle').string).strip().split(', '),
'link': BASE_URL + result.find('a')['href'] 'link': BASE_URL + result.find('p', 'song-result-title').find('a')['href']
} }
if callback: if callback:
callback(song) callback(song)
@ -163,27 +182,36 @@ class SongSelectImport(object):
if callback: if callback:
callback() callback()
try: try:
lyrics_page = BeautifulSoup(self.opener.open(song['link'] + '/lyrics').read(), 'lxml') lyrics_page = BeautifulSoup(self.opener.open(song['link'] + '/viewlyrics').read(), 'lxml')
except (TypeError, URLError): except (TypeError, URLError):
log.exception('Could not get lyrics from SongSelect') log.exception('Could not get lyrics from SongSelect')
return None return None
if callback: if callback:
callback() callback()
song['copyright'] = '/'.join([li.string for li in song_page.find('ul', 'copyright').find_all('li')]) copyright_elements = []
song['copyright'] = unescape(song['copyright']) theme_elements = []
song['ccli_number'] = song_page.find('ul', 'info').find('li').string.split(':')[1].strip() copyrights_regex = re.compile(r'\bCopyrights\b')
themes_regex = re.compile(r'\bThemes\b')
for ul in song_page.find_all('ul', 'song-meta-list'):
if ul.find('li', string=copyrights_regex):
copyright_elements.extend(ul.find_all('li')[1:])
if ul.find('li', string=themes_regex):
theme_elements.extend(ul.find_all('li')[1:])
song['copyright'] = '/'.join([unescape(li.string).strip() for li in copyright_elements])
song['topics'] = [unescape(li.string).strip() for li in theme_elements]
song['ccli_number'] = song_page.find('div', 'song-content-data').find('ul').find('li').find('strong').string.strip()
song['verses'] = [] song['verses'] = []
verses = lyrics_page.find('section', 'lyrics').find_all('p') verses = lyrics_page.find('div', 'song-viewer lyrics').find_all('p')
verse_labels = lyrics_page.find('section', 'lyrics').find_all('h3') verse_labels = lyrics_page.find('div', 'song-viewer lyrics').find_all('h3')
for counter in range(len(verses)): for verse, label in zip(verses, verse_labels):
verse = {'label': verse_labels[counter].string, 'lyrics': ''} song_verse = {'label': unescape(label.string).strip(), 'lyrics': ''}
for v in verses[counter].contents: for v in verse.contents:
if isinstance(v, NavigableString): if isinstance(v, NavigableString):
verse['lyrics'] = verse['lyrics'] + v.string song_verse['lyrics'] += unescape(v.string).strip()
else: else:
verse['lyrics'] += '\n' song_verse['lyrics'] += '\n'
verse['lyrics'] = verse['lyrics'].strip(' \n\r\t') song_verse['lyrics'] = song_verse['lyrics'].strip()
song['verses'].append(unescape(verse)) song['verses'].append(song_verse)
for counter, author in enumerate(song['authors']): for counter, author in enumerate(song['authors']):
song['authors'][counter] = unescape(author) song['authors'][counter] = unescape(author)
return song return song
@ -199,7 +227,11 @@ class SongSelectImport(object):
song_xml = SongXML() song_xml = SongXML()
verse_order = [] verse_order = []
for verse in song['verses']: for verse in song['verses']:
verse_type, verse_number = verse['label'].split(' ')[:2] if ' ' in verse['label']:
verse_type, verse_number = verse['label'].split(' ', 1)
else:
verse_type = verse['label']
verse_number = 1
verse_type = VerseType.from_loose_input(verse_type) verse_type = VerseType.from_loose_input(verse_type)
verse_number = int(verse_number) verse_number = int(verse_number)
song_xml.add_verse_to_lyrics(VerseType.tags[verse_type], verse_number, verse['lyrics']) song_xml.add_verse_to_lyrics(VerseType.tags[verse_type], verse_number, verse['lyrics'])
@ -220,6 +252,11 @@ class SongSelectImport(object):
last_name = name_parts[1] last_name = name_parts[1]
author = Author.populate(first_name=first_name, last_name=last_name, display_name=author_name) author = Author.populate(first_name=first_name, last_name=last_name, display_name=author_name)
db_song.add_author(author) db_song.add_author(author)
for topic_name in song['topics']:
topic = self.db_manager.get_object_filtered(Topic, Topic.name == topic_name)
if not topic:
topic = Topic.populate(name=topic_name)
db_song.topics.append(topic)
self.db_manager.save_object(db_song) self.db_manager.save_object(db_song)
return db_song return db_song