forked from openlp/openlp
Fix bug #1608194: Adapt the importer to the new CCLI SongSelect website
Fixes: https://launchpad.net/bugs/1608194
This commit is contained in:
parent
989356c24e
commit
ae93a6e33d
@ -32,9 +32,8 @@ from PyQt5 import QtWidgets
|
|||||||
from openlp.core.common import AppLocation
|
from openlp.core.common import AppLocation
|
||||||
from openlp.core.lib import translate
|
from openlp.core.lib import translate
|
||||||
from openlp.core.utils import CONTROL_CHARS
|
from openlp.core.utils import CONTROL_CHARS
|
||||||
from openlp.plugins.songs.lib.db import MediaFile, Song
|
from openlp.plugins.songs.lib.db import Author, MediaFile, Song, Topic
|
||||||
from .db import Author
|
from openlp.plugins.songs.lib.ui import SongStrings
|
||||||
from .ui import SongStrings
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@ -24,6 +24,8 @@ The :mod:`~openlp.plugins.songs.lib.songselect` module contains the SongSelect i
|
|||||||
"""
|
"""
|
||||||
import logging
|
import logging
|
||||||
import sys
|
import sys
|
||||||
|
import random
|
||||||
|
import re
|
||||||
from http.cookiejar import CookieJar
|
from http.cookiejar import CookieJar
|
||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode
|
||||||
from urllib.request import HTTPCookieProcessor, URLError, build_opener
|
from urllib.request import HTTPCookieProcessor, URLError, build_opener
|
||||||
@ -32,14 +34,19 @@ from html import unescape
|
|||||||
|
|
||||||
from bs4 import BeautifulSoup, NavigableString
|
from bs4 import BeautifulSoup, NavigableString
|
||||||
|
|
||||||
from openlp.plugins.songs.lib import Song, VerseType, clean_song, Author
|
from openlp.plugins.songs.lib import Song, Author, Topic, VerseType, clean_song
|
||||||
from openlp.plugins.songs.lib.openlyricsxml import SongXML
|
from openlp.plugins.songs.lib.openlyricsxml import SongXML
|
||||||
|
|
||||||
USER_AGENT = 'Mozilla/5.0 (Linux; U; Android 4.0.3; en-us; GT-I9000 ' \
|
USER_AGENTS = [
|
||||||
'Build/IML74K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 ' \
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36',
|
||||||
'Mobile Safari/534.30'
|
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36',
|
||||||
BASE_URL = 'https://mobile.songselect.com'
|
'Mozilla/5.0 (X11; Linux x86_64; rv:47.0) Gecko/20100101 Firefox/47.0',
|
||||||
LOGIN_URL = BASE_URL + '/account/login'
|
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0',
|
||||||
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:47.0) Gecko/20100101 Firefox/47.0'
|
||||||
|
]
|
||||||
|
BASE_URL = 'https://songselect.ccli.com'
|
||||||
|
LOGIN_PAGE = 'https://profile.ccli.com/account/signin?appContext=SongSelect&returnUrl=https%3a%2f%2fsongselect.ccli.com%2f'
|
||||||
|
LOGIN_URL = 'https://profile.ccli.com/'
|
||||||
LOGOUT_URL = BASE_URL + '/account/logout'
|
LOGOUT_URL = BASE_URL + '/account/logout'
|
||||||
SEARCH_URL = BASE_URL + '/search/results'
|
SEARCH_URL = BASE_URL + '/search/results'
|
||||||
|
|
||||||
@ -60,7 +67,7 @@ class SongSelectImport(object):
|
|||||||
self.db_manager = db_manager
|
self.db_manager = db_manager
|
||||||
self.html_parser = HTMLParser()
|
self.html_parser = HTMLParser()
|
||||||
self.opener = build_opener(HTTPCookieProcessor(CookieJar()))
|
self.opener = build_opener(HTTPCookieProcessor(CookieJar()))
|
||||||
self.opener.addheaders = [('User-Agent', USER_AGENT)]
|
self.opener.addheaders = [('User-Agent', random.choice(USER_AGENTS))]
|
||||||
self.run_search = True
|
self.run_search = True
|
||||||
|
|
||||||
def login(self, username, password, callback=None):
|
def login(self, username, password, callback=None):
|
||||||
@ -76,7 +83,9 @@ class SongSelectImport(object):
|
|||||||
if callback:
|
if callback:
|
||||||
callback()
|
callback()
|
||||||
try:
|
try:
|
||||||
login_page = BeautifulSoup(self.opener.open(LOGIN_URL).read(), 'lxml')
|
login_page_contents = self.opener.open(LOGIN_PAGE).read()
|
||||||
|
log.debug('Login page:\n%s', login_page_contents)
|
||||||
|
login_page = BeautifulSoup(login_page_contents, 'lxml')
|
||||||
except (TypeError, URLError) as e:
|
except (TypeError, URLError) as e:
|
||||||
log.exception('Could not login to SongSelect, %s', e)
|
log.exception('Could not login to SongSelect, %s', e)
|
||||||
return False
|
return False
|
||||||
@ -85,18 +94,20 @@ class SongSelectImport(object):
|
|||||||
token_input = login_page.find('input', attrs={'name': '__RequestVerificationToken'})
|
token_input = login_page.find('input', attrs={'name': '__RequestVerificationToken'})
|
||||||
data = urlencode({
|
data = urlencode({
|
||||||
'__RequestVerificationToken': token_input['value'],
|
'__RequestVerificationToken': token_input['value'],
|
||||||
'UserName': username,
|
'emailAddress': username,
|
||||||
'Password': password,
|
'password': password,
|
||||||
'RememberMe': 'false'
|
'RememberMe': 'false'
|
||||||
})
|
})
|
||||||
try:
|
try:
|
||||||
posted_page = BeautifulSoup(self.opener.open(LOGIN_URL, data.encode('utf-8')).read(), 'lxml')
|
posted_page_contents = self.opener.open(LOGIN_URL, data.encode('utf-8')).read()
|
||||||
|
log.debug('Posted page:\n%s', posted_page_contents)
|
||||||
|
posted_page = BeautifulSoup(posted_page_contents, 'lxml')
|
||||||
except (TypeError, URLError) as e:
|
except (TypeError, URLError) as e:
|
||||||
log.exception('Could not login to SongSelect, %s', e)
|
log.exception('Could not login to SongSelect, %s', e)
|
||||||
return False
|
return False
|
||||||
if callback:
|
if callback:
|
||||||
callback()
|
callback()
|
||||||
return not posted_page.find('input', attrs={'name': '__RequestVerificationToken'})
|
return posted_page.find('input', id='SearchText') is not None
|
||||||
|
|
||||||
def logout(self):
|
def logout(self):
|
||||||
"""
|
"""
|
||||||
@ -117,7 +128,15 @@ class SongSelectImport(object):
|
|||||||
:return: List of songs
|
:return: List of songs
|
||||||
"""
|
"""
|
||||||
self.run_search = True
|
self.run_search = True
|
||||||
params = {'allowredirect': 'false', 'SearchTerm': search_text}
|
params = {
|
||||||
|
'SongContent': '',
|
||||||
|
'PrimaryLanguage': '',
|
||||||
|
'Keys': '',
|
||||||
|
'Themes': '',
|
||||||
|
'List': '',
|
||||||
|
'Sort': '',
|
||||||
|
'SearchText': search_text
|
||||||
|
}
|
||||||
current_page = 1
|
current_page = 1
|
||||||
songs = []
|
songs = []
|
||||||
while self.run_search:
|
while self.run_search:
|
||||||
@ -125,7 +144,7 @@ class SongSelectImport(object):
|
|||||||
params['page'] = current_page
|
params['page'] = current_page
|
||||||
try:
|
try:
|
||||||
results_page = BeautifulSoup(self.opener.open(SEARCH_URL + '?' + urlencode(params)).read(), 'lxml')
|
results_page = BeautifulSoup(self.opener.open(SEARCH_URL + '?' + urlencode(params)).read(), 'lxml')
|
||||||
search_results = results_page.find_all('li', 'result pane')
|
search_results = results_page.find_all('div', 'song-result')
|
||||||
except (TypeError, URLError) as e:
|
except (TypeError, URLError) as e:
|
||||||
log.exception('Could not search SongSelect, %s', e)
|
log.exception('Could not search SongSelect, %s', e)
|
||||||
search_results = None
|
search_results = None
|
||||||
@ -133,9 +152,9 @@ class SongSelectImport(object):
|
|||||||
break
|
break
|
||||||
for result in search_results:
|
for result in search_results:
|
||||||
song = {
|
song = {
|
||||||
'title': unescape(result.find('h3').string),
|
'title': unescape(result.find('p', 'song-result-title').find('a').string).strip(),
|
||||||
'authors': [unescape(author.string) for author in result.find_all('li')],
|
'authors': unescape(result.find('p', 'song-result-subtitle').string).strip().split(', '),
|
||||||
'link': BASE_URL + result.find('a')['href']
|
'link': BASE_URL + result.find('p', 'song-result-title').find('a')['href']
|
||||||
}
|
}
|
||||||
if callback:
|
if callback:
|
||||||
callback(song)
|
callback(song)
|
||||||
@ -163,27 +182,36 @@ class SongSelectImport(object):
|
|||||||
if callback:
|
if callback:
|
||||||
callback()
|
callback()
|
||||||
try:
|
try:
|
||||||
lyrics_page = BeautifulSoup(self.opener.open(song['link'] + '/lyrics').read(), 'lxml')
|
lyrics_page = BeautifulSoup(self.opener.open(song['link'] + '/viewlyrics').read(), 'lxml')
|
||||||
except (TypeError, URLError):
|
except (TypeError, URLError):
|
||||||
log.exception('Could not get lyrics from SongSelect')
|
log.exception('Could not get lyrics from SongSelect')
|
||||||
return None
|
return None
|
||||||
if callback:
|
if callback:
|
||||||
callback()
|
callback()
|
||||||
song['copyright'] = '/'.join([li.string for li in song_page.find('ul', 'copyright').find_all('li')])
|
copyright_elements = []
|
||||||
song['copyright'] = unescape(song['copyright'])
|
theme_elements = []
|
||||||
song['ccli_number'] = song_page.find('ul', 'info').find('li').string.split(':')[1].strip()
|
copyrights_regex = re.compile(r'\bCopyrights\b')
|
||||||
|
themes_regex = re.compile(r'\bThemes\b')
|
||||||
|
for ul in song_page.find_all('ul', 'song-meta-list'):
|
||||||
|
if ul.find('li', string=copyrights_regex):
|
||||||
|
copyright_elements.extend(ul.find_all('li')[1:])
|
||||||
|
if ul.find('li', string=themes_regex):
|
||||||
|
theme_elements.extend(ul.find_all('li')[1:])
|
||||||
|
song['copyright'] = '/'.join([unescape(li.string).strip() for li in copyright_elements])
|
||||||
|
song['topics'] = [unescape(li.string).strip() for li in theme_elements]
|
||||||
|
song['ccli_number'] = song_page.find('div', 'song-content-data').find('ul').find('li').find('strong').string.strip()
|
||||||
song['verses'] = []
|
song['verses'] = []
|
||||||
verses = lyrics_page.find('section', 'lyrics').find_all('p')
|
verses = lyrics_page.find('div', 'song-viewer lyrics').find_all('p')
|
||||||
verse_labels = lyrics_page.find('section', 'lyrics').find_all('h3')
|
verse_labels = lyrics_page.find('div', 'song-viewer lyrics').find_all('h3')
|
||||||
for counter in range(len(verses)):
|
for verse, label in zip(verses, verse_labels):
|
||||||
verse = {'label': verse_labels[counter].string, 'lyrics': ''}
|
song_verse = {'label': unescape(label.string).strip(), 'lyrics': ''}
|
||||||
for v in verses[counter].contents:
|
for v in verse.contents:
|
||||||
if isinstance(v, NavigableString):
|
if isinstance(v, NavigableString):
|
||||||
verse['lyrics'] = verse['lyrics'] + v.string
|
song_verse['lyrics'] += unescape(v.string).strip()
|
||||||
else:
|
else:
|
||||||
verse['lyrics'] += '\n'
|
song_verse['lyrics'] += '\n'
|
||||||
verse['lyrics'] = verse['lyrics'].strip(' \n\r\t')
|
song_verse['lyrics'] = song_verse['lyrics'].strip()
|
||||||
song['verses'].append(unescape(verse))
|
song['verses'].append(song_verse)
|
||||||
for counter, author in enumerate(song['authors']):
|
for counter, author in enumerate(song['authors']):
|
||||||
song['authors'][counter] = unescape(author)
|
song['authors'][counter] = unescape(author)
|
||||||
return song
|
return song
|
||||||
@ -199,7 +227,11 @@ class SongSelectImport(object):
|
|||||||
song_xml = SongXML()
|
song_xml = SongXML()
|
||||||
verse_order = []
|
verse_order = []
|
||||||
for verse in song['verses']:
|
for verse in song['verses']:
|
||||||
verse_type, verse_number = verse['label'].split(' ')[:2]
|
if ' ' in verse['label']:
|
||||||
|
verse_type, verse_number = verse['label'].split(' ', 1)
|
||||||
|
else:
|
||||||
|
verse_type = verse['label']
|
||||||
|
verse_number = 1
|
||||||
verse_type = VerseType.from_loose_input(verse_type)
|
verse_type = VerseType.from_loose_input(verse_type)
|
||||||
verse_number = int(verse_number)
|
verse_number = int(verse_number)
|
||||||
song_xml.add_verse_to_lyrics(VerseType.tags[verse_type], verse_number, verse['lyrics'])
|
song_xml.add_verse_to_lyrics(VerseType.tags[verse_type], verse_number, verse['lyrics'])
|
||||||
@ -220,6 +252,11 @@ class SongSelectImport(object):
|
|||||||
last_name = name_parts[1]
|
last_name = name_parts[1]
|
||||||
author = Author.populate(first_name=first_name, last_name=last_name, display_name=author_name)
|
author = Author.populate(first_name=first_name, last_name=last_name, display_name=author_name)
|
||||||
db_song.add_author(author)
|
db_song.add_author(author)
|
||||||
|
for topic_name in song['topics']:
|
||||||
|
topic = self.db_manager.get_object_filtered(Topic, Topic.name == topic_name)
|
||||||
|
if not topic:
|
||||||
|
topic = Topic.populate(name=topic_name)
|
||||||
|
db_song.topics.append(topic)
|
||||||
self.db_manager.save_object(db_song)
|
self.db_manager.save_object(db_song)
|
||||||
return db_song
|
return db_song
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user