forked from openlp/openlp
Most of the way there.
This commit is contained in:
parent
01b513d3bb
commit
cf7489839d
@ -5,11 +5,12 @@ import sys
|
|||||||
import os
|
import os
|
||||||
import sqlite
|
import sqlite
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import chardet
|
import re
|
||||||
from optparse import OptionParser
|
from optparse import OptionParser
|
||||||
from traceback import format_tb as get_traceback
|
from traceback import format_tb as get_traceback
|
||||||
|
|
||||||
# Some global options to be used throughout the import process
|
# Some global options to be used throughout the import process
|
||||||
|
dirty_chars = re.compile(r'\W', re.UNICODE)
|
||||||
verbose = False
|
verbose = False
|
||||||
debug = False
|
debug = False
|
||||||
old_cursor = None
|
old_cursor = None
|
||||||
@ -17,20 +18,20 @@ new_cursor = None
|
|||||||
|
|
||||||
# SQL create statements
|
# SQL create statements
|
||||||
create_statements = [
|
create_statements = [
|
||||||
u"""CREATE TABLE authors (
|
(u'table "authors"', u"""CREATE TABLE authors (
|
||||||
id INTEGER NOT NULL,
|
id INTEGER NOT NULL,
|
||||||
first_name VARCHAR(128),
|
first_name VARCHAR(128),
|
||||||
last_name VARCHAR(128),
|
last_name VARCHAR(128),
|
||||||
display_name VARCHAR(255) NOT NULL,
|
display_name VARCHAR(255) NOT NULL,
|
||||||
PRIMARY KEY (id)
|
PRIMARY KEY (id)
|
||||||
)""",
|
)"""),
|
||||||
u"""CREATE TABLE song_books (
|
(u'table "song_books"', u"""CREATE TABLE song_books (
|
||||||
id INTEGER NOT NULL,
|
id INTEGER NOT NULL,
|
||||||
name VARCHAR(128) NOT NULL,
|
name VARCHAR(128) NOT NULL,
|
||||||
publisher VARCHAR(128),
|
publisher VARCHAR(128),
|
||||||
PRIMARY KEY (id)
|
PRIMARY KEY (id)
|
||||||
)""",
|
)"""),
|
||||||
u"""CREATE TABLE songs (
|
(u'table "songs"', u"""CREATE TABLE songs (
|
||||||
id INTEGER NOT NULL,
|
id INTEGER NOT NULL,
|
||||||
song_book_id INTEGER,
|
song_book_id INTEGER,
|
||||||
title VARCHAR(255) NOT NULL,
|
title VARCHAR(255) NOT NULL,
|
||||||
@ -45,44 +46,66 @@ create_statements = [
|
|||||||
search_lyrics TEXT NOT NULL,
|
search_lyrics TEXT NOT NULL,
|
||||||
PRIMARY KEY (id),
|
PRIMARY KEY (id),
|
||||||
FOREIGN KEY(song_book_id) REFERENCES song_books (id)
|
FOREIGN KEY(song_book_id) REFERENCES song_books (id)
|
||||||
)""",
|
)"""),
|
||||||
u"""CREATE TABLE topics (
|
(u'table "topics"', u"""CREATE TABLE topics (
|
||||||
id INTEGER NOT NULL,
|
id INTEGER NOT NULL,
|
||||||
name VARCHAR(128) NOT NULL,
|
name VARCHAR(128) NOT NULL,
|
||||||
PRIMARY KEY (id)
|
PRIMARY KEY (id)
|
||||||
)""",
|
)"""),
|
||||||
u"""CREATE INDEX ix_songs_search_lyrics ON songs (search_lyrics)""",
|
(u'index "ix_songs_search_lyrics"',
|
||||||
u"""CREATE INDEX ix_songs_search_title ON songs (search_title)""",
|
u"""CREATE INDEX ix_songs_search_lyrics ON songs (search_lyrics)"""),
|
||||||
u"""CREATE TABLE authors_songs (
|
(u'index "ix_songs_search_title',
|
||||||
|
u"""CREATE INDEX ix_songs_search_title ON songs (search_title)"""),
|
||||||
|
(u'table "authors_songs"', u"""CREATE TABLE authors_songs (
|
||||||
author_id INTEGER NOT NULL,
|
author_id INTEGER NOT NULL,
|
||||||
song_id INTEGER NOT NULL,
|
song_id INTEGER NOT NULL,
|
||||||
PRIMARY KEY (author_id, song_id),
|
PRIMARY KEY (author_id, song_id),
|
||||||
FOREIGN KEY(author_id) REFERENCES authors (id),
|
FOREIGN KEY(author_id) REFERENCES authors (id),
|
||||||
FOREIGN KEY(song_id) REFERENCES songs (id)
|
FOREIGN KEY(song_id) REFERENCES songs (id)
|
||||||
)""",
|
)"""),
|
||||||
u"""CREATE TABLE songs_topics (
|
(u'table "songs_topics"', u"""CREATE TABLE songs_topics (
|
||||||
song_id INTEGER NOT NULL,
|
song_id INTEGER NOT NULL,
|
||||||
topic_id INTEGER NOT NULL,
|
topic_id INTEGER NOT NULL,
|
||||||
PRIMARY KEY (song_id, topic_id),
|
PRIMARY KEY (song_id, topic_id),
|
||||||
FOREIGN KEY(song_id) REFERENCES songs (id),
|
FOREIGN KEY(song_id) REFERENCES songs (id),
|
||||||
FOREIGN KEY(topic_id) REFERENCES topics (id)
|
FOREIGN KEY(topic_id) REFERENCES topics (id)
|
||||||
)"""
|
)""")
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def clean_string(dirty):
    """
    Build the plain "search" form of a title or lyrics string.

    Every match of the module-level ``dirty_chars`` pattern is removed from
    ``dirty`` first; any Windows or Unix line break still present afterwards
    is then turned into a single space.

    NOTE(review): if ``dirty_chars`` is ``\\W`` (as compiled at module level),
    whitespace and line breaks are already stripped by the substitution, so
    the two replacements below would never find anything and words end up
    run together. Confirm whether spaces were meant to survive.
    """
    stripped = dirty_chars.sub(u'', dirty)
    # '\r\n' must be handled before the bare '\n' so a Windows line ending
    # becomes one space rather than leaving a stray '\r' behind.
    for line_break in (u'\r\n', u'\n'):
        stripped = stripped.replace(line_break, ' ')
    return stripped
|
||||||
|
|
||||||
|
def convert_string(buffer_column):
    """
    Decode a raw text column from the old database into a unicode string.

    The bytes are interpreted as UTF-8 first. If they are not valid UTF-8
    they are decoded as Windows-1252 (cp1252) instead, with undecodable
    bytes replaced by U+FFFD so that a single bad byte cannot abort the
    import.

    ``buffer_column``
        A byte string or buffer object as returned by the database cursor.

    Returns the decoded ``unicode`` string.
    """
    buffer_string = buffer(buffer_column)
    try:
        return unicode(buffer_string, 'utf-8')
    except UnicodeDecodeError:
        # Previously the UTF-8 result was passed through
        # .decode('cp1252', 'replace'); calling decode() on a unicode object
        # forces an implicit ASCII encode and fails on any non-ASCII
        # character. Decode the original bytes as cp1252 instead.
        return unicode(buffer_string, 'cp1252', 'replace')
|
||||||
|
|
||||||
|
def display_sql(sql, params):
    """
    Render a parameterised SQL statement for debug output.

    Each ``?`` placeholder in ``sql`` is filled, left to right, with a
    printable form of the matching entry in ``params``. The result is only
    meant for display; it is not safely escaped and must not be executed.

    ``sql``
        The statement text containing ``?`` placeholders.

    ``params``
        The sequence of values that would be bound to the placeholders.

    Strings are wrapped in double quotes, integers use ``%d``, floats use
    ``%f`` and anything else (``None``, complex numbers, ...) is shown as its
    quoted ``str()`` form. Placeholders without a matching parameter are left
    as ``?``; surplus parameters are ignored.
    """
    rendered_params = []
    for param in params:
        if isinstance(param, basestring):
            rendered_params.append(u'"%s"' % param)
        elif isinstance(param, (int, long)):
            rendered_params.append(u'%d' % param)
        elif isinstance(param, float):
            rendered_params.append(u'%f' % param)
        else:
            # complex lands here as well: '%f' cannot format it.
            rendered_params.append(u'"%s"' % str(param))
    # Split once on the placeholders of the ORIGINAL statement and interleave
    # the values. Replacing '?' one at a time on the growing string would
    # also hit any '?' contained in an already substituted value.
    fragments = sql.split(u'?')
    placeholder_count = len(fragments) - 1
    fillers = rendered_params[:placeholder_count]
    fillers.extend([u'?'] * (placeholder_count - len(fillers)))
    output = [fragments[0]]
    for filler, fragment in zip(fillers, fragments[1:]):
        output.append(filler)
        output.append(fragment)
    return u''.join(output)
|
||||||
|
|
||||||
def create_database():
|
def create_database():
|
||||||
global new_cursor, create_statements
|
global new_cursor, create_statements
|
||||||
if debug or verbose:
|
if debug or verbose:
|
||||||
print 'Creating new database:'
|
print 'Creating new database:'
|
||||||
else:
|
else:
|
||||||
print 'Creating new database...',
|
print 'Creating new database...',
|
||||||
for sql_create in create_statements:
|
for statement_type, sql_create in create_statements:
|
||||||
if debug:
|
if debug:
|
||||||
print '... ', sql_create.replace('\n', ' ').replace(' ', ' ')
|
print '... ', sql_create.replace('\n', ' ').replace(' ', ' ')
|
||||||
elif verbose:
|
elif verbose:
|
||||||
if sql_create[:12] == u'CREATE TABLE':
|
print '... creating %s...' % statement_type,
|
||||||
print '... creating table "%s"...' % sql_create[13, sql_create.find(u'(') - 2],
|
|
||||||
elif sql_create[:12] == u'CREATE INDEX':
|
|
||||||
print '... creating index "%s"...' % sql_create[13, sql_create.find(u'ON') - 2],
|
|
||||||
new_cursor.execute(sql_create)
|
new_cursor.execute(sql_create)
|
||||||
if verbose and not debug:
|
if verbose and not debug:
|
||||||
print 'done.'
|
print 'done.'
|
||||||
@ -105,20 +128,21 @@ def import_songs():
|
|||||||
print 'done.'
|
print 'done.'
|
||||||
author_map = {}
|
author_map = {}
|
||||||
for row in rows:
|
for row in rows:
|
||||||
names = row[1].split(u' ')
|
display_name = convert_string(row[1])
|
||||||
|
names = display_name.split(u' ')
|
||||||
first_name = names[0]
|
first_name = names[0]
|
||||||
last_name = u' '.join(names[1:])
|
last_name = u' '.join(names[1:])
|
||||||
if last_name is None:
|
if last_name is None:
|
||||||
last_name = u''
|
last_name = u''
|
||||||
sql_insert = u'INSERT INTO authors '\
|
sql_insert = u'INSERT INTO authors '\
|
||||||
'(id, first_name, last_name, display_name) '\
|
'(id, first_name, last_name, display_name) '\
|
||||||
'VALUES (NULL, "%s", "%s", "%s")'\
|
'VALUES (NULL, ?, ?, ?)'
|
||||||
% (first_name, last_name, row['displayname'])
|
sql_params = (first_name, last_name, display_name)
|
||||||
if debug:
|
if debug:
|
||||||
print '...', str(sql_insert)
|
print '...', display_sql(sql_insert, sql_params)
|
||||||
elif verbose:
|
elif verbose:
|
||||||
print '... importing "%s"' % row['displayname']
|
print '... importing "%s"' % display_name
|
||||||
new_cursor.execute(sql_insert)
|
new_cursor.execute(sql_insert, sql_params)
|
||||||
author_map[row[0]] = new_cursor.lastrowid
|
author_map[row[0]] = new_cursor.lastrowid
|
||||||
if debug:
|
if debug:
|
||||||
print ' >>> authors.authorid =', row[0], 'authors.id =', author_map[row[0]]
|
print ' >>> authors.authorid =', row[0], 'authors.id =', author_map[row[0]]
|
||||||
@ -129,36 +153,38 @@ def import_songs():
|
|||||||
else:
|
else:
|
||||||
print 'Importing songs...',
|
print 'Importing songs...',
|
||||||
if debug:
|
if debug:
|
||||||
print '... SELECT songid AS id, songtitle AS title, lyrics, copyrightinfo AS copyright FROM songs'
|
print '... SELECT songid AS id, songtitle AS title, lyrics, copyrightinfo AS copyright FROM songs...',
|
||||||
elif verbose:
|
elif verbose:
|
||||||
print '... fetching songs from old database...',
|
print '... fetching songs from old database...',
|
||||||
old_cursor.execute(u'SELECT songid AS id, songtitle AS title, lyrics, copyrightinfo AS copyright FROM songs')
|
old_cursor.execute(u'SELECT songid AS id, songtitle AS title, lyrics, copyrightinfo AS copyright FROM songs')
|
||||||
rows = old_cursor.fetchall()
|
rows = old_cursor.fetchall()
|
||||||
if not debug and verbose:
|
if debug or verbose:
|
||||||
print 'done.'
|
print 'done.'
|
||||||
song_map = {}
|
song_map = {}
|
||||||
xml_lyrics_template = u'<?xml version="1.0" encoding="utf-8"?><song version="1.0"><lyrics language="en">%s</lyrics></song>'
|
xml_lyrics_template = u'<?xml version="1.0" encoding="utf-8"?><song version="1.0"><lyrics language="en">%s</lyrics></song>'
|
||||||
xml_verse_template = u'<verse label="%d" type="Verse"><![CDATA[%s]]></verse>'
|
xml_verse_template = u'<verse label="%d" type="Verse"><![CDATA[%s]]></verse>'
|
||||||
for row in rows:
|
for row in rows:
|
||||||
print row[2].decode('iso-8859-1')
|
clean_title = convert_string(row[1])
|
||||||
text_lyrics = unicode(row[2], 'iso-8859-1').split(u'\n\n')
|
clean_lyrics = convert_string(row[2])
|
||||||
|
clean_copyright = convert_string(row[3])
|
||||||
|
text_lyrics = clean_lyrics.split(u'\n\n')
|
||||||
xml_lyrics = u''
|
xml_lyrics = u''
|
||||||
for line, verse in enumerate(text_lyrics):
|
for line, verse in enumerate(text_lyrics):
|
||||||
if not verse:
|
if not verse:
|
||||||
continue
|
continue
|
||||||
xml_lyrics += (xml_lyrics_template % (line, verse))
|
xml_lyrics += (xml_verse_template % (line + 1, verse))
|
||||||
xml_verse = xml_verse_template % xml_lyrics
|
xml_verse = xml_lyrics_template % xml_lyrics
|
||||||
clean_title = row[1]
|
search_title = clean_string(clean_title)
|
||||||
clean_lyrics = row[2]
|
search_lyrics = clean_string(clean_lyrics)
|
||||||
sql_insert = u'INSERT INTO songs '\
|
sql_insert = u'INSERT INTO songs '\
|
||||||
'(id, title, lyrics, copyright, search_title, search_lyrics) '\
|
'(id, title, lyrics, copyright, search_title, search_lyrics) '\
|
||||||
'VALUES (NULL, "%s", "%s", "%s", "%s", "%s")'\
|
'VALUES (NULL, ?, ?, ?, ?, ?)'
|
||||||
% (row[1], xml_lyrics, row[3], clean_title, clean_lyrics)
|
sql_params = (clean_title, xml_lyrics, clean_copyright, clean_title, clean_lyrics)
|
||||||
if debug:
|
if debug:
|
||||||
print '...', str(sql_insert)
|
print '...', display_sql(sql_insert, (sql_params[0], u'<xml>', sql_params[2], sql_params[3], u'string'))
|
||||||
elif verbose:
|
elif verbose:
|
||||||
print '... importing "%s"' % row[u'title']
|
print '... importing "%s"' % clean_title
|
||||||
new_cursor.execute(sql_insert)
|
new_cursor.execute(sql_insert, sql_params)
|
||||||
song_map[row[0]] = new_cursor.lastrowid
|
song_map[row[0]] = new_cursor.lastrowid
|
||||||
if not verbose and not debug:
|
if not verbose and not debug:
|
||||||
print 'done.'
|
print 'done.'
|
||||||
@ -170,22 +196,22 @@ def import_songs():
|
|||||||
print '... SELECT authorid AS author_id, songid AS song_id FROM songauthors'
|
print '... SELECT authorid AS author_id, songid AS song_id FROM songauthors'
|
||||||
elif verbose:
|
elif verbose:
|
||||||
print '... fetching song-to-author mapping from old database...',
|
print '... fetching song-to-author mapping from old database...',
|
||||||
old_cursor.execute(u'SELECT songid AS id, songtitle AS title, lyrics, copyrightinfo AS copyright FROM songs')
|
old_cursor.execute(u'SELECT authorid AS author_id, songid AS song_id FROM songauthors')
|
||||||
rows = old_cursor.fetchall()
|
rows = old_cursor.fetchall()
|
||||||
if not debug and verbose:
|
if not debug and verbose:
|
||||||
print 'done.'
|
print 'done.'
|
||||||
for row in rows:
|
for row in rows:
|
||||||
sql_insert = u'INSERT INTO authors_songs '\
|
sql_insert = u'INSERT INTO authors_songs '\
|
||||||
'(author_id, song_id) '\
|
'(author_id, song_id) '\
|
||||||
'VALUES (%d, %d)'\
|
'VALUES (?, ?)'
|
||||||
% (author_map[row[u'author_id']], song_map[row[u'song_id']])
|
sql_params = (author_map[row[0]], song_map[row[1]])
|
||||||
if debug:
|
if debug:
|
||||||
print '... ', str(sql_insert)
|
print '... ', display_sql(sql_insert, sql_params)
|
||||||
elif verbose:
|
elif verbose:
|
||||||
print '... Author %d (was %d) => Song %d (was %d)'\
|
print '... Author %d (was %d) => Song %d (was %d)'\
|
||||||
% (int(row[0]), author_map[row[0]],
|
% (int(row[0]), author_map[row[0]],
|
||||||
int(row[1]), song_map[row[1]])
|
int(row[1]), song_map[row[1]])
|
||||||
new_cursor.execute(sql_insert)
|
new_cursor.execute(sql_insert, sql_params)
|
||||||
if not verbose and not debug:
|
if not verbose and not debug:
|
||||||
print 'done.'
|
print 'done.'
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user