forked from openlp/openlp
Changed ZionWorxImport to parse CSV database dump (XML had invalid syntax)
This commit is contained in:
parent
b7cde8938c
commit
fc87e58a3c
@ -73,7 +73,7 @@ class CSVBible(BibleDB):
|
||||
|
||||
def __init__(self, parent, **kwargs):
|
||||
"""
|
||||
Loads a Bible from a set of CVS files.
|
||||
Loads a Bible from a set of CSV files.
|
||||
This class assumes the files contain all the information and
|
||||
a clean bible is being loaded.
|
||||
"""
|
||||
|
@ -28,10 +28,8 @@
|
||||
The :mod:`zionworximport` module provides the functionality for importing
|
||||
ZionWorx songs into the OpenLP database.
|
||||
"""
|
||||
import csv
|
||||
import logging
|
||||
import re
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from openlp.core.lib import translate
|
||||
from openlp.plugins.songs.lib.songimport import SongImport
|
||||
@ -40,40 +38,81 @@ log = logging.getLogger(__name__)
|
||||
|
||||
class ZionWorxImport(SongImport):
|
||||
"""
|
||||
The :class:`ZionWorxImport` class provides the ability to import...
|
||||
The :class:`ZionWorxImport` class provides the ability to import songs
|
||||
from ZionWorx, via a dump of the ZionWorx database to a CSV file.
|
||||
|
||||
ZionWorx song database fields:
|
||||
|
||||
* ``SongNum`` Song ID. Discarded by importer.
|
||||
* ``Title1`` Main Title.
|
||||
* ``Title2`` Alternate Title.
|
||||
* ``Lyrics`` Song verses, separated by blank lines.
|
||||
* ``Writer`` Song author(s).
|
||||
* ``Copyright`` Copyright information.
|
||||
* ``Keywords`` Discarded by importer.
|
||||
* ``DefaultStyle`` Discarded by importer.
|
||||
|
||||
ZionWorx has no native export function; it uses the proprietary TurboDB
|
||||
database engine. The TurboDB vendor, dataWeb, provides tools which can
|
||||
export TurboDB tables to other formats, such as the freeware console tool
|
||||
TurboDB Data Exchange which is available for Windows and Linux. This command
|
||||
exports the ZionWorx songs table to a CSV file:
|
||||
|
||||
``tdbdatax MainTable.dat songstable.csv -fsdf -s, -qd``
|
||||
|
||||
* ``-f`` Table format: ``sdf`` denotes text file.
|
||||
* ``-s`` Separator character between fields.
|
||||
* ``-q`` Quote character surrounding fields. ``d`` denotes double-quote.
|
||||
|
||||
CSV format expected by importer:
|
||||
|
||||
* Fields separated by comma ``,``
|
||||
* Fields surrounded by double-quotes ``"``. This enables fields (such as
|
||||
Lyrics) to include new-lines and commas. Double-quotes within a field
|
||||
are denoted by two double-quotes ``""``
|
||||
* Note: This is the default format of the Python ``csv`` module.
|
||||
|
||||
"""
|
||||
|
||||
def doImport(self):
|
||||
"""
|
||||
Receive ... to import.
|
||||
Receive a CSV file (from a ZionWorx database dump) to import.
|
||||
"""
|
||||
#open xml file
|
||||
with open(self.importSource, 'rb') as f:
|
||||
songs_xml = unicode(f.read(), u'utf-8')
|
||||
# check single xml file
|
||||
if not re.match(ur' *<\?xml[^<>]*\?>', songs_xml):
|
||||
# Error: invalid file (no XML declaration)
|
||||
print u'Error: invalid file (no XML declaration)'
|
||||
else:
|
||||
# clean invalid XML
|
||||
# remove DefaultStyle attribute if non-empty
|
||||
songs_xml = re.sub(ur'DefaultStyle=".+" />', u'/>', songs_xml)
|
||||
# replace & with &amp; (skip existing entities)
|
||||
songs_xml = re.sub(ur'&(?![a-zA-Z#][a-zA-Z0-9]*;)', u'&',
|
||||
songs_xml)
|
||||
# replace < with &lt; (skip known <tags>)
|
||||
songs_xml = re.sub(ur'<(?![?DMFR/])', u'<', songs_xml)
|
||||
# replace " within Lyrics attribute with &quot;
|
||||
songs_xml = re.sub(ur'(?<=Lyrics=")([^<]*)(?=" Writer=)',
|
||||
self._escapeQuotes, songs_xml)
|
||||
print songs_xml
|
||||
|
||||
# parse XML
|
||||
tree = etree.fromstring(songs_xml.encode(u'utf-8'))
|
||||
for song in tree[1].iterchildren():
|
||||
for attrib, value in song.attrib.items():
|
||||
print attrib + ':', value
|
||||
print ''
|
||||
|
||||
def _escapeQuotes(self, m):
|
||||
return m.group(0).replace('"', '"')
|
||||
if not os.path.isfile(self.importSource):
|
||||
self.logError(unicode(translate('SongsPlugin.ZionWorxImport',
|
||||
'No songs to import.')),
|
||||
unicode(translate('SongsPlugin.ZionWorxImport',
|
||||
'No %s CSV file found.' % WizardStrings.ZW)))
|
||||
return
|
||||
with open(self.importSource, 'rb') as songs_file:
|
||||
songs_reader = csv.reader(songs_file)
|
||||
try:
|
||||
num_records = sum(1 for _ in songs_reader)
|
||||
except csv.Error, e:
|
||||
self.logError(unicode(translate('SongsPlugin.ZionWorxImport',
|
||||
'Error reading CSV file.')),
|
||||
unicode(translate('SongsPlugin.ZionWorxImport',
|
||||
'Line %d: %s' % songs_reader.line_num, e)))
|
||||
log.debug(u'%s records found in CSV file' % num_records)
|
||||
self.importWizard.progressBar.setMaximum(num_records)
|
||||
fieldnames = [u'SongNum', u'Title1', u'Title2', u'Lyrics',
|
||||
u'Writer', u'Copyright', u'Keywords', u'DefaultStyle']
|
||||
songs_reader_dict= csv.DictReader(songs_file, fieldnames)
|
||||
try:
|
||||
for record in songs_reader_dict:
|
||||
if self.stopImportFlag:
|
||||
return
|
||||
self.setDefaults()
|
||||
self.title = unicode(record[u'Title1'])
|
||||
if record[u'Title2']:
|
||||
self.alternateTitle = unicode(record[u'Title2'])
|
||||
self.parseAuthor(unicode(record[u'Writer']))
|
||||
self.addCopyright(unicode(record[u'Copyright']))
|
||||
self.processSongText(unicode(record[u'Lyrics']))
|
||||
if not self.finish():
|
||||
self.logError(self.title)
|
||||
except csv.Error, e:
|
||||
self.logError(unicode(translate('SongsPlugin.ZionWorxImport',
|
||||
'Error reading CSV file.')),
|
||||
unicode(translate('SongsPlugin.ZionWorxImport',
|
||||
'Line %d: %s' % songs_reader_dict.line_num, e)))
|
||||
|
Loading…
Reference in New Issue
Block a user