Changed ZionWorxImport to parse CSV database dump (XML had invalid syntax)

2012-05-24 22:12:48 +10:00 · 2012-05-24 22:12:48 +10:00 · fc87e58a3c
commit fc87e58a3c
parent b7cde8938c
2 changed files with 75 additions and 36 deletions
--- a/openlp/plugins/bibles/lib/csvbible.py
+++ b/openlp/plugins/bibles/lib/csvbible.py
@@ -73,7 +73,7 @@ class CSVBible(BibleDB):

    def __init__(self, parent, **kwargs):
        """
-        Loads a Bible from a set of CVS files.
+        Loads a Bible from a set of CSV files.
        This class assumes the files contain all the information and
        a clean bible is being loaded.
        """
--- a/openlp/plugins/songs/lib/zionworximport.py
+++ b/openlp/plugins/songs/lib/zionworximport.py
@@ -28,10 +28,8 @@
 The :mod:`zionworximport` module provides the functionality for importing
 ZionWorx songs into the OpenLP database.
 """
+import csv
 import logging
-import re
-
-from lxml import etree

 from openlp.core.lib import translate
 from openlp.plugins.songs.lib.songimport import SongImport
@@ -40,40 +38,81 @@ log = logging.getLogger(__name__)

 class ZionWorxImport(SongImport):
    """
-    The :class:`ZionWorxImport` class provides the ability to import...
+    The :class:`ZionWorxImport` class provides the ability to import songs
+    from ZionWorx, via a dump of the ZionWorx database to a CSV file.
+
+    ZionWorx song database fields:
+
+        * ``SongNum`` Song ID. Discarded by importer.
+        * ``Title1`` Main Title.
+        * ``Title2`` Alternate Title.
+        * ``Lyrics`` Song verses, separated by blank lines.
+        * ``Writer`` Song author(s).
+        * ``Copyright`` Copyright information.
+        * ``Keywords`` Discarded by importer.
+        * ``DefaultStyle`` Discarded by importer.
+
+    ZionWorx has no native export function; it uses the proprietary TurboDB
+    database engine. The TurboDB vendor, dataWeb, provides tools which can
+    export TurboDB tables to other formats, such as the freeware console tool
+    TurboDB Data Exchange, which is available for Windows and Linux. This
+    command exports the ZionWorx songs table to a CSV file:
+
+    ``tdbdatax MainTable.dat songstable.csv -fsdf -s, -qd``
+
+        * ``-f`` Table format: ``sdf`` denotes text file.
+        * ``-s`` Separator character between fields.
+        * ``-q`` Quote character surrounding fields. ``d`` denotes double-quote.
+
+    CSV format expected by importer:
+
+        * Fields separated by comma ``,``
+        * Fields surrounded by double-quotes ``"``. This enables fields (such as
+          Lyrics) to include new-lines and commas. Double-quotes within a field
+          are denoted by two double-quotes ``""``
+        * Note: This is the default format of the Python ``csv`` module.
+
    """

    def doImport(self):
        """
-        Receive ... to import.
+        Receive a CSV file (from a ZionWorx database dump) to import.
        """
-        #open xml file
-        with open(self.importSource, 'rb') as f:
-            songs_xml = unicode(f.read(), u'utf-8')
-            # check single xml file
-            if not re.match(ur' *<\?xml[^<>]*\?>', songs_xml):
-                # Error: invalid file (no XML declaration)
-                print u'Error: invalid file (no XML declaration)'
-            else:
-                # clean invalid XML
-                # remove DefaultStyle attribute if non-empty
-                songs_xml = re.sub(ur'DefaultStyle=".+" />', u'/>', songs_xml)
-                # replace & with &amp; (skip existing entities)
-                songs_xml = re.sub(ur'&(?![a-zA-Z#][a-zA-Z0-9]*;)', u'&amp;',
-                    songs_xml)
-                # replace < with &lt; (skip known <tags>)
-                songs_xml = re.sub(ur'<(?![?DMFR/])', u'&lt;', songs_xml)
-                # replace " within Lyrics attribute with &quot;
-                songs_xml = re.sub(ur'(?<=Lyrics=")([^<]*)(?=" Writer=)',
-                    self._escapeQuotes, songs_xml)
-                print songs_xml
-
-                # parse XML
-                tree = etree.fromstring(songs_xml.encode(u'utf-8'))
-                for song in tree[1].iterchildren():
-                    for attrib, value in song.attrib.items():
-                        print attrib + ':', value
-                    print ''
-
-    def _escapeQuotes(self, m):
-        return m.group(0).replace('"', '&quot;')
+        import os  # Local import: ``os`` is not imported at module level.
+        if not os.path.isfile(self.importSource):
+            # Format after translate() so the template itself is translated.
+            self.logError(unicode(translate('SongsPlugin.ZionWorxImport',
+                'No songs to import.')),
+                unicode(translate('SongsPlugin.ZionWorxImport',
+                'No %s CSV file found.')) % u'ZionWorx')
+            return
+        with open(self.importSource, 'rb') as songs_file:
+            fieldnames = [u'SongNum', u'Title1', u'Title2', u'Lyrics',
+                u'Writer', u'Copyright', u'Keywords', u'DefaultStyle']
+            songs_reader = csv.DictReader(songs_file, fieldnames)
+            try:
+                # Read every record up front: this yields the progress bar
+                # maximum without consuming the file twice.
+                records = list(songs_reader)
+            except csv.Error, e:
+                # As above, substitute values only after translate().
+                self.logError(unicode(translate('SongsPlugin.ZionWorxImport',
+                    'Error reading CSV file.')),
+                    unicode(translate('SongsPlugin.ZionWorxImport',
+                    'Line %d: %s')) % (songs_reader.line_num, e))
+                return
+            num_records = len(records)
+            log.debug(u'%s records found in CSV file' % num_records)
+            self.importWizard.progressBar.setMaximum(num_records)
+            for record in records:
+                if self.stopImportFlag:
+                    return
+                self.setDefaults()
+                self.title = unicode(record[u'Title1'])
+                if record[u'Title2']:
+                    self.alternateTitle = unicode(record[u'Title2'])
+                self.parseAuthor(unicode(record[u'Writer']))
+                self.addCopyright(unicode(record[u'Copyright']))
+                self.processSongText(unicode(record[u'Lyrics']))
+                if not self.finish():
+                    self.logError(self.title)