Fix BG Chinese bible (Bug #706211)

Fix CSV import to accept the same files V1 did, and cater for the "not quite" ASCII text found in cp1252-encoded files. bzr-revno: 1255 Fixes: https://launchpad.net/bugs/706211
2011-01-31 18:17:10 +00:00 · 2011-01-31 18:17:10 +00:00 · 90b1205e5b
commit 90b1205e5b
parent 5cb6fd12a6 250107073e
2 changed files with 32 additions and 10 deletions
--- a/openlp/plugins/bibles/lib/csvbible.py
+++ b/openlp/plugins/bibles/lib/csvbible.py
@ -50,14 +50,17 @@ The format of the books file is:
        ...
        40,2,Matthew,Matt

-The format of the verses file is:
+There are two acceptable formats of the verses file.  They are:

    <book_id>,<chapter_number>,<verse_number>,<verse_text>
+    or
+    <book_name>,<chapter_number>,<verse_number>,<verse_text>

    For example:

        1,1,1,"In the beginning God created the heaven and the earth."
-        1,1,2,"And the earth was without form, and void; and darkness...."
+        or
+        "Genesis",1,2,"And the earth was without form, and void; and...."

 All CSV files are expected to use a comma (',') as the delimiter and double
 quotes ('"') as the quote symbol.
@ -172,15 +175,22 @@ class CSVBible(BibleDB):
            for line in verse_reader:
                if self.stop_import_flag:
                    break
-                if book_ptr != book_list[int(line[0])]:
-                    book = self.get_book(book_list[int(line[0])])
+                try:
+                    line_book = book_list[int(line[0])]
+                except ValueError:
+                    line_book = unicode(line[0], details['encoding'])
+                if book_ptr != line_book:
+                    book = self.get_book(line_book)
                    book_ptr = book.name
                    self.wizard.incrementProgressBar(unicode(translate(
                        'BibleDB.Wizard', 'Importing verses from %s...',
                        'Importing verses from <book name>...')) % book.name)
                    self.session.commit()
-                self.create_verse(book.id, line[1], line[2],
-                    unicode(line[3], details['encoding']))
+                try:
+                    verse_text = unicode(line[3], details['encoding'])
+                except UnicodeError:
+                    verse_text = unicode(line[3], u'cp1252')
+                self.create_verse(book.id, line[1], line[2], verse_text)
            self.wizard.incrementProgressBar(translate('BibleDB.Wizard',
                'Importing verses... done.'))
            Receiver.send_message(u'openlp_process_events')
--- a/openlp/plugins/bibles/lib/http.py
+++ b/openlp/plugins/bibles/lib/http.py
@ -210,7 +210,8 @@ class BGExtract(object):
        cleaner = [(re.compile('&nbsp;|<br />|\'\+\''), lambda match: '')]
        soup = get_soup_for_bible_ref(
            u'http://www.biblegateway.com/passage/?%s' % url_params,
-                cleaner=cleaner)
+            pre_parse_regex=r'<meta name.*?/>', pre_parse_substitute='',
+            cleaner=cleaner)
        if not soup:
            return None
        Receiver.send_message(u'openlp_process_events')
@ -499,7 +500,8 @@ class HTTPBible(BibleDB):
        """
        return HTTPBooks.get_verse_count(book, chapter)

-def get_soup_for_bible_ref(reference_url, header=None, cleaner=None):
+def get_soup_for_bible_ref(reference_url, header=None, pre_parse_regex=None,
+    pre_parse_substitute=None, cleaner=None):
    """
    Gets a webpage and returns a parsed and optionally cleaned soup or None.

@ -509,6 +511,13 @@ def get_soup_for_bible_ref(reference_url, header=None, cleaner=None):
    ``header``
        An optional HTTP header to pass to the bible web server.

+    ``pre_parse_regex``
+        A regular expression to run on the webpage. Allows manipulation of the
+        webpage before passing to BeautifulSoup for parsing.
+
+    ``pre_parse_substitute``
+        The text to replace any matches to the regular expression with.
+
    ``cleaner``
        An optional regex to use during webpage parsing.
    """
@ -518,12 +527,15 @@ def get_soup_for_bible_ref(reference_url, header=None, cleaner=None):
    if not page:
        send_error_message(u'download')
        return None
+    page_source = page.read()
+    if pre_parse_regex and pre_parse_substitute is not None:
+        page_source = re.sub(pre_parse_regex, pre_parse_substitute, page_source)
    soup = None
    try:
        if cleaner:
-            soup = BeautifulSoup(page, markupMassage=cleaner)
+            soup = BeautifulSoup(page_source, markupMassage=cleaner)
        else:
-            soup = BeautifulSoup(page)
+            soup = BeautifulSoup(page_source)
    except HTMLParseError:
        log.exception(u'BeautifulSoup could not parse the bible page.')
    if not soup: