diff --git a/openlp/plugins/bibles/lib/csvbible.py b/openlp/plugins/bibles/lib/csvbible.py index fd987dfdf..82872e15b 100644 --- a/openlp/plugins/bibles/lib/csvbible.py +++ b/openlp/plugins/bibles/lib/csvbible.py @@ -50,14 +50,17 @@ The format of the books file is: ... 40,2,Matthew,Matt -The format of the verses file is: +There are two acceptable formats of the verses file. They are: ,,, + or + ,,, For example: 1,1,1,"In the beginning God created the heaven and the earth." - 1,1,2,"And the earth was without form, and void; and darkness...." + or + "Genesis",1,2,"And the earth was without form, and void; and...." All CSV files are expected to use a comma (',') as the delimeter and double quotes ('"') as the quote symbol. @@ -172,15 +175,22 @@ class CSVBible(BibleDB): for line in verse_reader: if self.stop_import_flag: break - if book_ptr != book_list[int(line[0])]: - book = self.get_book(book_list[int(line[0])]) + try: + line_book = book_list[int(line[0])] + except ValueError: + line_book = unicode(line[0], details['encoding']) + if book_ptr != line_book: + book = self.get_book(line_book) book_ptr = book.name self.wizard.incrementProgressBar(unicode(translate( 'BibleDB.Wizard', 'Importing verses from %s...', 'Importing verses from ...')) % book.name) self.session.commit() - self.create_verse(book.id, line[1], line[2], - unicode(line[3], details['encoding'])) + try: + verse_text = unicode(line[3], details['encoding']) + except UnicodeError: + verse_text = unicode(line[3], u'cp1252') + self.create_verse(book.id, line[1], line[2], verse_text) self.wizard.incrementProgressBar(translate('BibleDB.Wizard', 'Importing verses... done.')) Receiver.send_message(u'openlp_process_events') diff --git a/openlp/plugins/bibles/lib/http.py b/openlp/plugins/bibles/lib/http.py index b844bbe61..e004be9df 100644 --- a/openlp/plugins/bibles/lib/http.py +++ b/openlp/plugins/bibles/lib/http.py @@ -210,7 +210,8 @@ class BGExtract(object): cleaner = [(re.compile(' |
|\'\+\''), lambda match: '')] soup = get_soup_for_bible_ref( u'http://www.biblegateway.com/passage/?%s' % url_params, - cleaner=cleaner) + pre_parse_regex=r'', pre_parse_substitute='', + cleaner=cleaner) if not soup: return None Receiver.send_message(u'openlp_process_events') @@ -499,7 +500,8 @@ class HTTPBible(BibleDB): """ return HTTPBooks.get_verse_count(book, chapter) -def get_soup_for_bible_ref(reference_url, header=None, cleaner=None): +def get_soup_for_bible_ref(reference_url, header=None, pre_parse_regex=None, + pre_parse_substitute=None, cleaner=None): """ Gets a webpage and returns a parsed and optionally cleaned soup or None. @@ -509,6 +511,13 @@ def get_soup_for_bible_ref(reference_url, header=None, cleaner=None): ``header`` An optional HTTP header to pass to the bible web server. + ``pre_parse_regex`` + A regular expression to run on the webpage. Allows manipulation of the + webpage before passing to BeautifulSoup for parsing. + + ``pre_parse_substitute`` + The text to replace any matches to the regular expression with. + ``cleaner`` An optional regex to use during webpage parsing. """ @@ -518,12 +527,15 @@ def get_soup_for_bible_ref(reference_url, header=None, cleaner=None): if not page: send_error_message(u'download') return None + page_source = page.read() + if pre_parse_regex and pre_parse_substitute is not None: + page_source = re.sub(pre_parse_regex, pre_parse_substitute, page_source) soup = None try: if cleaner: - soup = BeautifulSoup(page, markupMassage=cleaner) + soup = BeautifulSoup(page_source, markupMassage=cleaner) else: - soup = BeautifulSoup(page) + soup = BeautifulSoup(page_source) except HTMLParseError: log.exception(u'BeautifulSoup could not parse the bible page.') if not soup: