#!/usr/bin/env python # -*- coding: utf-8 -*- """ org.py convert org source file into html file """ __author__ = "Jaemok Jeong(jmjeong@gmail.com)" __date__ = "Tue Aug 11 12:50:17 2009" import os import tempfile import logging import re import sys import commands import codecs import datetime import pytz from BeautifulSoup import BeautifulSoup import blogofile_bf as bf logger = logging.getLogger("blogofile.org") class EmacsNotFoundException(Exception): pass post = bf.config.controllers.blog.post.mod class org(object): """ Class to Convert org file into html file It composes org-content with source, preamble, and postample. Launches emacs and convert the org-content into html file. Generated html file is processed with BeautifulSoup module to extract body section and title and categories. self.content = body self.title = title (which is first '*' in org-file) self.category = categories (which is tags in first '*' in org-file) self.date = date (which is scheduled file?) """ def __init__(self, source): self.source = source return self.__convert() def __convert(self): temp_file = tempfile.NamedTemporaryFile(suffix='.org') try: temp_file.write(bf.config.blog.emacs_orgmode_preamble) temp_file.write("\n") except AttributeError: pass temp_file.write(self.source.encode(bf.config.blog_post_encoding)) temp_file.flush() pname = "" try: pname = bf.config.blog.emacs_binary except AttributeError: raise EmacsNotFoundException("Emacs binary is not defined") pname += " --batch" try: if bf.config.blog.emacs_preload_elisp: pname += " --load={0}".format( bf.config.blog.emacs_preload_elisp) except AttributeError: pass pname += " --visit={0} --funcall org-export-as-html-batch".format( temp_file.name) logger.debug("Exec name::: %s" % pname) status, output = commands.getstatusoutput(pname) logger.debug("Convert output:::\n\t%s"%output) if status: raise EmacsNotFoundException("orgfile filter failed") html = temp_file.name[:-4] + '.html' temp_file.close() #IMO codecs.open is broken on Win32. #It refuses to open files without replacing newlines with CR+LF #reverting to regular open and decode: content = open(html, "rb").read().decode(bf.config.blog_post_encoding) # remote the temporary file os.remove(html) soup = BeautifulSoup(content) # the first h2 section will be used for title, category, and date metaline = soup.find('div', {'id': 'outline-container-1'}).h2 # extract title try: self.title = re.sub(' ', '', metaline.contents[0]).strip() except AttributeError: self.title = None # extract category try: categories = metaline('span', {'class':'tag'})[0].string self.categories = set([post.Category(x) for x in categories.split(' ')]) except: self.categories = None # extract date try: date = metaline('span', {'class':'timestamp'})[0].string # 2009-08-22 Sat 15:22 # date_format = "%Y/%m/%d %H:%M:%S" self.date = datetime.datetime.strptime(date, "%Y-%m-%d %a %H:%M") self.date = self.date.replace( tzinfo=pytz.timezone(bf.config.blog_timezone)) except: self.date = None # delete first h2 section (which is title and category) try: metaline.extract() except AttributeError: pass # print soup.body try: toc = soup.find('div',{'id': 'table-of-contents'}) content = soup.find('div', {'id': 'outline-container-1'}) if toc != None: content = str(toc) + str(content) self.content = str(content).decode(bf.config.blog_post_encoding) except: pass if __name__ == '__main__': import doctest doctest.testmod(verbose=True)