1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
|
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
org.py: convert an Org-mode source file into an HTML file.
"""
__author__ = "Jaemok Jeong(jmjeong@gmail.com)"
__date__ = "Tue Aug 11 12:50:17 2009"
import os
import tempfile
import logging
import re
import sys
import commands
import codecs
import datetime
import pytz
from BeautifulSoup import BeautifulSoup
import blogofile_bf as bf
# Module-level logger named "blogofile.org"; the org class below uses it for
# debug output about the Emacs command line and its captured output.
logger = logging.getLogger("blogofile.org")
class EmacsNotFoundException(Exception):
    """Raised when the Org-to-HTML export through Emacs cannot be performed.

    Despite its name, this module raises it in two situations: when no Emacs
    binary is configured, and when the Emacs export command exits with a
    non-zero status.
    """
# Shortcut to the object stored at bf.config.controllers.blog.post.mod
# (presumably the blog controller's post module -- confirm against the
# blogofile configuration). The org class uses post.Category to build
# category objects from headline tags.
post = bf.config.controllers.blog.post.mod
class org(object):
    """Convert Org-mode source text to HTML by running Emacs in batch mode.

    The source is written to a temporary ``.org`` file (preceded by the
    optional ``bf.config.blog.emacs_orgmode_preamble``), exported with
    ``org-export-as-html-batch``, and the generated HTML is parsed with
    BeautifulSoup.  The first ``h2`` inside ``div#outline-container-1``
    supplies the post metadata and is then removed from the body.

    Attributes set by the constructor:
        source: the Org text that was passed in.
        title: text of that first ``h2`` headline, or None.
        categories: set of ``post.Category`` objects built from the
            headline's ``span.tag`` text, or None.
        date: timezone-aware datetime parsed from the headline's
            ``span.timestamp`` (e.g. ``2009-08-22 Sat 15:22``), or None.
        content: unicode HTML made of the table of contents (if present)
            followed by the first outline container (if present).

    Raises:
        EmacsNotFoundException: if no Emacs binary is configured or the
            export command exits with a non-zero status.
    """

    # Headline text and tag lists may carry non-breaking spaces either as the
    # literal "&nbsp;" entity or as U+00A0, depending on how the HTML was
    # produced and parsed.  NOTE(review): the original pattern appeared as a
    # plain space, which would have removed every space from the title;
    # confirm against Org's exporter output.
    _NBSP_RE = re.compile(u'&nbsp;|\xa0')
    _TAG_SEPARATOR_RE = re.compile(u'(?:&nbsp;|\\s)+', re.UNICODE)

    def __init__(self, source):
        self.source = source
        self.__convert()

    def __convert(self):
        """Export the source through Emacs and populate the attributes."""
        soup = BeautifulSoup(self._export_html())
        self._parse_export(soup)

    def _emacs_command(self, org_path):
        """Return the shell command line that exports *org_path* to HTML."""
        try:
            command = bf.config.blog.emacs_binary
        except AttributeError:
            raise EmacsNotFoundException("Emacs binary is not defined")
        if not command:
            # A binary configured as None or "" is as unusable as a missing one.
            raise EmacsNotFoundException("Emacs binary is not defined")
        command += " --batch"

        try:
            preload = bf.config.blog.emacs_preload_elisp
        except AttributeError:
            preload = None
        if preload:
            command += " --load={0}".format(preload)

        command += " --visit={0} --funcall org-export-as-html-batch".format(
            org_path)
        logger.debug("Exec name::: %s", command)
        return command

    def _export_html(self):
        """Run the Emacs export and return the generated HTML as unicode."""
        temp_file = tempfile.NamedTemporaryFile(suffix='.org')
        try:
            # The preamble is optional: a missing setting is simply skipped.
            try:
                preamble = bf.config.blog.emacs_orgmode_preamble
            except AttributeError:
                pass
            else:
                temp_file.write(preamble)
                temp_file.write("\n")
            temp_file.write(self.source.encode(bf.config.blog_post_encoding))
            temp_file.flush()

            status, output = commands.getstatusoutput(
                self._emacs_command(temp_file.name))
            logger.debug("Convert output:::\n\t%s", output)
            if status:
                raise EmacsNotFoundException("orgfile filter failed")
            # The export is read from the same path with ".org" -> ".html".
            html_path = os.path.splitext(temp_file.name)[0] + '.html'
        finally:
            # Closing the NamedTemporaryFile also deletes it, on every path.
            temp_file.close()

        # codecs.open is avoided on purpose: on Win32 it was found to rewrite
        # newlines as CR+LF, so read the raw bytes and decode them here.
        try:
            with open(html_path, "rb") as html_file:
                raw = html_file.read()
        finally:
            # Remove the generated file, but do not mask a failed open() with
            # a second error when the file was never created.
            if os.path.exists(html_path):
                os.remove(html_path)
        return raw.decode(bf.config.blog_post_encoding)

    def _parse_export(self, soup):
        """Fill title, categories, date and content from the parsed HTML."""
        container = soup.find('div', {'id': 'outline-container-1'})
        if container is None:
            logger.warning(
                "org export has no 'outline-container-1' section; "
                "title, categories and date will be None")
            metaline = None
        else:
            # The first h2 section carries the title, tags and timestamp.
            metaline = container.h2

        # extract title
        try:
            self.title = self._NBSP_RE.sub(u'', metaline.contents[0]).strip()
        except (AttributeError, IndexError, TypeError):
            # No headline, an empty headline, or a leading child that is a
            # tag rather than text.
            self.title = None

        # extract categories
        try:
            tag_text = metaline('span', {'class': 'tag'})[0].string
            names = self._TAG_SEPARATOR_RE.split(tag_text.strip())
            self.categories = set(
                post.Category(name) for name in names if name)
        except Exception:
            # Best effort: post.Category is project code, so any failure
            # here just means "no categories".
            logger.debug("no categories extracted from headline",
                         exc_info=True)
            self.categories = None

        # extract date, e.g. "2009-08-22 Sat 15:22"
        try:
            stamp = metaline('span', {'class': 'timestamp'})[0].string
            naive = datetime.datetime.strptime(stamp, "%Y-%m-%d %a %H:%M")
            # pytz zones must be attached with localize(); passing them via
            # replace(tzinfo=...) yields the zone's historical LMT offset.
            zone = pytz.timezone(bf.config.blog_timezone)
            self.date = zone.localize(naive)
        except Exception:
            logger.debug("no date extracted from headline", exc_info=True)
            self.date = None

        # Drop the metadata headline so it is not repeated in the body.
        if metaline is not None:
            metaline.extract()

        toc = soup.find('div', {'id': 'table-of-contents'})
        # unicode() renders the markup without an intermediate byte encoding,
        # so the result does not depend on blog_post_encoding being UTF-8.
        self.content = u''.join(
            unicode(node) for node in (toc, container) if node is not None)
if __name__ == '__main__':
    # When executed as a script, run this module's doctests verbosely.
    from doctest import testmod
    testmod(verbose=True)
|