|
#! /usr/bin/env python
"Convert a RadiantCMS SQLite3 db file into separate page and header text files"
import optparse, os
# Command-line interface: one positional argument (the RadiantCMS SQLite3
# database file) plus an optional output directory.
op = optparse.OptionParser(usage="%prog [options] DBFILE")
op.add_option("-o", "--out", dest="OUTDIR", default="out",
              help="directory the .md files are written to [default: %default]")
opts, args = op.parse_args()
if not args:
    # Exit with a usage message instead of an IndexError on args[0] below.
    op.error("missing argument: path to the SQLite3 database file")
if not os.path.isfile(args[0]):
    # sqlite3.connect() would silently create an empty database here and the
    # first query would then fail with "no such table".
    op.error("database file not found: %s" % args[0])
import sqlite3
conn = sqlite3.connect(args[0])
# sqlite3.Row lets columns be read by name, e.g. page["slug"].
conn.row_factory = sqlite3.Row
# NOTE(review): this cursor is not used by the code visible in this file;
# queries go through conn.execute(). Kept in case other code relies on it.
c = conn.cursor()
import unicodedata
def norm(s):
    """Return *s* reduced to plain ASCII text.

    The string is decomposed with Unicode NFD normalisation, so accented
    letters split into a base letter plus combining marks, and every
    non-ASCII code point (including those marks) is then dropped.

    The result is text rather than ``bytes``, so it can be concatenated with
    string literals and handed to ``textwrap``. ``None`` (for example a NULL
    database column) is treated as the empty string.
    """
    if s is None:
        return ""
    ascii_bytes = unicodedata.normalize("NFD", s).encode("ascii", "ignore")
    # On Python 3 .encode() yields bytes, which cannot be joined to str;
    # decode back so callers always receive text.
    return ascii_bytes.decode("ascii")
import datetime
def date(s):
    """Convert a "YYYY-MM-DD HH:MM:SS" timestamp string to an ISO date string.

    Falsy input (``None`` or an empty string) yields an empty string.
    """
    if not s:
        return ""
    parsed = datetime.datetime.strptime(s, "%Y-%m-%d %H:%M:%S")
    return parsed.date().isoformat()
import textwrap, re
class DocWrapper(textwrap.TextWrapper):
    """Wrap text in a document, processing each paragraph individually.

    Paragraphs are separated by one or more blank (whitespace-only) lines.
    Each paragraph is wrapped on its own and each separator is emitted as a
    single empty line, so paragraph breaks survive ``wrap()`` / ``fill()``.
    """

    # A paragraph edge is a newline, optional whitespace, then another
    # newline. The capturing group makes split() keep the separators.
    _PARA_EDGE = re.compile(r"(\n\s*\n)")

    def __init__(self, width=120, break_long_words=False, **kwargs):
        """Initialise the wrapper.

        width            -- maximum line length (default 120).
        break_long_words -- whether over-long words may be split
                            (default False).
        **kwargs         -- passed on to ``textwrap.TextWrapper``.
        """
        # Explicit base-class call (not super()) so this also works where
        # TextWrapper is an old-style class.
        textwrap.TextWrapper.__init__(
            self, width=width, break_long_words=break_long_words, **kwargs
        )

    def wrap(self, text):
        """Return the wrapped lines of 'text', one paragraph at a time.

        Overrides ``textwrap.TextWrapper.wrap``; the inherited ``fill()``
        calls this method, so it gains the same paragraph handling.
        """
        wrapped_lines = []
        for para in self._PARA_EDGE.split(text):
            if para.isspace():
                # A paragraph separator: keep one blank line.
                wrapped_lines.append("")
            else:
                wrapped_lines.extend(textwrap.TextWrapper.wrap(self, para))
        return wrapped_lines
dw = DocWrapper()

# open() below does not create missing directories, so make sure the output
# directory exists before writing any page.
if not os.path.isdir(opts.OUTDIR):
    os.makedirs(opts.OUTDIR)

# Write one Markdown file per row of the "pages" table: a metadata header
# wrapped in an HTML comment, followed by the page's parts.
for page in conn.execute("SELECT * FROM pages"):
    # The slug "/" cannot be used as a file name, so that page is written
    # as "index".
    pagename = page["slug"] if page["slug"] != "/" else "index"
    outfile = os.path.join(opts.OUTDIR, "%s.md" % pagename)
    with open(outfile, "w") as f:
        f.write("<!-- \n")
        f.write(".. title: " + norm(page["title"]) + "\n")
        f.write(".. slug: " + pagename + "\n")
        if page["published_at"]:
            f.write(".. date: " + page["published_at"] + "\n")
        else:
            # Pages without a publication timestamp get a fixed placeholder.
            f.write(".. date: 2008-06-01 12:00:00\n")
        f.write(".. type: text\n")
        f.write(".. category: blog\n")
        f.write("-->")
        f.write("\n\n")
        # Append the page's parts in name order, ASCII-folded and wrapped;
        # parts that end up empty are skipped.
        for part in conn.execute("SELECT * FROM page_parts WHERE page_id = ? ORDER BY page_parts.name", (page["id"],)):
            text = dw.fill(norm(part["content"]))
            if text:
                f.write(text + "\n")
|