#! /usr/bin/env python
 | 
 | 
 | 
 | 
"Convert a RadiantCMS SQLite3 db file into separate page and header text files"
 | 
 | 
 | 
 | 
import optparse, os
 | 
 | 
# Command line: one positional argument (the SQLite database file) plus an
# optional output directory.
op = optparse.OptionParser(
    usage="%prog [options] DBFILE",
    description="Convert a RadiantCMS SQLite3 db file into separate page "
                "and header text files")
op.add_option("-o", "--out", dest="OUTDIR", default="out",
              help="directory the .md files are written to "
                   "[default: %default]")
opts, args = op.parse_args()

# The database path is read from args[0] further down; report a missing
# argument as a usage error instead of an IndexError traceback.
if not args:
    op.error("missing DBFILE argument")
 | 
 | 
 | 
 | 
import sqlite3
 | 
 | 
# sqlite3.connect() creates a new, empty database when the path does not
# exist, which would only show up later as a "no such table" error. Check
# for the file first and fail with a clear message.
if not os.path.isfile(args[0]):
    op.error("database file not found: %s" % args[0])

conn = sqlite3.connect(args[0])
# sqlite3.Row lets the code below read columns by name (row["slug"]).
conn.row_factory = sqlite3.Row
c = conn.cursor()
 | 
 | 
 | 
 | 
import unicodedata
 | 
 | 
def norm(s):
    """Return an ASCII-only text version of the unicode string *s*.

    The string is decomposed with Unicode NFD normalisation, so an accented
    letter becomes a base letter followed by combining marks.  Encoding to
    ASCII with the "ignore" error handler then drops the marks together
    with every other non-ASCII character.

    The result is decoded back to text so it can be concatenated with
    ordinary strings; without the decode the function returns ``bytes`` on
    Python 3 and ``"..." + norm(x)`` raises TypeError.

    ``None`` (e.g. a NULL database column) yields an empty string.
    """
    if s is None:
        return u""
    ascii_bytes = unicodedata.normalize("NFD", s).encode("ascii", "ignore")
    return ascii_bytes.decode("ascii")
 | 
 | 
 | 
 | 
import datetime
 | 
 | 
def date(s):
    """Return the date part of a "YYYY-MM-DD HH:MM:SS" timestamp string.

    The result is an ISO formatted date ("YYYY-MM-DD").  A falsy *s*
    (``None`` or an empty string) gives an empty string.  A non-empty
    string that does not match the expected format raises ValueError from
    ``strptime``.
    """
    if not s:
        return ""
    timestamp = datetime.datetime.strptime(s, "%Y-%m-%d %H:%M:%S")
    return timestamp.date().isoformat()
 | 
 | 
 | 
 | 
import textwrap, re
 | 
 | 
class DocWrapper(textwrap.TextWrapper):
    """Wrap text in a document, processing each paragraph individually.

    A plain TextWrapper treats its input as one paragraph and would merge
    paragraphs separated by blank lines.  This subclass splits the text at
    blank lines, wraps every paragraph on its own and keeps one empty line
    between them.  The inherited ``fill`` joins the lines from ``wrap``.
    """

    # Paragraph boundary: a newline, optional whitespace, another newline.
    # The capturing group makes split() return the separators as well, so
    # wrap() can emit one blank line for each of them.  Compiled once here
    # instead of on every wrap() call.
    _PARA_EDGE = re.compile(r"(\n\s*\n)", re.MULTILINE)

    def __init__(self, width=120, break_long_words=False, **kwargs):
        """Create the wrapper.

        width            -- maximum line length (default 120)
        break_long_words -- whether words longer than *width* may be split
                            (default False)
        kwargs           -- any further textwrap.TextWrapper options
        """
        # Initialise the base class so inherited attributes such as
        # ``width`` exist on the instance.  The explicit call is used
        # instead of super() so it also works where TextWrapper is an
        # old-style class (Python 2).
        textwrap.TextWrapper.__init__(
            self, width=width, break_long_words=break_long_words, **kwargs)
        # Plain wrapper that does the per-paragraph work in wrap().
        self.tw = textwrap.TextWrapper(
            width=width, break_long_words=break_long_words, **kwargs)

    def wrap(self, text):
        """Override textwrap.TextWrapper to process 'text' properly when
        multiple paragraphs present.

        Returns a list of output lines; every paragraph separator found in
        *text* is represented by a single empty string.
        """
        wrapped_lines = []
        for para in self._PARA_EDGE.split(text):
            if para.isspace():
                # A separator (or whitespace-only paragraph): keep one
                # blank line.
                wrapped_lines.append("")
            else:
                wrapped_lines.extend(self.tw.wrap(para))
        return wrapped_lines
 | 
 | 
 | 
 | 
dw = DocWrapper()

# Create the output directory up front; open() below does not create
# missing directories and would fail on the first page otherwise.
if not os.path.isdir(opts.OUTDIR):
    os.makedirs(opts.OUTDIR)

# Write one "<slug>.md" file per row of the pages table: a metadata header
# inside an HTML comment, followed by the wrapped text of the page's parts.
for page in conn.execute("SELECT * FROM pages"):
    # A slug of "/" cannot be used as a file name; that page is stored
    # as "index".
    pagename = page["slug"] if page["slug"] != "/" else "index"
    outfile = os.path.join(opts.OUTDIR, "%s.md" % pagename)

    header = [
        "<!-- \n",
        # "or" guards against a NULL title, which norm() cannot normalise.
        ".. title: " + norm(page["title"] or u"") + "\n",
        ".. slug: " + pagename + "\n",
        # Pages without a publication timestamp get a fixed fallback date.
        ".. date: " + (page["published_at"] or "2008-06-01 12:00:00") + "\n",
        ".. type: text\n",
        ".. category: blog\n",
        "-->",
        "\n\n",
    ]

    parts = conn.execute(
        "SELECT * FROM page_parts WHERE page_id = ? ORDER BY page_parts.name",
        (page["id"],))

    with open(outfile, "w") as f:
        f.writelines(header)
        for part in parts:
            # NULL content is treated like empty content and skipped.
            text = dw.fill(norm(part["content"] or u""))
            if text:
                f.write(text + "\n")
 |