import py
import docutils.core
from docutils.writers import html4css1
from lxml import etree


# Override some stuff in docutils to make it easier to use the results with
# templess.
class HTMLTranslatorNoNS(html4css1.HTMLTranslator):
    """HTML translator with an empty doctype and a reduced ``astext()``."""

    # Blank out the doctype string inherited from the stock translator.
    doctype = ''

    def astext(self):
        """Return the rendered document as a single string.

        The result is a fixed two-newline prefix followed by the body
        prefix, the pre-docinfo body, the docinfo, the body and the body
        suffix, concatenated in that order.
        """
        # NOTE(review): the stock body_prefix presumably closes a <head>
        # element that this bare-newline prefix never opens -- confirm the
        # leading literal is what was intended.
        return ''.join(
            ['\n\n']
            + self.body_prefix
            + self.body_pre_docinfo
            + self.docinfo
            + self.body
            + self.body_suffix
        )


class HTMLWriterNoNS(html4css1.Writer):
    """html4css1 writer that renders through ``HTMLTranslatorNoNS``."""

    def __init__(self, *args, **kwargs):
        """Initialise the base writer, then select the custom translator."""
        html4css1.Writer.__init__(self, *args, **kwargs)
        # Assigned after the base initialiser has run, so this is the value
        # the instance ends up with.
        self.translator_class = HTMLTranslatorNoNS


def rest2etree(fpath):
    """Render a reStructuredText file to HTML and parse it with lxml.

    ``fpath`` is any object whose ``read()`` returns the reST source
    (presumably a ``py.path`` object -- confirm against callers).

    Returns whatever ``etree.fromstring()`` produces for the rendered
    markup; parse errors raised by lxml propagate to the caller.
    """
    rst = fpath.read()
    html = docutils.core.publish_string(rst, writer=HTMLWriterNoNS())

    # templess doesn't know about nbsps, and with the doctype blanked out an
    # XML parser has no definition for the entity either, so turn every
    # ``&nbsp;`` into an ordinary space before parsing.  publish_string may
    # hand back bytes or text, so the literals must match the value's type.
    if isinstance(html, bytes):
        html = html.replace(b'&nbsp;', b' ')
    else:
        html = html.replace('&nbsp;', ' ')

    return etree.fromstring(html)