From 1725768e2eaf68a1445f1aef9d95da0bf461e88b Mon Sep 17 00:00:00 2001
From: "sgunderson@bigfoot.com" <>
Date: Thu, 3 Nov 2011 22:11:14 +0100
Subject: [PATCH] Implement prettyprinting in the PHP5 SWIG version.

---
Review notes (this section is ignored by git am):
- clean_node() frees the buffers returned by xmlNodeGetContent() and by the
  xml:space attribute lookup, and never builds a std::string from NULL.
- xml:space is looked up with xmlGetNsProp() in the XML namespace, and the
  setting is inherited by descendant elements until it is overridden.
- XML_Template_clean_whitespace() returns early when the document has no
  root element.

 php5-swig/include.php       |   2 +-
 php5-swig/simple.php        |   3 +-
 php5-swig/xml-template.swig | 139 ++++++++++++++++++++++++++++++++++--
 3 files changed, 138 insertions(+), 6 deletions(-)

diff --git a/php5-swig/include.php b/php5-swig/include.php
index 69ee513..6845d50 100644
--- a/php5-swig/include.php
+++ b/php5-swig/include.php
@@ -9,5 +9,5 @@ $master = XML_Template_process_file('../xml/master.xml', array(
 	'h1' => 'Nice heading here',
 	'contents' => $doc
 ), true);
-print XML_Template_convert_doc_to_string($master);
+print XML_Template_convert_doc_to_string($master, true);
 ?>
diff --git a/php5-swig/simple.php b/php5-swig/simple.php
index 7e6ce84..919d9ee 100644
--- a/php5-swig/simple.php
+++ b/php5-swig/simple.php
@@ -5,7 +5,8 @@ $doc = XML_Template_process_file('../xml/simple.xml', array(
 	'title' => 'A very basic example',
 	'#hello' => 'Hello world!'
 ), true);
-print XML_Template_convert_doc_to_string($doc);
+XML_Template_clean_whitespace($doc, true);
+print XML_Template_convert_doc_to_string($doc, true);
 ?>
 
 
diff --git a/php5-swig/xml-template.swig b/php5-swig/xml-template.swig
index 9cd270e..5e4e133 100644
--- a/php5-swig/xml-template.swig
+++ b/php5-swig/xml-template.swig
@@ -8,6 +8,7 @@ struct XmlDocPtrWrapper {
 
 %{
 #include <string>
+#include <string.h>
 
 #include "../c++11/xml-template.h"
 
@@ -117,14 +118,143 @@ int close_string(void *context)
 
 } // namespace
 
-std::string XML_Template_convert_doc_to_string(XmlDocPtrWrapper doc)
+// Serialize doc as UTF-8 through xmlSaveFormatFileTo(). When prettyprint is
+// true, libxml2 is asked to indent the output. xmlIndentTreeOutput is a
+// libxml2 global, so the setting stays in effect after this call returns.
+std::string XML_Template_convert_doc_to_string(XmlDocPtrWrapper doc, bool prettyprint)
 {
+	xmlIndentTreeOutput = prettyprint;
 	std::string ret;
 	xmlOutputBufferPtr buf = xmlOutputBufferCreateIO(write_to_string, close_string, &ret, NULL);
-	xmlSaveFileTo(buf, doc->ptr, NULL);
+	xmlSaveFormatFileTo(buf, doc->ptr, "UTF-8", prettyprint);
 	return ret;
 }
-	
+
+namespace {
+
+// Remove document fragments (i.e. move their content up into the parent node)
+// and combine neighboring text nodes into one, recursing into every child.
+void normalize_node(xmlNodePtr node)
+{
+	xmlNode *next_child;
+	for (xmlNode *child = node->children; child != NULL; child = next_child) {
+		next_child = child->next;
+		if (child->type == XML_DOCUMENT_FRAG_NODE) {
+			while (child->children != NULL) {
+				xmlAddPrevSibling(child, child->children);
+			}
+
+			xmlUnlinkNode(child);
+			xmlFreeNode(child);
+		}
+	}
+
+	// xmlAddPrevSibling merges adjacent text nodes, but many other things
+	// (including xmlUnlinkNode) do not, so make an extra pass.
+	for (xmlNode *child = node->children; child != NULL; child = child->next) {
+		while (child->type == XML_TEXT_NODE && (child->next != NULL && child->next->type == XML_TEXT_NODE)) {
+			xmlNode *next_child = child->next;
+
+			xmlChar *content = xmlNodeGetContent(next_child);
+			xmlNodeAddContent(child, content);
+			xmlFree(content);
+
+			xmlUnlinkNode(next_child);
+			xmlFreeNode(next_child);
+		}
+		normalize_node(child);
+	}
+}
+
+// Clean the page of unnecessary whitespace: runs of newlines, tabs and spaces
+// in text nodes are collapsed into a single space, and text nodes that end up
+// empty (or, if aggressive is set, consist of one space only) are removed.
+// Whitespace is left alone where xml:space="preserve" is in effect; as in the
+// XML specification, the setting is inherited by descendants until it is
+// overridden. (IOW, it doesn't parse the DTDs, nor the CSS.)
+void clean_node(xmlNodePtr node, bool preserve_whitespace, bool aggressive)
+{
+	if (node->type == XML_TEXT_NODE) {
+		// xmlNodeGetContent returns a copy that we own (or NULL); move it
+		// into a std::string and free the libxml2 buffer right away.
+		xmlChar *raw = xmlNodeGetContent(node);
+		std::string content;
+		if (raw != NULL) {
+			content = reinterpret_cast<const char *>(raw);
+			xmlFree(raw);
+		}
+		if (!preserve_whitespace) {
+			unsigned dstpos = 0;
+			for (unsigned srcpos = 0; srcpos < content.size(); ++srcpos, ++dstpos) {
+				if (content[srcpos] == '\n' ||
+				    content[srcpos] == '\t' ||
+				    content[srcpos] == ' ') {
+					content[dstpos] = ' ';
+
+					// compress double spaces
+					if (dstpos > 0 && content[dstpos - 1] == ' ') {
+						--dstpos;
+					}
+				} else {
+					content[dstpos] = content[srcpos];
+				}
+			}
+			content.resize(dstpos);
+		}
+		if (content.empty() || (aggressive && content == " ")) {
+			xmlUnlinkNode(node);
+			xmlFreeNode(node);
+		} else {
+			xmlNodeSetContentLen(node, reinterpret_cast<const xmlChar *>(content.data()), content.size());
+		}
+	} else {
+		if (node->type == XML_ELEMENT_NODE) {
+			// The attribute is stored under its local name in the XML
+			// namespace, so a plain lookup of "xml:space" would not find it.
+			xmlChar *space = xmlGetNsProp(node, reinterpret_cast<const xmlChar *>("space"), XML_XML_NAMESPACE);
+			if (space != NULL) {
+				preserve_whitespace = (strcmp(reinterpret_cast<const char *>(space), "preserve") == 0);
+				xmlFree(space);
+			}
+		}
+
+		xmlNode *next_child;
+		for (xmlNode *child = node->children; child != NULL; child = next_child) {
+			next_child = child->next;
+			clean_node(child, preserve_whitespace, aggressive);
+		}
+
+		if (node->type == XML_ELEMENT_NODE && node->children == NULL) {
+			std::string tag = reinterpret_cast<const char *>(node->name);
+
+			// These are the only elements allowed in XHTML to be EMPTY,
+			// so insert dummy nodes to prevent the output from using
+			// the self-closing syntax where not appropriate.
+			if (tag != "base" && tag != "meta" && tag != "link" && tag != "hr" &&
+			    tag != "br" && tag != "param" && tag != "img" && tag != "area" &&
+			    tag != "input" && tag != "col") {
+				xmlNode *text = xmlNewText(reinterpret_cast<const xmlChar *>(""));
+				xmlAddChild(node, text);
+			}
+		}
+	}
+}
+
+} // namespace
+
+// Normalize the document tree and strip unnecessary whitespace from it, in
+// place. If aggressive is set, text nodes consisting of a single space are
+// removed too. A document without a root element is left untouched.
+void XML_Template_clean_whitespace(XmlDocPtrWrapper doc, bool aggressive)
+{
+	xmlNodePtr root = xmlDocGetRootElement(doc->ptr);
+	if (root == NULL) {
+		return;
+	}
+	normalize_node(root);
+	clean_node(root, false, aggressive);
+}
+
 %}
 
 %typemap(in) Directive* {
@@ -133,5 +263,6 @@ std::string XML_Template_convert_doc_to_string(XmlDocPtrWrapper doc)
 
 XmlDocPtrWrapper XML_Template_process_file(const std::string &input_filename, Directive *root_directive, bool clean);
 void XML_Template_process(XmlDocPtrWrapper doc, Directive *root_directive, bool clean);
-std::string XML_Template_convert_doc_to_string(XmlDocPtrWrapper doc);
+void XML_Template_clean_whitespace(XmlDocPtrWrapper doc, bool aggressive);
+std::string XML_Template_convert_doc_to_string(XmlDocPtrWrapper doc, bool prettyprint);
 
-- 
2.39.2