#include <htmlcxx/html/ParserDom.h>
// ... (the remaining includes and surrounding code are elided in this snippet)

using namespace std;
using namespace htmlcxx;

// Example: parse an HTML string into a DOM tree, then walk the tree three
// times -- once to print it, once to list link targets, once to dump text.

// Parse some HTML code into a DOM tree.
string html = "<html><body>hey</body></html>";
HTML::ParserDom parser;
tree<HTML::Node> dom = parser.parseTree(html);

// Print the whole DOM tree.
cout << dom << endl;

// Dump all links in the tree: visit every node and, for each one whose tag
// name equals "A" (case-insensitive comparison), print its "href" attribute.
tree<HTML::Node>::iterator it = dom.begin();
tree<HTML::Node>::iterator end = dom.end();
for (; it != end; ++it) {
    if (strcasecmp(it->tagName().c_str(), "A") == 0) {
        // Attributes are parsed on demand, right before they are queried.
        it->parseAttributes();
        // attribute() yields a pair and only .second is printed here.
        // NOTE(review): presumably .first reports whether the attribute
        // exists -- confirm against the htmlcxx API before relying on it.
        cout << it->attribute("href").second << endl;
    }
}

// Dump all text of the document: print the text of every node that is
// neither a tag nor a comment, with no separator between the pieces.
it = dom.begin();
end = dom.end();
for (; it != end; ++it) {
    if ((!it->isTag()) && (!it->isComment())) {
        cout << it->text();
    }
}
cout << endl;
GumboVector* children = &node->v.element.children;//指向子结点 for (unsignedint i = 0; i < children->length; ++i) { search_for_links(static_cast<GumboNode*>(children->data[i]));//递归查找 } }