As requested by Vinko Vrsalovic, here is the code that produces that result:
/**
 * Recursively print the subtree below a Tidy node.
 *
 * For each child of `tnod`, in sibling order:
 *   - if the child is not one of the special node kinds listed in the switch,
 *     every attribute of the child is written to std::cout (name, then value);
 *   - a "Node: <name>" line is printed, preceded by `indent` spaces;
 *   - the child's own children are dumped with `indent + 4`.
 *
 * `tnod` itself is not printed, only its descendants.
 *
 * @param tnod   Node whose children are dumped.
 * @param indent Number of leading spaces for the children of `tnod`.
 */
void dumpNode( TidyNode tnod, int indent )
{
    for ( TidyNode child = tidyGetChild(tnod); child; child = tidyGetNext(child) )
    {
        ctmbstr name = NULL;
        switch ( tidyNodeGetType(child) )
        {
        case TidyNode_Root:    name = "Root"; break;
        case TidyNode_DocType: name = "DOCTYPE"; break;
        case TidyNode_Comment: name = "Comment"; break;
        case TidyNode_ProcIns: name = "Processing Instruction"; break;
        case TidyNode_Text:    name = "Text"; break;
        case TidyNode_CDATA:   name = "CDATA"; break;
        case TidyNode_Section: name = "XML Section"; break;
        case TidyNode_Asp:     name = "ASP"; break;
        case TidyNode_Jste:    name = "JSTE"; break;
        case TidyNode_Php:     name = "PHP"; break;
        case TidyNode_XmlDecl: name = "XML Declaration"; break;
        case TidyNode_Start:
        case TidyNode_End:
        case TidyNode_StartEnd:
        default:
        {
            // Braces give the attribute loop its own scope inside the switch.
            name = tidyNodeGetName( child );
            for ( TidyAttr att = tidyAttrFirst(child); att; att = tidyAttrNext(att) )
            {
                // The value pointer may be null (e.g. an attribute written
                // without a value); streaming a null char pointer is undefined
                // behaviour, so print an empty string instead.
                ctmbstr attrValue = tidyAttrValue(att);
                std::cout << "Name attr: " << tidyAttrName(att) << std::endl;
                std::cout << "Value attr:" << ( attrValue ? attrValue : "" ) << std::endl;
            }
            break;
        }
        }
        assert( name != NULL );
        // "%*.*s" with a one-space string pads to exactly `indent` columns
        // (and prints nothing when indent is 0).
        printf( "%*.*sNode: %s\n", indent, indent, " ", name );
        dumpNode( child, indent + 4 );
    }
}
/**
 * Dump the node tree below the node returned by tidyGetHtml() for `tdoc`,
 * starting at indentation level 0.
 *
 * @param tdoc Document whose tree is printed via dumpNode().
 */
void dumpHtml( TidyDoc tdoc )
{
    TidyNode htmlNode = tidyGetHtml( tdoc );
    dumpNode( htmlNode, 0 );
}
int main(int argc, char **argv) {
std::string toReturn("");
TidyBuffer output;
TidyBuffer errbuf;
int rc = -1;
Bool ok;
tidyBufInit(&output);
tidyBufInit(&errbuf);
TidyDoc tdoc = tidyCreate();
ok = tidyOptSetBool( tdoc, TidyXhtmlOut, yes ); // Convert to XHTML
if ( ok )
rc = tidySetErrorBuffer( tdoc, &errbuf ); // Capture diagnostics
if ( rc >= 0 )
rc = tidyParseFile(tdoc, "fuebuena.html"); // Parse the input
if ( rc >= 0 )
rc = tidyCleanAndRepair( tdoc ); // Tidy it up!
if (rc >= 0)
dumpHtml(tdoc);
return 0;
}