A modern C++ HTML parser.
using namespace std;
using namespace html;
html_doc doc{
{ "html" },
{ { "html" },
{ { { "head" },
{ { { "title" },
{ { "Title" } } } } },
{ { "body" },
{ { { "h1", { { "align", "center" } } },
{ { "Hello" } } },
{ { "p" },
{ { "Hello world!" } } } } } } }
};
cout << doc << endl;
Output:
<!doctype html>
<html>
<head>
<title>Title</title>
</head>
<body>
<h1 align="center">Hello</h1>
<p>Hello world!</p>
</body>
</html>
using namespace std;
using namespace html;
html_doc doc = html_doc::parse("<!doctype html><html><head><title>Title</title></head><body><h1 align=\"center\">Hello</h1><p>Hello world!</p></body></html>");
cout << doc << endl;
The output is the same as above.
using namespace std;
using namespace html;
html_doc doc = html_doc::parse("<!doctype html><html><head><title>Title<!--</title>--></head></title><body><h1 align=\"center\">Hello</h1><p>Hello <p>world!<br/></body></html>");
for (auto& p : doc.node()["body"].back()["p"])
{
cout << p.front().text();
}
Output:
Hello world!
You can read an html_doc from a std::istream, but NOT from cin.
The output HTML is always tidy.