/CppHtml

A modern C++ HTML parser

Primary language: C++. License: MIT.

CppHtml

A modern C++ HTML parser.

Define

using namespace std;
using namespace html;
html_doc doc{
    { "html" },
    { { "html" },
      { { { "head" },
          { { { "title" },
              { { "Title" } } } } },
        { { "body" },
          { { { "h1", { { "align", "center" } } },
              { { "Hello" } } },
            { { "p" },
              { { "Hello world!" } } } } } } }
};
cout << doc << endl;

Output:

<!doctype html>
<html>
  <head>
    <title>Title</title>
  </head>
  <body>
    <h1 align="center">Hello</h1>
    <p>Hello world!</p>
  </body>
</html>

Parse

using namespace std;
using namespace html;
html_doc doc = html_doc::parse("<!doctype html><html><head><title>Title</title></head><body><h1 align=\"center\">Hello</h1><p>Hello world!</p></body></html>");
cout << doc << endl;

The output is the same as above.

Query

using namespace std;
using namespace html;
html_doc doc = html_doc::parse("<!doctype html><html><head><title>Title<!--</title>--></head></title><body><h1 align=\"center\">Hello</h1><p>Hello <p>world!<br/></body></html>");
for (auto& p : doc.node()["body"].back()["p"])
{
    cout << p.front().text();
}

Output:

Hello world!

Input

You can read an html_doc from a std::istream, but NOT from cin.

Tidy

The output HTML is always tidy.