myhtmlpp is a modern C++17 HTML parser with CSS selectors based on MyHTML/Modest.
#include <algorithm>
#include <cassert>
#include <iostream>
#include <myhtmlpp/parser.hpp>
int main() {
std::string html(
R"(<!DOCTYPE html>
<p class="hello">Hello World</p>
<div id="bla" class="test"><b>bold</b> div</div>
<div>normal div</div>
<p class="hello"></p>
<img src="image.jpg" hidden>
auto tree = myhtmlpp::parse(html);
// print the serialized tree
std::cout << tree << "\n";
// iterate over all nodes in the tree
for (const auto& node : tree) {
// ...
// filter out nodes
// these methods return a std::vector<myhtmlpp::Node>
auto by_css = tree.select("p.hello");
auto by_tag = tree.find_by_tag("div"); // same as find_by_tag(myhtmlpp::TAG::DIV)
auto by_class = tree.find_by_class("test");
auto by_id = tree.find_by_id("bla");
auto by_attr = tree.find_by_attr("src", "image.jpg");
// get the inner text of a node
for (const auto& node : by_tag) {
std::cout << node.inner_text() << "\n";
// get special nodes from the tree
auto doc = tree.document_node();
auto root = tree.html_node();
auto head = tree.head_node();
auto body = tree.body_node();
// tree traversal with previous(), next(), parent(), first_child(), last_child()
// all these methods return std::optional<Node>
assert(root.parent().value() == doc);
assert(root.first_child().value() == head);
assert(root.last_child().value() == body);
assert(root.children().size() == 3);
// use stl algorithms on the tree
auto div_node_it =
std::find_if(tree.begin(), tree.end(), [](const auto& node) {
return node.tag_id() == myhtmlpp::TAG::DIV &&
assert(div_node_it != tree.end());
auto div_node = *div_node_it;
// access attribute values with operator[]
auto class_attr = div_node["class"];
assert(class_attr == "test");
// a safer alternative is at(), which checks if the attribute exists
// and returns an std::optional<std::string>
auto class_attr_opt = div_node.at("class");
// iterate over all attributes of a node
// attributes support structured bindings
for (const auto& [key, value] : div_node) {
std::cout << key << "=\"" << value << "\"\n";
The Modest library is included as a submodule.
git clone --recurse-submodules <repository-url>
cd myhtmlpp
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=Release ..
make test
sudo make install
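CMake options (pass as `-D<OPTION>=<value>`; the option names are missing from this copy of the text):
- use `<OPTION>` to use the system installation of modest
- use `<OPTION>` to disable tests
- use `<OPTION>` to disable doxygen documentation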
Instead of installing the library system-wide, you can also copy the entire project into your project (or use it as a submodule) and call `add_subdirectory()` from CMake.
# you probably want to disable tests and docs
add_library(your_project ...)
target_include_directories(your_project PRIVATE ${MYHTMLPP_INCLUDE_DIR})
target_link_libraries(your_project PRIVATE ${MYHTMLPP_LIBRARIES})
You can build the documentation with `make doc`.