/beautifulsoup.dart

A Dart port of the famous Python library Beautiful Soup.

Primary language: Dart · License: MIT

"Provide idiomatic ways of navigating, searching, and modifying the parse tree. It commonly saves programmers hours or days of work"

Usage

A simple usage example:

import 'package:beautifulsoup/beautifulsoup.dart';

/// Walks through the basic lookups on the sample "Dormouse's story" document.
///
/// The trailing comment on each call shows the equivalent expression in
/// Python's Beautiful Soup.
void main() {
  String document = """
  <html><head><title>The Dormouse's story</title></head>
  <body>
  <p class="title"><b>The Dormouse's story</b></p>

  <p class="story">Once upon a time there were three little sisters; and their names were
  <a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
  <a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
  <a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
  and they lived at the bottom of a well.</p>

  <p class="story">...</p>
  """;

  var soup = Beautifulsoup(document);

  // Calling the soup with a tag name looks up an element by that tag.
  print(soup("title").outerHtml); // soup.title
  print(soup("title").localName); // soup.title.name
  print(soup("title").text); // soup.title.string
  print(soup("title").parent.localName); // soup.title.parent.name
  print(soup("p").outerHtml); // soup.p
  print(soup("p").attributes["class"]); // soup.p['class']
  print(soup("a").outerHtml); // soup.a
  print(soup.get_text()); // soup.get_text()

  // Every <a> element, rendered as its outer HTML.
  print(soup.find_all("a").map((e) => e.outerHtml).toList()); // soup.find_all('a')
}

Modifying text via id

  // Markup containing a single paragraph identified by id="RipVanWinkle".
  final document = """
   <p id="RipVanWinkle">
    RipVanWinkle paragraph.
  </p>
    """;

  final soup = Beautifulsoup(document);

  // Look the paragraph up with an id selector, then assign its new text.
  final paragraph = soup.find(id: "#RipVanWinkle");
  paragraph.text = "Wake up, sleepy head!";

  // Print what soup.print() returns (presumably the updated markup).
  print(soup.print());

Modify text by tag

  // Markup with one anchor whose content mixes text and a nested <i> element.
  final document = """
  <a href="http://example.com/">I linked to <i>example.com</i></a>
  """;

  final soup = Beautifulsoup(document);

  // Look the anchor up by tag name, then assign its new text.
  final anchor = soup("a");
  anchor.text = "New link text";

  // Print what soup.print() returns (presumably the updated markup).
  print(soup.print());

Find all links in an HTML document

String document = """
    <html>
    <head>
      <title>
      The Dormouse's story
      </title>
    </head>
    <body>
      <p class="title">
      <b>
        The Dormouse's story
      </b>
      </p>
      <p class="story">
      Once upon a time there were three little sisters; and their names were
      <a class="sister" href="http://example.com/elsie" id="link1">
        Elsie
      </a>
      ,
      <a class="sister" href="http://example.com/lacie" id="link2">
        Lacie
      </a>
      and
      <a class="sister" href="http://example.com/tillie" id="link3">
        Tillie
      </a>
      ; and they lived at the bottom of a well.
      </p>
      <p class="story">
      ...
      </p>
    </body>
    </html>
    """;

var soup = Beautifulsoup(document);

// Collect the `href` of every <a> element first, so `hyperlinks` holds the
// links themselves rather than the void results of `print`.
final hyperlinks =
    soup.find_all("a").map((e) => soup.attr(e, "href")).toList();

// Print one link per line, in document order.
hyperlinks.forEach(print);
    

Features and bugs

Please file feature requests and bugs at the issue tracker.