OCGumbo is a Mac Objective-C wrapper of the Google Gumbo HTML5 parser.
iOSGumbo is an iOS port of OCGumbo.
To use Google Gumbo in your own project
-
Copy the /OCGumbo folder from this project to your own project directory. I placed it alongside the Xcode project file.
-
Drag the /OCGumbo folder into your Xcode project.
-
You only need certain files to use Gumbo, so choose Delete > Remove References for all files under OCGumbo except the following:
/OCGumbo/OCGumbo.h /OCGumbo/OCGumbo.m /OCGumbo/OCGumbo+Query.h /OCGumbo/OCGumbo+Query.m /OCGumbo/gumbo/src (and all its contents)
Then choose Delete > Remove References for all other files under /OCGumbo/gumbo/.
In an Objective-C class:
#import "OCGumbo+Query.h"

/**
 * Sample usage: downloads the TfL bus-route-maps page for "Barking and Dagenham",
 * parses it with OCGumboDocument, and logs what was found:
 *   - the document itself plus its doctype flag, publicID, systemID and title
 *   - the text and href of every <a> inside div.vertical-button-container
 *   - the text and href of every <a> inside div.multi-document-download-container
 *
 * The download and parse run on a background queue because
 * -stringWithContentsOfURL:encoding:error: blocks until the request completes.
 * Nothing here touches the UI, so the results are logged straight from that queue.
 */
- (void)viewDidLoad
{
    [super viewDidLoad];
    // Do any additional setup after loading the view, typically from a nib.

    NSLog(@"\n\n===============London Bus info ==================");

    // NOTE(review): this is a plain http:// URL; newer iOS versions may refuse it
    // under App Transport Security - confirm against your deployment target.
    NSURL *busURL = [NSURL URLWithString:@"http://www.tfl.gov.uk/maps_/bus-route-maps?Query=Barking%20and%20Dagenham"];

    dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
        // Keep the NSError so a failed download reports why it failed.
        NSError *fetchError = nil;
        NSString *busHTMLString = [NSString stringWithContentsOfURL:busURL
                                                           encoding:NSUTF8StringEncoding
                                                              error:&fetchError];
        if (!busHTMLString) {
            NSLog(@"ERROR: [%s] could not load bus HTML: %@", __PRETTY_FUNCTION__, fetchError);
            return;
        }

        OCGumboDocument *busDoc = [[OCGumboDocument alloc] initWithHTMLString:busHTMLString];

        // Don't panic if these don't have data - have noticed them blank sometimes.
        NSLog(@"document:%@", busDoc);
        NSLog(@"has doctype: %d", busDoc.hasDoctype);
        NSLog(@"publicID: %@", busDoc.publicID);
        NSLog(@"systemID:%@", busDoc.systemID);
        NSLog(@"title:%@", busDoc.title);

        //-----------------------------------------------------------------------------------
        // Find all links in the div with class "vertical-button-container":
        /*
         <div class="vertical-button-container">
         <a href="?Query=Barking and Dagenham" class="plain-button external-link" target="_parent">Barking and Dagenham</a>
         <a href="?Query=Barnet" class="plain-button external-link" target="_parent">Barnet</a>
         <a href="?Query=Bexley" class="plain-button external-link" target="_parent">Bexley</a>
         */
        //-----------------------------------------------------------------------------------
        NSArray *boroughLinks = busDoc.Query(@"div.vertical-button-container").find(@"a");
        for (OCGumboNode *linkNode in boroughLinks) {
            NSLog(@"TEXT:'%@'\n", linkNode.text());
            NSLog(@"href:'%@'\n", linkNode.attr(@"href"));
        }

        //-----------------------------------------------------------------------------------
        // Find all PDF links in the div with class "multi-document-download-container":
        /*
         <div class="multi-document-download-container">
         <a class="document-download-wrap pdf pdf" href="/cdn/static/cms/documents/bus-route-maps/barking-longbridge-road-261013.pdf" target="_parent">
         <div class="document-download-text">
         <p>Barking (Longbridge Road)</p>
         </div>
         <div class="document-download-icon download-doc" />
         </a>
         <a class="document-download-wrap pdf pdf" href="/cdn/static/cms/documents/bus-route-maps/barking-310813.pdf" target="_parent">
         <div class="document-download-text">
         <p>Barking</p>
         </div>
         <div class="document-download-icon download-doc" />
         </a>
         */
        //-----------------------------------------------------------------------------------
        NSArray *pdfLinks = busDoc.Query(@"div.multi-document-download-container").find(@"a");
        for (OCGumboNode *linkNode in pdfLinks) {
            NSLog(@"TEXT:'%@'\n", linkNode.text());
            NSLog(@"href:'%@'\n", linkNode.attr(@"href"));
        }
    });
}
Email: support@cityoflondonconsulting.com
The documentation below is copied from OCGumbo for convenience.
- Add Gumbo sources or lib to your project.
- Add OCGumbo file and import "OCGumbo.h", then use OCGumboDocument to parse an html string.
####Objects####
| Class | Description |
|---|---|
| OCGumboDocument | the root of a document tree |
| OCGumboElement | an element in an HTML document |
| OCGumboText | the textual content of an element |
| OCGumboNode | a single node in the document tree |
| OCGumboAttribute | an attribute of an Element object |
####Examples####
// Parse an HTML string (htmlString is an NSString you supply) into a document tree.
OCGumboDocument *document = [[OCGumboDocument alloc] initWithHTMLString:htmlString];
// Read the document's rootElement property to get an OCGumboElement to work with.
OCGumboElement *root = document.rootElement;
//document: do something with the document.
//rootElement: do something with the html tree.

OCGumbo now adds Query support as well: import "OCGumbo+Query.h" and enjoy it.
####Query APIs####
| Method | Description |
|---|---|
| .Query( ) | Query children elements from current node by selector |
| .text( ) | Get the combined text contents of current object |
| .html( ) | Get the raw contents of current element |
| .attr( ) | Get the attribute value of the element by attributeName |
| .find( ) | Find elements that match the selector in the current collection |
| .children( ) | Get immediate children of each element in the current collection matching the selector |
| .parent( ) | Get immediate parents of each element in the collection matching the selector |
| .parents( ) | Get all ancestors of each element in the collection matching the selector |
| .first( ) | Get the first element of the current collection |
| .last( ) | Get the last element of the current collection |
| .get( ) | Get the element by index from current collection |
| .index( ) | Get the position of an element in current collection |
| .hasClass( ) | Check if any elements in the collection have the specified class |
####Examples####
// Chained find(): start from the <body> match, narrow to #select, then to its <option> elements.
NSLog(@"options: %@", document.Query(@"body").find(@"#select").find(@"option"));
// text(): combined text contents of the <title> match.
NSLog(@"title: %@", document.Query(@"title").text());
// first() + attr(): the "id" attribute of the first <select> match.
NSLog(@"attribute: %@", document.Query(@"select").first().attr(@"id"));
// parents(): ancestors of #select that match the ".main" class selector.
NSLog(@"class: %@", document.Query(@"#select").parents(@".main"));
// Selectors can combine a tag name with a class ...
NSLog(@"tag.class: %@", document.Query(@"div.theCls"));
// ... or a tag name with an id.
NSLog(@"tag#id : %@", document.Query(@"div#theId"));