- Notifications
You must be signed in to change notification settings - Fork 117
HTML parsing and querying with CSS selectors
License
Notifications: You must be signed in to change notification settings
rust-scraper/scraper
Folders and files
Name | Name | Last commit message | Last commit date | |
---|---|---|---|---|
Repository files navigation
HTML parsing and querying with CSS selectors.
`scraper` is on Crates.io and GitHub.

Scraper provides an interface to Servo's `html5ever` and `selectors` crates, for browser-grade parsing and querying.
```rust
use scraper::Html;

let html = r#" <!DOCTYPE html> <meta charset="utf-8"> <title>Hello, world!</title> <h1>Hello, <i>world!</i></h1>"#;
let document = Html::parse_document(html);
```
```rust
use scraper::Html;

let fragment = Html::parse_fragment("<h1>Hello, <i>world!</i></h1>");
```
```rust
use scraper::Selector;

let selector = Selector::parse("h1.foo").unwrap();
```
```rust
use scraper::{Html, Selector};

let html = r#" <ul> <li>Foo</li> <li>Bar</li> <li>Baz</li> </ul>"#;
let fragment = Html::parse_fragment(html);
let selector = Selector::parse("li").unwrap();

for element in fragment.select(&selector) {
    assert_eq!("li", element.value().name());
}
```
```rust
use scraper::{Html, Selector};

let html = r#" <ul> <li>Foo</li> <li>Bar</li> <li>Baz</li> </ul>"#;
let fragment = Html::parse_fragment(html);
let ul_selector = Selector::parse("ul").unwrap();
let li_selector = Selector::parse("li").unwrap();

let ul = fragment.select(&ul_selector).next().unwrap();
for element in ul.select(&li_selector) {
    assert_eq!("li", element.value().name());
}
```
```rust
use scraper::{Html, Selector};

let fragment = Html::parse_fragment(r#"<input name="foo" value="bar">"#);
let selector = Selector::parse(r#"input[name="foo"]"#).unwrap();

let input = fragment.select(&selector).next().unwrap();
assert_eq!(Some("bar"), input.value().attr("value"));
```
```rust
use scraper::{Html, Selector};

let fragment = Html::parse_fragment("<h1>Hello, <i>world!</i></h1>");
let selector = Selector::parse("h1").unwrap();

let h1 = fragment.select(&selector).next().unwrap();

assert_eq!("<h1>Hello, <i>world!</i></h1>", h1.html());
assert_eq!("Hello, <i>world!</i>", h1.inner_html());
```
```rust
use scraper::{Html, Selector};

let fragment = Html::parse_fragment("<h1>Hello, <i>world!</i></h1>");
let selector = Selector::parse("h1").unwrap();

let h1 = fragment.select(&selector).next().unwrap();
let text = h1.text().collect::<Vec<_>>();

assert_eq!(vec!["Hello, ", "world!"], text);
```
```rust
use html5ever::tree_builder::TreeSink;
use scraper::{Html, Selector};

let html = "<html><body>hello<p class=\"hello\">REMOVE ME</p></body></html>";
let selector = Selector::parse(".hello").unwrap();
let mut document = Html::parse_document(html);
let node_ids: Vec<_> = document.select(&selector).map(|x| x.id()).collect();
for id in node_ids {
    document.remove_from_parent(&id);
}
assert_eq!(document.html(), "<html><head></head><body>hello</body></html>");
```
Please feel free to open pull requests. If you're planning on implementing something big (i.e. not fixing a typo, a small bug fix, minor refactor, etc.), then please open an issue first.
About
HTML parsing and querying with CSS selectors
Topics
Resources
License
Uh oh!
There was an error while loading. Please reload this page.
Stars
Watchers
Forks
Packages 0
No packages published