For macOS, run the following:
# Install the R language from Homebrew.
brew install r
# Install the RStudio IDE; its Homebrew cask token is "rstudio" (no hyphen).
brew install --cask rstudio
For Windows, run the following:
# Install R (r.project) and RStudio (r.studio) through Chocolatey in one call.
choco install r.project r.studio
# Install both packages used in this guide with a single call.
install.packages(c("rvest", "dplyr"))
library(rvest)

# Download and parse the page that will be scraped.
link <- "https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes"
page <- read_html(link)

# Template: select elements with a CSS selector (fill in the empty string).
html_elements(page, css = "")
# Template: select elements with an XPath expression (fill in the empty string).
html_elements(page, xpath = "")
For the page above, use the following:
# Select the table matching the "table.sortable" CSS selector.
htmlElement <- page %>% html_element("table.sortable")

# Parse the table without promoting any row to a header; the column names are
# taken from the second row below.
df <- html_table(htmlElement, header = FALSE)

# Use row 2 as the column names, then drop rows 1 and 2 from the data.
names(df) <- df[2, ]
df <- df[-1:-2, ]

# Save the cleaned table to disk.
write.csv(df, "iso_codes.csv")
# Parse the page that contains the image; `url` must hold that page's address.
page <- read_html(url)

# Locate the element with class "thumbborder" and read its src attribute.
image_element <- page %>% html_element(".thumbborder")
image_url <- image_element %>% html_attr("src")
# NOTE(review): if src is a relative or protocol-relative URL it must be made
# absolute before downloading — confirm against the target page.

# mode = "wb" writes the file in binary mode; without it a .jpg downloaded on
# Windows is written in text mode and ends up corrupted.
download.file(image_url, destfile = "paris.jpg", mode = "wb")
Find the API endpoint and use it as follows:
# Request the API endpoint with a 10-second timeout and parse the response.
# NOTE(review): GET() and timeout() look like httr functions — confirm that
# httr is attached before running this snippet.
page <- GET(api_url, timeout(10)) %>% read_html()

# Extract the text of the first <p> element, which holds the JSON payload.
jsontext <- html_text(html_element(page, "p"))
For a complete example, see dynamic_rvest.R.
# Install and attach RSelenium (the installer function is install.packages()).
install.packages("RSelenium")
library(RSelenium)

# Method 1
# Start a Chrome driver on port 9515; the "client" element of the returned
# object is the driver handle used for browser commands.
rD <- rsDriver(browser = "chrome", port = 9515L, verbose = FALSE)
remDr <- rD[["client"]]
# Run the standalone Firefox Selenium image in the background, publishing
# container port 4444 on host port 4445.
docker run --detach --publish 4445:4444 selenium/standalone-firefox
# Create a driver for the Selenium server at localhost:4445 using Firefox,
# then open the browser session.
remDr <- remoteDriver(
  browserName = "firefox",
  remoteServerAddr = "localhost",
  port = 4445L
)
remDr$open()
# Open the category page to scrape.
remDr$navigate("https://books.toscrape.com/catalogue/category/books/science-fiction_16")

# vapply() with character(1) guarantees a character vector for every column,
# even when an XPath matches nothing (sapply() would return an empty list and
# silently change the shape of the resulting data frame).

# Titles are read from the alt attribute of the images inside each <article>.
titleElements <- remDr$findElements(using = "xpath", "//article//img")
titles <- vapply(
  titleElements,
  function(x) x$getElementAttribute("alt")[[1]],
  character(1)
)

# Prices are the text of elements with class "price_color".
pricesElements <- remDr$findElements(using = "xpath", "//*[@class='price_color']")
prices <- vapply(
  pricesElements,
  function(x) x$getElementText()[[1]],
  character(1)
)

# Stock status is the text of elements with class "instock availability".
stockElements <- remDr$findElements(using = "xpath", "//*[@class='instock availability']")
stocks <- vapply(
  stockElements,
  function(x) x$getElementText()[[1]],
  character(1)
)

# Combine the three vectors into one table and save it.
df <- data.frame(titles, prices, stocks)
write.csv(df, "books.csv")