library(RSelenium)
library(wdman)
library(rvest)
library(plyr)
library(xml2)
library(jsonlite)

# Work from the workshop folder so relative outputs (e.g. "art.jpg") land
# there. The path is machine-specific, so only switch when it exists rather
# than aborting the whole script on machines that lack it.
workshop_dir <- "D:/Talks/Scraping Workshop"
if (dir.exists(workshop_dir)) {
  setwd(workshop_dir)
} else {
  message(
    "Workshop directory not found: ", workshop_dir,
    "; staying in ", getwd()
  )
}

### WIKIART ###################
## Fetch the text-only list of all works by Jacques-Louis David ##
page <- read_html("https://www.wikiart.org/en/jacques-louis-david/all-works/text-list")

## Identify the CSS selector of the artwork links (e.g. with the browser's
## inspector): the link is the first child of each "painting-list-text-row".

## One artwork: name and weblink of the row matched by :nth-child(9) ##
# Query the node once and read both the text and the href from it.
single_node <- html_nodes(page, "li.painting-list-text-row:nth-child(9) > a:nth-child(1)")
art <- html_text(single_node)
art
link <- html_attr(single_node, "href")
link

## We want all the artworks.

## All artworks: names and weblinks in one go ##
all_nodes <- html_nodes(page, "li.painting-list-text-row > a:nth-child(1)")
link <- html_attr(all_nodes, "href")

# Prefix the site root only onto hrefs that are not already absolute, so an
# absolute href is never turned into "https://www.wikiart.orghttps://...".
site_root <- "https://www.wikiart.org"
is_relative <- !grepl("^https?://", link)
absolute_link <- link
absolute_link[is_relative] <- paste0(site_root, link[is_relative])

# One row per artwork. stringsAsFactors = FALSE keeps the names as character
# on R < 4.0 as well.
art <- data.frame(
  art = html_text(all_nodes),
  link = absolute_link,
  stringsAsFactors = FALSE
)

## Scrape the details of a single artwork ######################

# Return the text of every node in `doc` matched by the CSS selector `css`.
css_text <- function(doc, css) {
  doc %>%
    html_nodes(css) %>%
    html_text()
}

# The first artwork of the list serves as the worked example. Fail early with
# a clear message when the list is empty instead of passing NA to read_html().
if (nrow(art) < 1) {
  stop("No artworks were scraped, so there is no artwork page to read.", call. = FALSE)
}
page <- read_html(art$link[1])

# NOTE: the selectors below are positional (:nth-child), so they depend on the
# page's current layout; a selector that matches nothing yields character(0).
title <- css_text(
  page,
  ".wiki-layout-artist-info > article:nth-child(3) > h3:nth-child(1)"
)
artist <- css_text(
  page,
  ".wiki-layout-artist-info > article:nth-child(3) > h5:nth-child(2) > span:nth-child(1) > a:nth-child(1)"
)
origin <- css_text(
  page,
  ".wiki-layout-artist-info > article:nth-child(3) > ul:nth-child(3) > li:nth-child(2) > span:nth-child(2)"
)
style <- css_text(
  page,
  "li.dictionary-values:nth-child(3) > span:nth-child(2) > a:nth-child(1)"
)
genre <- css_text(
  page,
  "li.dictionary-values:nth-child(4) > span:nth-child(2) > a:nth-child(1) > span:nth-child(1)"
)
media <- css_text(
  page,
  "li.dictionary-values:nth-child(5) > span:nth-child(2)"
)
location <- css_text(
  page,
  ".dictionary-values-gallery > span:nth-child(2)"
)

# Address of the artwork image, read from the <img> element's src attribute.
img.url <- page %>%
  html_nodes(".wiki-layout-artist-image-wrapper > img:nth-child(3)") %>%
  html_attr("src")

# The selector may match no image (or several), while a single destination
# file takes exactly one URL: download the first match and skip with a warning
# when there is none. mode = "wb" writes the image as binary.
if (length(img.url) >= 1 && !is.na(img.url[1])) {
  download.file(img.url[1], "art.jpg", mode = "wb")
} else {
  warning("No image URL found on the artwork page; skipping download.", call. = FALSE)
}