# Scrape an HTML table from filmsite.org and the movie titles from the
# Douban "top250" listing pages, then save the titles to a CSV file.

# Install the required packages only when they are missing, so that
# re-running the script does not reinstall them every time.
for (pkg in c("XML", "httr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(XML)
library(httr)

# Film genres page ----
urls <- "http://www.filmsite.org/filmgenres.html"
parsed <- htmlParse(urls)
# readHTMLTable() on a parsed document returns a list of the tables it finds.
tab <- readHTMLTable(parsed)
# Show the third element of that list.
tab[3]

# Douban top250 pages ----
url_1 <- "http://movie.douban.com/top250?start="
# Offsets 0, 25, ..., 225 appended to the "start=" query parameter (ten pages).
page <- seq(0, 225, 25)
url_new <- paste0(url_1, page)

# Download and parse every page once; the parsed documents are reused below
# instead of fetching each URL a second time.
movies <- lapply(url_new, htmlParse)

# Show whatever HTML tables each page contains.
lapply(movies, readHTMLTable)

# Extract the text of every <span class="title"> element on each page.
# The result must be stored in a variable before it can be saved.
mov <- lapply(
  movies,
  function(x) xpathSApply(x, "//span[@class='title']", xmlValue)
)
mov

# mov is a list (one element per page), so flatten it and assign the result
# to url_2. url_2 has to be defined before write.table() is called; writing
# it earlier fails with "object 'url_2' not found".
url_2 <- unlist(mov)

# Then save the titles.
write.table(url_2, "E:/movies.csv", sep = ",", col.names = NA)