వాడుకరి:Arjunaraoc/R Script for page views of a set of articles in a month using JSON
## updatepvdata: fetch monthly page-request totals for a list of article
## titles from the stats.grok.se JSON service and accumulate them in a file.
##
## Arguments:
##   prj    - project segment placed in the URL right after ".../json/"
##            (the original notes give "te" as the language example and
##            mention "" for wikipedia / "s" for wikisource as project codes)
##   ptfile - path to a table with a header and a `page_title` column; titles
##            use underscores in place of spaces, one per line, unquoted
##            (as obtained in .tsv files)
##   ym     - month as "yyyymm" for which data is required
##   pvfile - path to the results table with columns `page_requests` and
##            `page_title`; it is created if it does not exist
##
## Resume behaviour: if pvfile already holds rows, the last title stored in it
## is looked up in the title list and fetching continues with the title after
## it. If that title is not found in the list, fetching starts again from the
## first title.
##
## Returns (invisibly) the accumulated data frame. pvfile is rewritten when the
## function exits, including when a request fails part-way, so that a later
## call can resume instead of starting over.
##
## NOTE(review): stats.grok.se may no longer be in service - confirm the
## endpoint before relying on this function.
## NOTE(review): titles are appended to the URL as-is; presumably non-ASCII
## titles need URL-encoding - verify against the service.
updatepvdata <- function(prj, ptfile, ym, pvfile) {
  urlprefix <- "http://stats.grok.se/json/"
  urlc <- paste0(urlprefix, prj, "/", ym, "/")

  ## read titles data
  pt <- read.table(ptfile, header = TRUE, as.is = TRUE)
  nmax <- nrow(pt)

  ## read page views data obtained so far, or start with an empty frame
  ## (count column first, so the written file lines up when displayed)
  if (file.exists(pvfile)) {
    pv <- read.table(pvfile, header = TRUE, as.is = TRUE)
  } else {
    pv <- data.frame(
      page_requests = integer(),
      page_title = character(),
      stringsAsFactors = FALSE
    )
  }

  ## find the first title for which data still needs to be obtained
  pvlen <- nrow(pv)
  stindex <- 1L
  if (pvlen > 0) {
    ## match() yields NA (still of integer type) when the title is absent,
    ## so the test has to be is.na(), not is.integer()
    mval <- match(pv$page_title[pvlen], pt$page_title)
    if (!is.na(mval)) {
      stindex <- mval + 1L
    }
  }

  ## stindex:nmax would count downwards once every title is done
  ## (stindex == nmax + 1), so build the index set explicitly
  todo <- if (stindex <= nmax) stindex:nmax else integer(0)

  ## jsonlite is only needed when there is something to download
  if (length(todo) > 0 && !requireNamespace("jsonlite", quietly = TRUE)) {
    stop("package 'jsonlite' is required to download page views",
         call. = FALSE)
  }

  ## write pvfile on exit, so progress survives a failed request
  on.exit(
    write.table(pv, file = pvfile, row.names = FALSE, quote = FALSE),
    add = TRUE
  )

  ## get data for each remaining title and append it
  for (i in todo) {
    urlcf <- paste0(urlc, pt$page_title[i])
    ptv <- jsonlite::fromJSON(urlcf, flatten = TRUE)
    ## the month's total is the sum over the daily_views entries
    ptvcount <- sum(unlist(ptv$daily_views))
    tdf <- data.frame(
      page_requests = ptvcount,
      page_title = pt$page_title[i],
      stringsAsFactors = FALSE
    )
    pv <- rbind(pv, tdf)
    cat("..", i)
  }

  ## return df for immediate checking
  invisible(pv)
}