## వాడుకరి:Arjunaraoc/R Script for page views of a set of articles in a month using JSON

##updatepvdata gets page requests for a list of article titles, in a
##given language wikiproject, for the specified month from http://stats.grok.se/
##lang: language code, e.g. "te"
##prj: project code, "" for wikipedia, "s" for wikisource, etc.
##prj is the wikiproject code as used for http://stats.grok.se/json/
##ptfile is a file of page titles under a page_title header, one per line, with
##underscores in place of spaces and no quotes, as obtained in .tsv files
##ym is the month for which data is required, as yyyymm
##pvfile is a file holding a data frame of page_requests and page_title.
##If it is missing or empty, data is fetched for all titles; otherwise the
##last title for which data is present is looked up in ptfile and data is
##fetched only for the remaining titles
updatepvdata <- function(prj, ptfile, ym, pvfile) {
    ## Fetch monthly page-request totals from http://stats.grok.se/json/ for
    ## the titles in ptfile that are not yet recorded in pvfile, append them
    ## to the recorded data and rewrite pvfile.
    ##
    ## prj    project code; first URL path segment after json/
    ## ptfile table file with a header line and a page_title column
    ## ym     month (yyyymm); second URL path segment
    ## pvfile table file with page_requests and page_title columns; it is
    ##        created when it does not exist yet
    ##
    ## Returns the complete page-views data frame, invisibly.
    urlc <- paste0("http://stats.grok.se/json/", prj, "/", ym, "/")

    ## read titles data
    pt <- read.table(ptfile, header = TRUE, as.is = TRUE)
    titles <- pt$page_title
    nmax <- nrow(pt)

    ## read page views data already obtained, if any
    if (file.exists(pvfile)) {
        pv <- read.table(pvfile, header = TRUE, as.is = TRUE)
    } else {
        ## number column first, for aligned display of the written file
        pv <- data.frame(page_requests = integer(),
                         page_title = character(),
                         stringsAsFactors = FALSE)
    }

    ## find the first title for which data still needs to be obtained
    pvlen <- nrow(pv)
    stindex <- 1L
    if (pvlen > 0) {
        ## match() yields NA (still of integer type) when the last recorded
        ## title is not in the title list, so test with is.na(); in that
        ## case start again from the first title
        mval <- match(pv$page_title[pvlen], titles)[1]
        if (!is.na(mval)) {
            stindex <- mval + 1L
        }
    }

    ## indices still to fetch; empty when every title is already recorded
    ## (stindex:nmax would count downwards in that case)
    todo <- if (stindex <= nmax) seq.int(stindex, nmax) else integer(0)

    if (length(todo) > 0) {
        ## collect the counts in a list and bind once after the loop
        counts <- vector("list", length(todo))
        for (k in seq_along(todo)) {
            i <- todo[k]
            ptv <- jsonlite::fromJSON(paste0(urlc, titles[i]), flatten = TRUE)
            ## total requests for the month = sum over the daily views
            counts[[k]] <- sum(unlist(ptv$daily_views))
            cat("..", i)
        }
        newrows <- data.frame(page_requests = unlist(counts),
                              page_title = titles[todo],
                              stringsAsFactors = FALSE)
        pv <- rbind(pv, newrows)
    }

    ## write pvfile at the end
    write.table(pv, file = pvfile, row.names = FALSE, quote = FALSE)

    ## return df for immediate checking
    invisible(pv)
}