వాడుకరి:Vyzbot/mandaltemplate listsorter.py

#!/usr/bin/python
# -*- coding: utf-8  -*-
# Script to sort villages in alphabetical order in Mandal village templates

import wikipedia, pagegenerators, catlib, config, codecs

# Replace the contents in the page 'pageTitle' with data 'pageData' 
# and add the comment 'comment'
def writeData(pageTitle, pageData, comment):
  page = wikipedia.Page(wikipedia.getSite(), pageTitle)
  try:
    # Load the page's text from the wiki
    data = page.get()
  except wikipedia.NoPage:
    data = u''
  data = pageData
  try:
    page.put(data, comment = comment)
  except wikipedia.EditConflict:
    wikipedia.output(u'Skipping %s because of edit conflict' % (page.title()))
  except wikipedia.SpamfilterError, url:
    wikipedia.output(u'Cannot change %s because of blacklist entry %s' % (page.title(), url))
  


# Text passed as the `comment` argument of writeData() for every page saved.
comment = u'బాటు:మూసలో గ్రామాలను అక్షరక్రమంలో అమర్చా'
# Template call searched for in the page text; pages containing it are
# treated as disambiguation pages and skipped by the loop below.
disambig = u"{{అయోమయ నివృత్తి}}"
# Title of the category whose member pages are processed.
# NOTE(review): this literal may end with an invisible left-to-right mark
# (U+200E) copied from the wiki -- confirm it is intended.
catPageTitle = u"విశాఖపట్నం జిల్లాకు సంబంధించిన మూసలు‎"


# Build a page generator over the category named above. The positional
# arguments 1 and None are interpreted by CategorizedPageGenerator
# (presumably `recurse` and `start` -- TODO confirm against pagegenerators).
cat1 = catlib.Category(wikipedia.getSite(), catPageTitle)
gen = pagegenerators.CategorizedPageGenerator(cat1, 1, None)

# Wrap the generator in a PreloadingGenerator with pageNumber = 500.
preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber = 500)

# Markers delimiting the village list inside a mandal template, and the
# separator template that stands between two villages in that list.
listStartMarker = u'|list1='
listEndMarker = u'}}<includeonly>'
villageSeparator = u'{{·}}'


def sortVillageList(templateText):
    """Return `templateText` with its `|list1=` village list sorted.

    The list is the text between the first `|list1=` and the first
    `}}<includeonly>` that follows it; entries are separated by `{{·}}`.
    Entries are stripped of surrounding whitespace, empty entries are
    dropped, and the rest are sorted with the default string ordering and
    re-joined with `{{·}} `. Everything outside the list is returned
    unchanged.

    Returns None when either marker is missing, so the caller can skip the
    page instead of failing on it.
    """
    markerPos = templateText.find(listStartMarker)
    if markerPos < 0:
        return None
    listStart = markerPos + len(listStartMarker)
    listEnd = templateText.find(listEndMarker, listStart)
    if listEnd < 0:
        return None

    # Slice by position rather than split(): a marker that occurs a second
    # time later in the page must not cause the text after it to be lost.
    head = templateText[:listStart]
    tail = templateText[listEnd:]

    villages = [village.strip()
                for village in templateText[listStart:listEnd].split(villageSeparator)]
    # Leading, trailing or doubled separators produce empty entries; drop
    # them so they do not turn into stray separators after sorting.
    villages = [village for village in villages if village]
    villages.sort()

    return head + (villageSeparator + u' ').join(villages) + tail


for page in preloadingGen:
    try:
        # Load the page's text from the wiki
        pageData = page.get()
        if not page.canBeEdited():
            wikipedia.output(u'Skipping locked page %s' % page.title())
            continue
    except wikipedia.NoPage:
        wikipedia.output(u'Page %s not found' % page.title())
        continue
    except wikipedia.IsRedirectPage:
        wikipedia.output(u'Page %s is redirect page' % page.title())
        continue

    # Check for the disambiguation template and skip the page if present
    if disambig in pageData:
        wikipedia.output(u'Page %s is Disambiguation Page' % page.title())
        continue

    # Only templates whose text mentions "మండలం" are treated as mandal templates
    if u'మండలం' not in pageData:
        continue

    sorted_templatetext = sortVillageList(pageData)
    if sorted_templatetext is None:
        # Without both markers there is no list to sort; report and move on
        # rather than aborting the whole run.
        wikipedia.output(u'Skipping %s because the village list markers were not found' % page.title())
        continue
    if sorted_templatetext == pageData:
        # Nothing to change, so do not save an identical revision.
        wikipedia.output(u'Page %s is already sorted' % page.title())
        continue

    writeData(page.title(), sorted_templatetext, comment)