User:Mpradeepbot/mpc.villageCreate.py
Please also use this file as a helper for this program.
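As a rough guide, the script reads the helper file mpc.villageCreate.txt line by line in the order below. This layout is inferred from the read sequence in the code; the role descriptions are assumptions, not a documented format.

mpc.villageCreate.txt (UTF-8 with BOM, one value per line):
line 1 - edit summary used when saving the rewritten list/disambiguation page (commentDis)
line 2 - edit summary used when creating the village stub pages (commentVil)
line 3 - category name, without the Category: prefix (villageCategory)
line 4 - header description containing the gggg/mmmm/jjjj placeholders (headerDescription)
line 5 - main content line containing the gggg/mmmm/jjjj placeholders (mainContent)
line 6 - disambiguation template appended at the end of the page (disambigTemplate)
line 7 - review template whose count is used to validate the page layout (reviewTemplate)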
import wikipedia, pagegenerators, catlib, config, codecs, time

# This function returns the list of articles as a list object
# in the given category. Please give only the category name;
# the namespace will be added automatically.
# -- function requires both 'wikipedia' and 'catlib' to be imported
def getCatList(catTitle):
    cat = catlib.Category(wikipedia.getSite(), u'Category:' + catTitle)
    listOfArticles = cat.articles()
    return listOfArticles

# Replaces every occurrence of en_list[i] in oText with te_list[i].
def doReplacements(oText, en_list, te_list):
    nText = oText
    if len(en_list) != len(te_list):
        wikipedia.output(u'length of the lists do not match.')
        return oText
    count = len(en_list)
    i = 0
    while i < count:
        nText = nText.replace(en_list[i], te_list[i])
        i = i + 1
    return nText

# Splits a list page into one stub page per entry and rewrites the list
# page itself from hDes, mainContent and disambigTemplate.  The
# placeholders gggg, mmmm and jjjj in those templates are filled in
# from each entry.
def createSubPages(otext, hDes, mainContent, disambigTemplate, reviewTemplate, comments, mTitle):
    en_list = []
    en_list.append(u'gggg')
    en_list.append(u'mmmm')
    en_list.append(u'jjjj')
    diff = 0
    lines = otext.splitlines()
    newpages = (len(lines) + 1) / 3
    ntext = u''
    # the number of lines did not match the pattern
    if ((len(lines) - 2) % 3) != 0:
        return otext
    # the number of review templates did not match the pattern
    if (newpages - 1) != otext.count(reviewTemplate):
        return otext
    print '%d new pages are being created' % newpages
    logfile.write(u'* [[' + mTitle + u']]\r\n')
    site = wikipedia.getSite()
    i = 0
    while i < newpages:
        # gggg: the text between the leading ''' markers of the entry
        next = lines[i*3].find('\'\'\'', 3)
        if i == 0:
            gggg = lines[i][3:next]
        # mmmm: target of the last wiki link on the entry's first line
        mmmm = lines[i*3-diff][lines[i*3-diff].rfind('[[')+2:lines[i*3-diff].rfind(']]')]
        mmmm = mmmm.split(' (')[0].split('(')[0].split(' ,')[0].split(',')[0]
        temp = mmmm.split('|')
        if len(temp) > 1:
            mmmm = temp[1]
        # jjjj: target of the first wiki link on the entry's first line
        jjjj = lines[i*3-diff][lines[i*3-diff].find('[[')+2:lines[i*3-diff].find(']]')]
        jjjj = jjjj.split(' (')[0].split('(')[0].split(' ,')[0].split(',')[0]
        temp = jjjj.split('|')
        if len(temp) > 1:
            jjjj = temp[1]
        te_list = []
        te_list.append(gggg)
        te_list.append(mmmm)
        te_list.append(jjjj)
        if i == 0:
            ntext = doReplacements(hDes, en_list, te_list) + u'\r\n'
        ntext = ntext + u'\r\n' + doReplacements(mainContent, en_list, te_list)
        gPageTitle = u'' + gggg + u' (' + mmmm + u')'
        logfile.write(u'** [[' + gPageTitle + u']]')
        gPageContents = u'' + lines[i*3-diff] + u'\r\n' + lines[i*3-diff+1] + u'\r\n'
        # logfile.write(u'' + gPageContents + u'\r\n')
        # creating stub pages.
        page = wikipedia.Page(site, gPageTitle)
        try:
            old = page.get()
            if not page.canBeEdited():
                logfile.write(u' - page exists and locked, should contain \r\n' + gPageContents + u'\r\n')
                old = u'abcd'
        except wikipedia.IsRedirectPage:
            logfile.write(u' - page exists and is redirect, should contain \r\n' + gPageContents + u'\r\n')
            old = u'abcd'
        except wikipedia.NoPage:
            old = u''
        if old != '':
            logfile.write(u' - page exists, should contain \r\n' + gPageContents + u'\r\n')
        else:
            logfile.write(u'\r\n')
            try:
                page.put(gPageContents, comment = comments)
            except wikipedia.EditConflict:
                logfile.write(u'Skipping [[%s]] because of edit conflict\r\n' % (page.title()))
            except wikipedia.SpamfilterError, url:
                logfile.write(u'Cannot change [[%s]] because of blacklist entry %s\r\n' % (page.title(), url))
        if i == newpages - 1:
            ntext = ntext + u'\r\n\r\n' + disambigTemplate
        if i == 0:
            diff = 1
        i = i + 1
    return ntext

logfile = codecs.open('mpc.villageCreate.log', encoding='utf-8', mode='wb')
replaceFile = open('mpc.villageCreate.txt', 'rb')

# skip the 3-byte BOM if the file is UTF-8
replaceFile.read(3)

commentDis = u"" + unicode(replaceFile.readline(), 'utf8')
commentVil = u"" + unicode(replaceFile.readline(), 'utf8')

# read the category
line = u"" + unicode(replaceFile.readline(), 'utf8')
line = line.replace(u'\n', u'')
line = line.replace(u'\r', u'')
villageCategory = line

# read the header description
line = u"" + unicode(replaceFile.readline(), 'utf8')
line = line.replace(u'\n', u'')
line = line.replace(u'\r', u'')
headerDescription = line

# read the main content line
line = u"" + unicode(replaceFile.readline(), 'utf8')
line = line.replace(u'\n', u'')
line = line.replace(u'\r', u'')
mainContent = line

# read the disambiguation template line
line = u"" + unicode(replaceFile.readline(), 'utf8')
line = line.replace(u'\n', u'')
line = line.replace(u'\r', u'')
disambigTemplate = line

# read the review template line
line = u"" + unicode(replaceFile.readline(), 'utf8')
line = line.replace(u'\n', u'')
line = line.replace(u'\r', u'')
reviewTemplate = line

pageList = getCatList(villageCategory)

for page in pageList:
    # get the page from wikipedia
    try:
        # Load the page's text from the wiki
        original_text = page.get()
        if not page.canBeEdited():
            logfile.write(u'* Skipping locked page [[%s]]\r\n' % page.title())
            continue
    except wikipedia.NoPage:
        logfile.write(u'* Page [[%s]] not found\r\n' % page.title())
        continue
    except wikipedia.IsRedirectPage:
        original_text = page.get(get_redirect=True)
    new_text = createSubPages(original_text, headerDescription, mainContent, disambigTemplate, reviewTemplate, commentVil, page.title())
    if new_text == original_text:
        logfile.write(u'* No changes were necessary in %s\r\n' % page.title())
        continue
    else:
        # logfile.write(u'* modified [[' + page.title() + u']]\r\n')
        # logfile.write(u'' + new_text + u'\r\n')
        # Show the title of the page where the link was found.
        # Highlight the title in purple.
        colors = [None] * 5 + [13] * len(page.title()) + [None] * 4
        wikipedia.output(u'\n>>> %s <<<' % page.title(), colors = colors)
        wikipedia.showDiff(original_text, new_text)
        try:
            page.put(new_text, comment = commentDis)
        except wikipedia.EditConflict:
            logfile.write(u'Skipping [[%s]] because of edit conflict\r\n' % (page.title()))
        except wikipedia.SpamfilterError, url:
            logfile.write(u'Cannot change [[%s]] because of blacklist entry %s\r\n' % (page.title(), url))
        print 'Completed modifying, sleeping for 10 seconds...'
        time.sleep(10)

logfile.close()
replaceFile.close()
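To illustrate the placeholder convention, here is a minimal sketch of what doReplacements does. The template text and names below are invented for the example (the real strings come from mpc.villageCreate.txt), and reading gggg/mmmm/jjjj as village, mandal and district names is an assumption suggested by the script's naming, not something the code states.

# Illustration only: invented template text and names; assumes gggg/mmmm/jjjj
# stand for the village, mandal and district names respectively.
template = u'gggg is a village in mmmm mandal of jjjj district.'
en_list = [u'gggg', u'mmmm', u'jjjj']
te_list = [u'Anantasagaram', u'Atmakur', u'Nellore']
print doReplacements(template, en_list, te_list)
# -> Anantasagaram is a village in Atmakur mandal of Nellore district.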