#!/usr/bin/python
# -*- coding: utf-8 -*-
# Optimized Script to generate mentions of orphan pages and update Telugu Wikipedia page + optional offline logging
import pywikibot
from pywikibot import pagegenerators
import warnings
from concurrent.futures import ThreadPoolExecutor
# Silence UserWarning messages raised from pywikibot modules so they do not
# clutter the progress output printed by this script.
warnings.filterwarnings(
    action="ignore",
    category=UserWarning,
    module='pywikibot',
)

# Site object for Telugu Wikipedia (language code 'te', family 'wikipedia');
# every query and the final page save below go through it.
site = pywikibot.Site(code='te', fam='wikipedia')
def is_orphan_page(page):
    """Return True when *page* has no backlinks from the main namespace.

    Args:
        page: an object exposing ``backlinks(namespaces=..., total=...)``
            that returns an iterable of linking pages (a pywikibot page).

    Returns:
        bool: True if the backlink iterable is empty, False otherwise.
    """
    # Only existence matters, so ask for at most one backlink and stop at
    # the first hit instead of materialising the complete backlink list.
    for _ in page.backlinks(namespaces=0, total=1):
        return False
    return True
def search_for_mentions(site, page_title):
    """Search the main namespace for pages that mention *page_title*.

    Args:
        site: object exposing ``search(text, total=..., namespaces=...)``
            (the pywikibot site to query).
        page_title: title text used as the search query.

    Returns:
        list[str]: titles of up to 10 search hits, in the order returned by
        the search, excluding any hit whose title equals *page_title*.
    """
    search_results = site.search(page_title, total=10, namespaces=0)
    # Evaluate each hit's title once, then drop the page's own entry.
    hit_titles = (result.title() for result in search_results)
    return [title for title in hit_titles if title != page_title]
# Candidate orphans come from the wiki's lonely-pages special page; total=None
# places no upper bound on how many are requested.
orphaned_pages_gen = pagegenerators.LonelyPagesPageGenerator(site=site, total=None)

# Wrap the candidates in a preloading generator (group size 500) and
# materialise the whole result as a list before any processing starts.
orphaned_pages = list(
    pagegenerators.PreloadingGenerator(orphaned_pages_gen, groupsize=500)
)
def process_orphan_page(orphaned_page, page_num):
    """Build the report entry for one orphaned page.

    Re-checks orphan status with ``is_orphan_page`` and, if the page is still
    an orphan, searches the module-level ``site`` for pages mentioning its
    title via ``search_for_mentions``. Progress is printed to stdout.

    Args:
        orphaned_page: page object exposing ``title()`` and accepted by
            ``is_orphan_page``.
        page_num: running number shown in front of the entry.

    Returns:
        str | None: a wikitext entry beginning with ``"\\r\\n"`` that either
        lists the mentioning pages (one ``: [[title]]`` line each) or states
        that no mentions were found; ``None`` when the page is not an orphan.
    """
    page_title = orphaned_page.title()

    # Guard clause: pages that have backlinks are reported and skipped.
    if not is_orphan_page(orphaned_page):
        print(f"Skipping page '{page_title}' as it is not an orphan page.")
        return None

    print(f"Processing orphaned page #{page_num}: {page_title}")

    mentioned_in = search_for_mentions(site, page_title)
    if not mentioned_in:
        return f"\r\n{page_num}. అనాథ పేజీ: [[{page_title}]] - ఎక్కడా ప్రస్తావించబడలేదు."

    # One indented wikilink line per page that mentions the title.
    mentions_str = "\n".join(f": [[{mention}]]" for mention in mentioned_in)
    return f"\r\n{page_num}. అనాథ పేజీ: [[{page_title}]] - ఈ వ్యాసాలలో ప్రస్తావించబడింది:\n{mentions_str}"
# Run the per-page mention searches on a pool of 5 worker threads.
log_data = []  # report entries, in submission order
orphaned_page_count = 0  # running number given to each confirmed orphan
future_to_page = {}  # future -> page it was submitted for

with ThreadPoolExecutor(max_workers=5) as executor:
    for page in orphaned_pages:
        # The orphan check happens here, before submission, so that entry
        # numbers are consecutive over confirmed orphans only.
        # NOTE: process_orphan_page repeats this check in the worker thread.
        if is_orphan_page(page):
            orphaned_page_count += 1
            future = executor.submit(process_orphan_page, page, orphaned_page_count)
            future_to_page[future] = page

    # Iterating the dict visits futures in insertion (submission) order, so
    # the entries are collected in ascending page number. result() blocks
    # until the task finishes and re-raises any exception it hit.
    for future in future_to_page:
        result = future.result()
        if result:  # None means the worker decided to skip the page
            log_data.append(result)
# Join the entries once so the offline log and the wiki page receive
# exactly the same text.
new_content = "\n".join(log_data)

# Offline copy of the report (the file is overwritten on every run).
with open('orphaned_page_mentions_tewiki.txt', 'w', encoding='utf-8') as logfile:
    logfile.write(new_content)

# Replace the whole content of the report page on Telugu Wikipedia.
tewiki_page = pywikibot.Page(site, 'వికీపీడియా:వికీప్రాజెక్టు/అనాథాశ్రమం/అనాథ వ్యాసాల ప్రస్తావనలు')
tewiki_page.text = new_content
tewiki_page.save(summary='అనాథ వ్యాసాల ప్రస్తావనలు తాజాకరించా', minor=False)

print("Script completed. Results written to both the Telugu Wikipedia page and 'orphaned_page_mentions_tewiki.txt'.")