User:CrowleyBot/task/9

一些受影响页面

输出信息

/list：影响页面，可能是按页面号排序的
/fail
/zw：含==?中文==?的页面，拉丁形素没有处理
/latin：拉丁形素不作为子标题的页面
源代码

from prelude import *
from langcode import *
from langtitle import *
from botaccount import *

# Open both wiki sites and log in to each with the same bot credentials.
zh = mwc.Site('zh.wiktionary.org', clients_useragent=UA)
en = mwc.Site('en.wiktionary.org', clients_useragent=UA)
SLP = 0
zh.login(UN, PWD)
en.login(UN, PWD)

# Work list: every main-namespace page that transcludes Template:拉丁形素.
focustemp = zh.Pages['Template:拉丁形素']
zpgl = list(focustemp.embeddedin(namespace=0))
# Disabled alternative source for the work list (pages in a category):
#zpgl = list(zh.categories['有脚本错误的页面'])
pn = [page.name for page in zpgl]
n = len(pn)

# i is the index into pn that process() reads as a global.
i = 0
# j is never declared global anywhere, so this module-level j is unused.
j = 0

# Log accumulators filled while pages are processed.
fail = []
success = []
purifylog = []
titlelog = []
successall = []

effect = ""
effectpage = zh.Pages["User:CrowleyBot/task/9/effect"]
defaultsummary = '[[User:CrowleyBot/task/9|修包含拉丁形素的頁面]]'
dbg = False
dbgout = None
nslist = [0]  # namespaces that process() is allowed to touch
audiohead = set()

def simp(s):
    """Return *s* converted to the ``zh-cn`` variant by ``zhc.convert``."""
    target_variant = 'zh-cn'
    return zhc.convert(s, target_variant)

def getlang(c):
    """Return the ``c2n`` entry stored under *c*.

    Presumably maps a language code to a language name (verify against the
    module that defines ``c2n``).  A failed lookup propagates whatever
    error ``c2n`` raises.
    """
    lang_name = c2n[c]
    return lang_name

def getcode(l):
    """Return the ``n2c`` entry stored under *l*.

    Presumably maps a language name to a language code (verify against the
    module that defines ``n2c``).  A failed lookup propagates whatever
    error ``n2c`` raises; callers in this file catch it and log a failure.
    """
    lang_code = n2c[l]
    return lang_code

# For now, the case where the first argument of hyph already happens to be a
# language code is not checked here; that is left to the Module.
def need(t) -> bool:
    """Report whether *t* needs processing; currently always ``True``.

    The argument is accepted but not inspected.
    """
    return True

def istitle(t):
    """Look up template *t* in the ``expanded`` table of heading templates.

    The template name is normalised first: underscores become spaces,
    surrounding whitespace is stripped, the name is lower-cased and then
    passed through ``simp``.

    Returns:
        The ``expanded`` entry for the normalised name; the caller in this
        file unpacks it as a 3-tuple.  If the name is not in the table,
        ``(None, None, None)`` is returned.
    """
    key = simp(t.name.replace('_', ' ').strip().lower())
    try:
        return expanded[key]
    except KeyError:
        # Only a missing key means "not a heading template".  Anything else
        # (KeyboardInterrupt, a genuine bug) must not be silently swallowed.
        return (None, None, None)

def isof(t):
    """Select ``音`` templates and rename them to ``audio`` in place.

    The template name is normalised (underscores to spaces, stripped,
    lower-cased) before comparison.  Returns ``True`` for a match, after
    setting ``t.name`` to ``"audio"``; otherwise falls through and returns
    ``None``, which ``filter`` treats as false.
    """
    normalised = t.name.replace('_', ' ').strip().lower()
    if normalised != "音":
        return None
    t.name = "audio"
    return True

def process():
    """Rewrite the page ``pn[i]`` on the zh wiki and submit the edit.

    Reads the module-level ``i``, ``pn``, ``zh``, ``nslist``, ``SLP`` and
    ``defaultsummary``; appends to the module-level ``fail``, ``titlelog``
    and ``success`` logs.  Takes no arguments and returns ``None``.

    Steps, in order:
      1. Collect language codes from the page's categories.
      2. Turn language-heading templates recognised by ``istitle`` into
         plain ``=``-delimited headings (or ``subst:langhd/cat`` calls).
      3. In each level-2 section, prepend the section's language code to
         every template selected by ``isof``.
      4. Apply textual clean-ups to the result and hand it to ``tryedit``.

    Failure records appended to ``fail`` start with a numeric tag:
    15 = category language not resolvable, 9 = section title not
    resolvable, 10 = page still contains a 中文 heading.
    """
    zpg = zh.Pages[pn[i]]
    # Only touch pages in the allowed namespaces.
    if zpg.namespace not in nslist:
        return
    zast = wtp.parse(zpg.text())
    zcats = [c.name for c in zpg.categories()]
    # Language codes that the page's existing categories already imply.
    zcatcode = set()
    for s in zcats:
        s = simp(s)
        # Group 2 captures the language name: a run ending in 语, optionally
        # followed by "A".
        if m := re.fullmatch(r'Category:([^的]*[^罗]的)?([^语]+语A?).+', s): # the name can never start with 分类 or 分類
            try:
                zcatcode.add(getcode(m.group(2)))
            except:
                fail.append((15, i, s))
    # (template index, mode) pairs; mode 1 = rename the template,
    # mode 0 = delete it.  Applied after the re-parse below.
    tomodify = []
    for j, zt in enumerate(zast.templates):
        zlang, zcode, kyu = istitle(zt) # lookup key must be Simplified Chinese
        if not zlang:
            continue
        if zcode: # no zcode means it is not a language-heading template
            if len(zt.arguments) != 0:
                # Template carries arguments: keep it, pass the language as
                # |l=, and rename it later.
                zt.set_arg('l', zlang)
                tomodify.append((j, 1))
                titlelog.append((i, j, zt.string))
            else:
                tomodify.append((j, 0))
                # Add an explicit category link when the page's categories
                # do not already cover this language code.
                if not zcode in zcatcode:
                    zt.insert(0, '[[Category:%s]]\n' % zlang)
            # Emit a plain heading with kyu '=' signs on each side.
            # NOTE(review): presumably insert(0, ...) places the text
            # immediately before the template - confirm against the parser.
            zt.insert(0, '%s%s%s\n'% ('=' * kyu, zlang, '=' * kyu))
        elif len(zt.arguments) == 0:
            # Non-language heading template without arguments: replace it
            # with a plain heading.  With arguments it is left untouched.
            titlelog.append((i, j, zt.string))
            tomodify.append((j, 0))
            zt.insert(0, '%s%s%s\n'% ('=' * kyu, zlang, '=' * kyu))
    
    # Re-parse the modified text, then apply the recorded edits by index.
    # NOTE(review): relies on the template order/indices being the same
    # after the re-parse - confirm.
    zast = wtp.parse(zast.string)
    zts = zast.templates
    for j, m in reversed(tomodify):
        if m:
            zts[j].name = 'subst:langhd/cat'
        else:
            # Slice-delete the template's whole text.
            del zts[j][:]
    
    #print(zast.string)
    for j, zsec in enumerate(zast.get_sections(level = 2)):
        # Drop surrounding spaces and link brackets from the heading text.
        zsectitle = zsec.title.strip(' []')
        # Normalise a few known heading spellings.
        if zsectitle in ['英文', '{{en}}']:
            zsec.title = '英语'
            zsectitle = '英语'
        if zsectitle in ['{{ja}}', '{{jpn}}']:
            zsec.title = '日语'
            zsectitle = '日语'
        if False: # placeholder (never taken): titles containing stray images, templates or links
            fail.append((8, i, j, zsec.title))
            continue
        zlang = simp(zsectitle)
        try:
            zcode = getcode(zlang)
        except:
            # Section title is not a known language name; skip the section.
            fail.append((9, i, j, zsec.title))
            continue
        # isof() also renames each selected template to "audio".
        zts = list(filter(isof, zsec.templates))
        for k, zt in enumerate(zts):
            zt0 = zt.string
            # nd is always True here and err is never read.
            nd, err = True, False
            if nd:
                # Put the language code in front of the first argument.
                # NOTE(review): indexing arguments[0] assumes the template
                # has at least one argument.
                zt.arguments[0].insert(0, '|' + zcode)
                success.append((i, j, k, zsec.title, zt0, zt.string))
    edited = zast.string
    # Collapse blank lines directly after and directly before '=='.
    edited = re.sub(r'==\n\n+', '==\n', edited)
    edited = re.sub(r'\n\n+==', '\n==', edited)
    # Demote these headings from '==' to '==='.
    edited = re.sub(r'==(參閱|参阅|參考文獻|参考文献|外部鏈接|外部連接|外部链接)==\n', r'===\1===\n', edited)
    edited = edited.replace('==英文==', '==英語==')
    # Remove the {{拉丁形素}} heading line only when the page has no 中文
    # heading; otherwise leave it and log the page.
    if edited.find('=中文=') == -1 and edited.find('= 中文 =') == -1:
        edited = edited.replace('=={{拉丁形素}}==\n', '')
    else:
        fail.append((10, i))
    tryedit(zpg, edited, summary = defaultsummary, SLP = SLP, fail = fail)