用户:CrowleyBot/task/2

CrowleyBot This user is a bot.
(讨论 · 贡献)
操作者:EdwardAlexanderCrowley
设计者:EdwardAlexanderCrowley
是否已批准
机器人权限
任务:处理过时的lang参数,清理word的链接,同时将gloss参数重命名为t
编辑频率:每分钟10笔,视网络情况
自动/手动:全自动
编程语言:Python
可以紧急停止?直接封禁

受影响页面

技术细节

编辑
  • wikitextparser可以获取结构化页面信息,然后捕捉所有名称符合的模板,然后有几种情况:
  1. {{t|word}} -> 按章节标题(使用wikitextparser的ancestors)补足第二项,或不动
  2. {{t|word|p|q|lang=en}}或{{t|word|p|lang=en|q}} -> {{t|en|word|p|q}}
  3. {{t|en|word|p|q}}不动

现暂不考虑用enwikt对应模板补足缺失的lang参数。

至于word中含有{{l|en|word}}或[[word#英语|word]],悉数清理。

同时,将gloss参数重命名为t。

预期

编辑
{{alternative spelling of|lang=sga|[[ainmm]]||名字}}
{{alternative spelling of|sga|ainmm||名字}}

{{plural of|'''{{l|fr|ambulance}}'''|lang=fr}}
{{plural of|fr|ambulance}}

# 不检测语言代码与语言是否一致
{{alternative spelling of|lang=tpw|{{l|tpw|gûasu}}|t=}}
{{alternative spelling of|tpw|gûasu|t=}}

{{inflection of|factus|factus|gen|f|p|lang=la}}
{{inflection of|la|factus||gen|f|p}}

# {{l}}对拉丁语有特殊处理
{{inflection of|[[agens#拉丁语|agēns]]||abl|n|s|lang=la}}
{{inflection of|la|agēns||abl|n|s}}

输出信息

编辑

源代码

编辑
# Module-level state shared by the processing functions below.
# i is read by process() as the index into the page-name list; the j bound
# here is not what process() uses (it rebinds j locally), so it is unused.
i, j = 0, 0
# Result logs: process() appends tuples whose first element is a status code.
fail, success, purifylog = [], [], []
success1 = []
effect = ""
# Edit summary passed to every page save.
defaultsummary = '[[User:CrowleyBot/task/2|进一步处理lang参数]],半自动测试阶段'
# Maps a Chinese language-section title with its final character removed
# (e.g. '英' from '英语') to a language code.
# '法' is French, whose code is 'fr' ('fa' is Persian).
zhl2enl = {'英': 'en', '法': 'fr', '德': 'de', '徳': 'de', '意大利': 'it', '義大利': 'it', '荷兰': 'nl', '荷蘭': 'nl', '馬來西亞': 'ms', '马来西亚': 'ms'}
# When dbg is true, process() stores a debug snapshot in dbgout.
dbg, dbgout = False, None

# str.strip misbehaves on some special pages (for example 'none), so a
# hand-written replacement is used instead.
def mystrip(s):
    """Remove balanced wiki bold/italic apostrophes wrapping *s*.

    Apostrophes are only removed when *s* both starts and ends with a run of
    them and those are two distinct runs. The same number of characters is
    removed from each side: the length of the shorter of the two runs.
    Any other string is returned unchanged.
    """
    runs = [hit.span() for hit in re.finditer("'+", s)]
    if len(runs) < 2:
        return s
    lead_start, lead_end = runs[0]
    trail_start, trail_end = runs[-1]
    if lead_start != 0 or trail_end != len(s):
        return s
    width = min(lead_end - lead_start, trail_end - trail_start)
    return s[width:-width]

def purify(s):
    """Reduce a template argument to a bare page title plus optional display text.

    Wrapping apostrophes are removed with mystrip() first. The remaining text
    must consist entirely of one of the recognised forms:

    * plain text                     -> (text, "", 0)
    * title#section                  -> (title, "", 0)
    * {{l|lang|title}}               -> (title, "", 0)
    * {{l|lang|title|display}}       -> (title, display, 0)
    * [[title]]                      -> (title, "", 0)
    * [[title#section]]              -> (title, "", 0)
    * [[title#section|display]]      -> (title, display, 0)

    Every pattern is anchored to the whole string and link titles may not
    contain square brackets, so input such as ``[[foo]]bar`` or
    ``[[a]] x [[b]]`` is reported as unrecognised instead of being silently
    truncated or turned into a bogus title.

    Returns:
        A tuple ``(title, display, err)``. ``err`` is 0 on success; on
        unrecognised input the result is ``("", "", 1)``.
    """
    s = mystrip(s)
    # The seven patterns could be merged into two for efficiency.
    # Plain text without any link/template markup or section marker.
    if re.fullmatch(r'[^][{}#]*', s):
        return (s, "", 0)
    # Plain "title#section": keep only the title.
    if m := re.fullmatch(r'([^][{}#]*)#([^][{}]*)', s):
        return (m.group(1), "", 0)
    # {{l|lang|title}}
    if m := re.fullmatch(r'\{\{l\|([^|}]*)\|([^|}]*)\}\}', s):
        return (m.group(2), "", 0)
    # {{l|lang|title|display}}
    if m := re.fullmatch(r'\{\{l\|([^|}]*)\|([^|}]*)\|([^|}]*)\}\}', s):
        return (m.group(2), mystrip(m.group(3)), 0)
    # [[title]]
    if m := re.fullmatch(r'\[\[([^\]\[#|]*)\]\]', s):
        return (m.group(1), "", 0)
    # [[title#section]]
    if m := re.fullmatch(r'\[\[([^\]\[#|]*)#([^\]\[|]*)\]\]', s):
        return (m.group(1), "", 0)
    # [[title#section|display]]
    if m := re.fullmatch(r'\[\[([^\]\[#|]*)#([^\]\[|]*)\|([^\]\[]*)\]\]', s):
        return (m.group(1), mystrip(m.group(3)), 0)
    return ("", "", 1)

def latitle(s):
    """Return *s* with Latin macron/breve vowels replaced by plain letters.

    {{l}} treats Latin specially, so the display form has to be mapped back
    to this form before comparing it with a page title. Characters that are
    not in the table are passed through unchanged.
    """
    plain = {"Ā": "A", "ā": "a", "Ă": "A", "ă": "a", "Ē": "E", "ē": "e", "Ĕ": "E", "ĕ": "e", "Ī": "I", "ī": "i", "Ĭ": "I", "ĭ": "i", "Ō": "O", "ō": "o", "Ŏ": "O", "ŏ": "o", "Ū": "U", "ū": "u", "Ŭ": "U", "ŭ": "u", "Ȳ": "Y", "ȳ": "y"}
    return "".join(plain.get(ch, ch) for ch in s)
    
def process():
    """Rewrite the handled templates on page ``pn[i]`` and save the page.

    The page is fetched through ``zh.Pages``, parsed with ``wtp.parse``
    (presumably wikitextparser -- confirm against the file's imports), and
    every template whose name is one of the handled "... of" templates or
    ``en-noun`` is rewritten in place by the nested ``process1``:

    * ``en-noun``: ``sg`` is renamed to ``head``; ``pl``/``pl2`` become
      positional.
    * other templates: ``gloss`` is renamed to ``t``, ``lang=xx`` is moved to
      the first positional slot, and the word argument is cleaned with
      purify() so that links / {{l}} wrappers are reduced to a bare title
      (with a display form kept in the following argument when it differs).

    Outcomes are appended to the module-level lists:

    * ``success``: ``(0, i)`` no handled template on the page;
      ``(3, i, j, before, after)`` en-noun; ``(2, i, j, before, after)``
      other templates.
    * ``fail``: code 10 = template has no arguments, 15 = template has a
      single argument (not handled yet), 99 = the page could not be saved.
    * ``purifylog``: ``(err, i, j, before)`` when purify() rejects the word,
      ``(0, i, j, before, title, display)`` when it succeeds, and an extra
      ``(2, ...)`` entry when an explicit display argument disagrees with the
      one embedded in the link.

    Saving is attempted up to five times; after a successful save the
    function sleeps ``SLP`` seconds, after a failed attempt 4 seconds.
    """
    # Without this declaration the debug snapshot near the end would only
    # bind a function-local name and the module-level dbgout would never be
    # updated.
    global dbgout

    def isof(t):
        """Return True for handled templates, normalising ``t.name`` as a side effect."""
        tn = t.name.strip().lower()
        if tn in ["plural of", "obsolete spelling of", "alternative spelling of", "inflection of", "alternative form of", "en-noun"]:
            t.name = tn
            return True
        else:
            return False
    zpg = zh.Pages[pn[i]]  # guard against edit conflicts with TongcyBot
    #if zpg.namespace != 0:
    #    return
    zast = wtp.parse(zpg.text())
    zts = list(filter(isof, zast.templates))
    if len(zts) == 0:
        success.append((0, i))
        return
    for j, zt in enumerate(zts):
        # Template text before any modification, kept for the logs.
        zts0 = zt.string
        def process1():
            """Rewrite the current template ``zt`` in place and log the outcome."""
            args = zt.arguments
            for ag in args:
                ag.value = ag.value.strip()
            # This does not fix every sg parameter of en-noun.
            if zt.name == 'en-noun':
                for ag in args:
                    if ag.name == 'sg':
                        ag.name = 'head'
                    elif ag.name == 'pl' or ag.name == 'pl2':
                        ag.positional = True
                success.append((3, i, j, zts0, zt.string))
                return
            an = len(args)
            if an == 0:
                fail.append((10, i, j, zts0))
                return
            if an == 1:
                # The case without a language parameter is complicated;
                # it is not handled for now.
                fail.append((15, i, j, zts0))
                return
                # NOTE: the rest of this branch is unreachable because of the
                # return above. It is the disabled draft of deriving the
                # language code from the enclosing section title.
                if args[0].name == 'lang':
                    fail.append((10, i, j, zts0))
                    return
                al0 = zt.ancestors(type_ = "Section")
                def anf(s):
                    return re.match('[语語]', s.title)
                al1 = list(filter(anf, al0))
                if len(al1) == 0:
                    fail.append((11, i, j, zts0))
                    return
                if len(al1) == 2:
                    fail.append((12, i, j, zts0, list(map(lambda x: x.title.strip(), al0))))
                    return
                if True:
                    tt = al1[0].title.strip()[:-1]
                    try:
                        enl = zhl2enl[tt]
                    except KeyError:
                        print('输入%s的语言代码:' % tt)
                        enl = input()
                        if not enl:
                            fail.append((13, i, j, zts0, list(map(lambda x: x.title.strip(), al0))))
                            return
                    zhl2enl[tt] = enl
                    zt.arguments[0].insert(0, '|' + enl)
                    # success would be appended twice (here and below).
                    success.append((1, i, j, zts0, zt.string))
                    # [[fallthrough]]
            if True:
                # Wrong lang values are not handled for now; there is no ISO table.
                if ag := zt.get_arg("gloss"):
                    ag.name = "t"
                    zt.rm_first_of_dup_args()
                if ag := zt.get_arg("lang"):
                    # Move lang=xx to the front as the first positional argument.
                    agv = ag.value
                    del ag[:]
                    zt.arguments[0].insert(0, '|' + agv)
                if ag := zt.get_arg("from"): # alternative spelling of
                    fagv = ag.value
                    del ag[:]
                    # Stash the value in fagv first so that it does not
                    # affect the real argument count; it is restored below.
                    # zt.set_arg('from', fagv)
                else:
                    fagv = ""
                args = zt.arguments
                an = len(args)
                p1 = args[0].value
                (p2, p31, err) = purify(args[1].value)
                if err:
                    purifylog.append((err, i, j, zts0))
                else:
                    purifylog.append((0, i, j, zts0, p2, p31))
                    if an == 2:
                        # Append an empty third argument so args[2] exists.
                        args[1].insert(len(args[1].string), "|")
                        args = zt.arguments
                    if True:
                        p30 = mystrip(args[2].value)
                        p3 = p30
                        if not p3:
                            p3 = p31
                        if p30 and p31 and p30 != p31:
                            # Explicit display text disagrees with the link's own.
                            purifylog.append((2, i, j, zts0, p30, p31))
                        if p3 == p2:
                            p3 = ""
                        # For Latin, a display form that only differs from the
                        # title by length marks replaces the title itself.
                        if p1 == "la" and p2 == latitle(p3):
                            p2 = p3
                            p3 = ""
                        args[1].value = p2
                        args[2].value = p3
                        if len(args) == 3 and p3 == "":
                            # Drop the trailing empty argument again.
                            del args[2][:]
                if fagv:
                    zt.set_arg('from', fagv)
                success.append((2, i, j, zts0, zt.string)) # end of process1
        process1() # end of for j
    if dbg:
        dbgout = (zpg, zast.string)
    # Guard against network fluctuation and insufficient permissions.
    for _ in range(5):
        try:
            zpg.edit(zast.string, defaultsummary)
            time.sleep(SLP) # this script is slow
            return
        except Exception:
            # Only ordinary errors are retried; KeyboardInterrupt and
            # SystemExit propagate so the operator can stop the bot.
            time.sleep(4)
    fail.append((99, i))