User:CrowleyBot/task/2
CrowleyBot | |
---|---|
(讨论 · 贡献) | |
操作者: | EdwardAlexanderCrowley |
设计者: | EdwardAlexanderCrowley |
是否已批准? | 否 |
机器人权限? | 否 |
任务: | 处理过时的lang参数,清理word的链接,同时将gloss参数重命名为t |
编辑频率: | 每分钟10笔,视网络情况 |
自动/手动: | 全自動 |
编程语言: | python |
可以緊急停止? | 直接封禁 |
受影響頁面
- Category:使用不推薦使用的模板的Undetermined頁面中非rhymes造成的部分
- 已列于User:CrowleyBot/task/1/list
- 处理("plural of", "obsolete spelling of", "alternative spelling of", "inflection of", "alternative form of", "en-noun"),其中en-noun为顺带,不会全数处理
技术细节
- wikitextparser可以获取结构化页面信息,然后捕捉所有名称符合的模板,然后有几种情况:
{{t|word}} -> 按章节标题(使用wikitextparser的ancestors)补足第二项,或不动
{{t|word|p|q|lang=en}}或{{t|word|p|lang=en|q}} -> {{t|en|word|p|q}}
{{t|en|word|p|q}}不动
现暂不考虑用enwikt对应模板补足缺失的lang参数。
至于word中含有{{l|en|word}}或[[word#英语|word]]的情况,悉数清理。
同时,将gloss参数重命名为t。
预期
{{alternative spelling of|lang=sga|[[ainmm]]||名字}}
{{alternative spelling of|sga|ainmm||名字}}
{{plural of|'''{{l|fr|ambulance}}'''|lang=fr}}
{{plural of|fr|ambulance}}
# 不检测语言代码与语言是否一致
{{alternative spelling of|lang=tpw|{{l|tpw|gûasu}}|t=大}}
{{alternative spelling of|tpw|gûasu|t=大}}
{{inflection of|factus|factus|gen|f|p|lang=la}}
{{inflection of|la|factus||gen|f|p}}
# {{l}}对拉丁语有特殊处理
{{inflection of|[[agens#拉丁语|agēns]]||abl|n|s|lang=la}}
{{inflection of|la|agēns||abl|n|s}}
输出信息
源代码
# Module-level state shared with process().
# i indexes the page currently being processed (process() reads pn[i]).
# j is not used as a global; process() binds its own local j, so this
# binding is unused.
i, j = 0, 0
# Outcome logs that process() appends tuples to.
fail, success, purifylog = [], [], []
success1 = []
effect = ""
# Edit summary passed to every page save.
defaultsummary = '[[User:CrowleyBot/task/2|进一步处理lang参数]],半自动测试阶段'
# Chinese language name (section title without its last character) -> language
# code.  French is 'fr'; 'fa' is the code for Persian.
zhl2enl = {
    '英': 'en',
    '法': 'fr',
    '德': 'de',
    '徳': 'de',
    '意大利': 'it',
    '義大利': 'it',
    '荷兰': 'nl',
    '荷蘭': 'nl',
    '馬來西亞': 'ms',
    '马来西亚': 'ms',
}
# Debug switch and the slot process() fills when it is on.
dbg, dbgout = False, None
# The built-in strip goes wrong on some special page titles such as 'none,
# so this hand-written replacement is used instead.
def mystrip(s):
    """Remove a balanced wrapper of apostrophes from both ends of *s*.

    Only acts when *s* both starts and ends with apostrophes and has some
    other character in between.  The same number of apostrophes is removed
    from each side: the length of the shorter of the two runs.  Any other
    string is returned unchanged.
    """
    leading = len(s) - len(s.lstrip("'"))
    trailing = len(s) - len(s.rstrip("'"))
    # leading == len(s) means the whole string is one run of apostrophes
    # (or empty), i.e. there is no separate opening and closing run.
    if leading == 0 or trailing == 0 or leading == len(s):
        return s
    width = min(leading, trailing)
    return s[width:-width]
def purify(s):
    """Reduce a template's word argument to a bare page title.

    Surrounding apostrophe runs are removed with ``mystrip`` first.  The
    remainder must then be, in its entirety, one of these shapes:

    * plain text                     -> ``(text, "", 0)``
    * ``title#section``              -> ``(title, "", 0)``
    * ``{{l|lang|title}}``           -> ``(title, "", 0)``
    * ``{{l|lang|title|display}}``   -> ``(title, display, 0)``
    * ``[[title]]``                  -> ``(title, "", 0)``
    * ``[[title#section]]``          -> ``(title, "", 0)``
    * ``[[title#section|display]]``  -> ``(title, display, 0)``

    Every shape is matched with ``re.fullmatch`` and the link patterns do not
    let a title, section or display text contain square brackets, so input
    such as ``[[a]] [[b]]`` or ``[[a]]s`` is reported as an error instead of
    being accepted with a mangled or truncated title.

    Returns:
        A tuple ``(title, display, err)``.  ``err`` is 0 on success; when the
        input fits none of the shapes the result is ``("", "", 1)``.
    """
    s = mystrip(s)
    # Plain text, optionally followed by "#section".
    if re.fullmatch(r'[^][{}#]*', s):
        return (s, "", 0)
    if m := re.fullmatch(r'([^][{}#]*)#[^][{}]*', s):
        return (m.group(1), "", 0)
    # {{l|lang|title}} and {{l|lang|title|display}}.
    if m := re.fullmatch(r'\{\{l\|[^|}]*\|([^|}]*)\}\}', s):
        return (m.group(1), "", 0)
    if m := re.fullmatch(r'\{\{l\|[^|}]*\|([^|}]*)\|([^|}]*)\}\}', s):
        return (m.group(1), mystrip(m.group(2)), 0)
    # [[title]], [[title#section]] and [[title#section|display]].
    if m := re.fullmatch(r'\[\[([^][#|]*)\]\]', s):
        return (m.group(1), "", 0)
    if m := re.fullmatch(r'\[\[([^][#|]*)#[^][|]*\]\]', s):
        return (m.group(1), "", 0)
    if m := re.fullmatch(r'\[\[([^][#|]*)#[^][|]*\|([^][]*)\]\]', s):
        return (m.group(1), mystrip(m.group(2)), 0)
    return ("", "", 1)
def latitle(s):
    """Return *s* with Latin macron/breve vowels replaced by plain letters.

    Mirrors the special handling that {{l}} applies to Latin.  Characters
    that are not in the table are passed through untouched.
    """
    plain = {
        "Ā": "A", "ā": "a", "Ă": "A", "ă": "a",
        "Ē": "E", "ē": "e", "Ĕ": "E", "ĕ": "e",
        "Ī": "I", "ī": "i", "Ĭ": "I", "ĭ": "i",
        "Ō": "O", "ō": "o", "Ŏ": "O", "ŏ": "o",
        "Ū": "U", "ū": "u", "Ŭ": "U", "ŭ": "u",
        "Ȳ": "Y", "ȳ": "y",
    }
    return "".join(plain.get(ch, ch) for ch in s)
def process():
# Process one wiki page: normalise the handled "form of" templates on it and
# save the page.  The page name is read from pn[i]; outcomes are recorded as
# tuples in the module-level fail / success / purifylog lists.  zh, wtp, pn,
# SLP and time are not defined in this section of the file.
# NOTE(review): the indentation of this listing has been lost, so the nesting
# of the statements below cannot be read off the text and has to be restored
# before the code can run.  The comments describe the statements in source
# order only -- confirm the intended nesting against the original script.
def isof(t):
# Filter predicate: True when template t is one of the handled templates.
# Side effect: a matching template gets its name rewritten to the stripped,
# lower-case form.
tn = t.name.strip().lower()
if tn in ["plural of", "obsolete spelling of", "alternative spelling of", "inflection of", "alternative form of", "en-noun"]:
t.name = tn
return True
else:
return False
zpg = zh.Pages[pn[i]] # avoid edit conflicts with TongcyBot
#if zpg.namespace != 0:
# return
# Parse the page text and keep only the templates accepted by isof.
zast = wtp.parse(zpg.text())
zts = list(filter(isof, zast.templates))
# Nothing to do on this page: log code 0 and stop.
if len(zts) == 0:
success.append((0, i))
return
# j is bound here as a loop variable and is read by process1 below.
for j, zt in enumerate(zts):
# Snapshot of the template text before any change, used in log entries.
zts0 = zt.string
def process1():
# Rewrite the single template zt; reads zt, zts0 and j from the loop above.
args = zt.arguments
for ag in args:
ag.value = ag.value.strip()
# Not every sg parameter of en-noun gets fixed by this
if zt.name == 'en-noun':
# en-noun: rename sg to head and turn pl / pl2 into positional arguments,
# then log code 3.
for ag in args:
if ag.name == 'sg':
ag.name = 'head'
elif ag.name == 'pl' or ag.name == 'pl2':
ag.positional = True
success.append((3, i, j, zts0, zt.string))
return
an = len(args)
# No arguments at all: log failure code 10.
if an == 0:
fail.append((10, i, j, zts0))
return
if an == 1:
# The case with no language parameter is complicated; not handled for now
fail.append((15, i, j, zts0))
return
# NOTE(review): the statements from here to the "[[fallthrough]]" marker
# derive a language code from the enclosing section title.  Whether they are
# reachable depends on the lost nesting (they may sit after the return
# above) -- confirm.
if args[0].name == 'lang':
fail.append((10, i, j, zts0))
return
al0 = zt.ancestors(type_ = "Section")
def anf(s):
# NOTE(review): re.match anchors at the start of the title, so this only
# accepts titles that begin with one of the two characters; re.search may
# have been intended -- confirm.
return re.match('[语語]', s.title)
al1 = list(filter(anf, al0))
# No matching section: failure code 11.  Exactly two matching sections:
# failure code 12, logged together with all ancestor section titles.
if len(al1) == 0:
fail.append((11, i, j, zts0))
return
if len(al1) == 2:
fail.append((12, i, j, zts0, list(map(lambda x: x.title.strip(), al0))))
return
if True:
# Language name = first matching section title without its last character.
tt = al1[0].title.strip()[:-1]
try:
enl = zhl2enl[tt]
except KeyError:
# Unknown name: ask the operator for the code.  An empty answer is logged
# as failure code 13; any other answer is stored in zhl2enl.
print('输入%s的语言代码:' % tt)
enl = input()
if not enl:
fail.append((13, i, j, zts0, list(map(lambda x: x.title.strip(), al0))))
return
zhl2enl[tt] = enl
# Insert the text "|<code>" in front of the first argument.
zt.arguments[0].insert(0, '|' + enl)
# Will be appended twice (presumably: this entry plus the code-2 entry at the
# end of process1 -- confirm)
success.append((1, i, j, zts0, zt.string))
# [[fallthrough]]
if True:
# Wrong lang values are not handled for now; there is no ISO table
# Rename gloss to t, then drop the earlier of any duplicated arguments.
if ag := zt.get_arg("gloss"):
ag.name = "t"
zt.rm_first_of_dup_args()
# Turn lang=<code> into a leading argument: keep the value, delete the named
# argument, then insert "|<code>" in front of the first remaining argument.
if ag := zt.get_arg("lang"):
agv = ag.value
del ag[:]
zt.arguments[0].insert(0, '|' + agv)
if ag := zt.get_arg("from"): # alternative spelling of
fagv = ag.value
del ag[:]
# Stash the value in fagv first so that it does not affect the real an
# zt.set_arg('from', fagv)
else:
fagv = ""
# Re-read the arguments after the edits above.
args = zt.arguments
an = len(args)
p1 = args[0].value
# p2 = bare title, p31 = display text recovered from link / {{l}} markup.
(p2, p31, err) = purify(args[1].value)
# NOTE(review): purify returns an empty title when err is set; whether the
# statements after this if/else still run in that case depends on the lost
# nesting -- confirm.
if err:
purifylog.append((err, i, j, zts0))
else:
purifylog.append((0, i, j, zts0, p2, p31))
# With only two arguments, append "|" to the text of the second one so that
# a third argument exists, then re-read the arguments.
if an == 2:
args[1].insert(len(args[1].string), "|")
args = zt.arguments
if True:
# Choose the display text: the existing third argument wins over the text
# recovered by purify; a disagreement between the two is logged with code 2.
p30 = mystrip(args[2].value)
p3 = p30
if not p3:
p3 = p31
if p30 and p31 and p30 != p31:
purifylog.append((2, i, j, zts0, p30, p31))
# A display text equal to the title is redundant.
if p3 == p2:
p3 = ""
# For "la": when the title equals the display text with its diacritics
# removed by latitle, keep the display text as the second argument instead.
if p1 == "la" and p2 == latitle(p3):
p2 = p3
p3 = ""
args[1].value = p2
args[2].value = p3
# Delete the third argument again when it is the last one and empty.
if len(args) == 3 and p3 == "":
del args[2][:]
# Put back the from= value that was set aside earlier.
if fagv:
zt.set_arg('from', fagv)
success.append((2, i, j, zts0, zt.string)) # end of process1
process1() # end of for j
# NOTE(review): no "global dbgout" statement is visible in this function, so
# this assignment would bind a local name rather than the module-level
# dbgout -- confirm.
if dbg:
dbgout = (zpg, zast.string)
# Guard against network hiccups and insufficient permissions
# Up to five save attempts; any exception leads to a 4-second pause.
for _ in range(5):
try:
zpg.edit(zast.string, defaultsummary)
time.sleep(SLP) # this script is slow
return
except:
time.sleep(4)
# Failure code 99 marks a page that could not be saved.
fail.append((99, i))