from prelude import *
from langcode import *
from langtitle import *
from botaccount import *
# Module-level setup for the bot run: site clients, login, work list and
# the accumulators that process() fills in.
# NOTE(review): mwc, UA, UN and PWD are not defined in this file; presumably
# they come from the star imports above (mwc looks like mwclient) - confirm.
zh = mwc.Site('zh.wiktionary.org', clients_useragent = UA)
en = mwc.Site('en.wiktionary.org', clients_useragent = UA)
# Forwarded to tryedit() as its SLP argument by process().
# NOTE(review): presumably a delay between edits - confirm against tryedit.
SLP = 0
zh.login(UN, PWD)
en.login(UN, PWD)
# Work list: every namespace-0 page that embeds Template:拉丁形素.
focustemp = zh.Pages['Template:拉丁形素']
zpgl = list(focustemp.embeddedin(namespace = 0))
# Alternative work list (disabled): the members of the category named below.
#zpgl = list(zh.categories['有脚本错误的页面'])
# Page titles of the work list; process() handles pn[i].
pn = list(map(lambda p: p.name, zpgl))
n = len(pn)
i, j = 0, 0 # j is not a global inside process() (it is rebound there as a loop variable), so this j is unused
# Accumulators. process() appends to fail, success and titlelog.
fail, success, purifylog, titlelog, successall = [], [], [], [], []
effect = ""
effectpage = zh.Pages["User:CrowleyBot/task/9/effect"]
# Edit summary passed to tryedit() by process().
defaultsummary = '[[User:CrowleyBot/task/9|修包含拉丁形素的頁面]]'
dbg, dbgout = False, None
# Namespaces process() is allowed to edit; pages elsewhere are skipped.
nslist = [0]
audiohead = set()
def simp(s):
    """Return *s* converted by ``zhc.convert`` with the 'zh-cn' target.

    NOTE(review): ``zhc`` is not defined in this file; presumably a
    Chinese-variant converter, making this "to Simplified Chinese" - confirm.
    """
    converted = zhc.convert(s, 'zh-cn')
    return converted
def getlang(c):
    """Return the entry stored under key *c* in the ``c2n`` table.

    A missing key propagates whatever ``c2n[c]`` raises.
    NOTE(review): presumably maps a language code to a language name - confirm.
    """
    entry = c2n[c]
    return entry
def getcode(l):
    """Return the entry stored under key *l* in the ``n2c`` table.

    process() calls this with a language name that has been passed through
    simp() and uses the result as a language code. A missing key propagates
    whatever ``n2c[l]`` raises.
    """
    entry = n2c[l]
    return entry
# For now, the case where the first parameter of hyph already happens to be a
# language code is not checked here; that is left to the Module.
def need(t):
    """Tell whether template *t* needs processing; currently always True."""
    return True
def istitle(t):
    """Look up template *t* in the ``expanded`` table of heading templates.

    The lookup key is the template name with underscores turned into spaces,
    surrounding whitespace stripped, lower-cased, and then passed through
    simp().

    Returns the 3-tuple stored in ``expanded`` for that key. process()
    unpacks it as ``(zlang, zcode, kyu)`` and uses ``kyu`` as the number of
    '=' characters on each side of a heading. Returns ``(None, None, None)``
    when the name is not in the table.
    """
    tn = simp(t.name.replace('_', ' ').strip().lower())
    try:
        return expanded[tn]
    except KeyError:
        # Only "name not in the table" means "not a heading template"; any
        # other error (or Ctrl-C) must not be silently swallowed.
        return (None, None, None)
def isof(t):
    """Return True if template *t* is one this task rewrites, else False.

    The name is compared after turning underscores into spaces, stripping
    surrounding whitespace and lower-casing. The only accepted name is "音".

    Side effect: a matching template is renamed to "audio" in place, so
    calling this as a filter predicate already performs the rename.
    """
    tn = t.name.replace('_', ' ').strip().lower()
    if tn in ("音",):
        t.name = "audio"
        return True
    # Explicit falsy result instead of falling off the end with None.
    return False
def process():
    """Clean up the page ``pn[i]`` on zh.wiktionary and submit it via tryedit().

    Reads the module-level names ``i``, ``pn``, ``zh``, ``nslist``, ``SLP``
    and ``defaultsummary``; appends to ``fail``, ``titlelog`` and ``success``.
    A page whose namespace is not in ``nslist`` is skipped without an edit.

    Steps, in order:
      1. Collect the language codes implied by the page's categories.
      2. Handle every template recognised by istitle(): templates with
         arguments get ``l=<language>`` set and are renamed to
         'subst:langhd/cat'; templates without arguments are replaced by a
         plain ``==language==`` heading (plus a category link when the
         language code is not already among the page's categories).
      3. In each level-2 section whose title maps to a language code, the
         templates accepted by isof() (which renames them to "audio") get
         that code prepended as a new first argument.
      4. Tidy blank lines around headings, demote a few headings to level 3
         and hand the text to tryedit().

    Failure records appended to ``fail`` start with a numeric code:
      15 - a category's language name is unknown to getcode()
       9 - a section title is unknown to getcode()
      10 - the text contains a 中文 heading, so the '=={{拉丁形素}}=='
           heading line was left in place
    """
    zpg = zh.Pages[pn[i]]
    if zpg.namespace not in nslist:
        return
    zast = wtp.parse(zpg.text())

    # Step 1: language codes implied by the page's categories.
    zcats = [c.name for c in zpg.categories()]
    zcatcode = set()
    for s in zcats:
        s = simp(s)
        # After simp() the name cannot start with 分类 or 分類, so only the
        # 'Category:' prefix has to be matched.
        cat_match = re.fullmatch(r'Category:([^的]*[^罗]的)?([^语]+语A?).+', s)
        if cat_match:
            try:
                zcatcode.add(getcode(cat_match.group(2)))
            except KeyError:
                # Unknown language name; record it and keep going.
                fail.append((15, i, s))

    # Step 2: heading templates. tomodify holds (template index, flag):
    # flag 1 = rename to 'subst:langhd/cat', flag 0 = remove the template.
    tomodify = []
    for j, zt in enumerate(zast.templates):
        zlang, zcode, kyu = istitle(zt)  # istitle() looks the name up in Simplified Chinese
        if not zlang:
            continue
        if zcode:  # without a zcode this is not a language heading template
            if zt.arguments:
                zt.set_arg('l', zlang)
                tomodify.append((j, 1))
                titlelog.append((i, j, zt.string))
            else:
                tomodify.append((j, 0))
                # Both inserts go to position 0, so the heading inserted
                # second ends up in front of the category link.
                if zcode not in zcatcode:
                    zt.insert(0, '[[Category:%s]]\n' % zlang)
                zt.insert(0, '%s%s%s\n' % ('=' * kyu, zlang, '=' * kyu))
        elif not zt.arguments:
            titlelog.append((i, j, zt.string))
            tomodify.append((j, 0))
            zt.insert(0, '%s%s%s\n' % ('=' * kyu, zlang, '=' * kyu))

    # Re-parse the modified text before applying the recorded changes; the
    # inserted text contains no templates, so the indices in tomodify still
    # address the same templates.
    zast = wtp.parse(zast.string)
    zts = zast.templates
    for j, rename in reversed(tomodify):
        if rename:
            zts[j].name = 'subst:langhd/cat'
        else:
            del zts[j][:]

    # Step 3: add the section's language code to the audio templates.
    for j, zsec in enumerate(zast.get_sections(level = 2)):
        zsectitle = zsec.title.strip(' []')
        if zsectitle in ['英文', '{{en}}']:
            zsec.title = '英语'
            zsectitle = '英语'
        if zsectitle in ['{{ja}}', '{{jpn}}']:
            zsec.title = '日语'
            zsectitle = '日语'
        # TODO: titles containing odd images, templates or links are not
        # special-cased; failure code 8 was reserved for them but the check
        # was never enabled.
        zlang = simp(zsectitle)
        try:
            zcode = getcode(zlang)
        except KeyError:
            fail.append((9, i, j, zsec.title))
            continue
        # isof() renames each matching template to "audio" as it filters.
        zts = [zt for zt in zsec.templates if isof(zt)]
        for k, zt in enumerate(zts):
            zt0 = zt.string
            # Inserting '|<code>' at the start of the first argument makes
            # the code a new first argument. Indexing arguments[0] raises
            # IndexError for a template that has no arguments at all.
            zt.arguments[0].insert(0, '|' + zcode)
            success.append((i, j, k, zsec.title, zt0, zt.string))

    # Step 4: textual tidy-up of the whole page.
    edited = zast.string
    edited = re.sub(r'==\n\n+', '==\n', edited)   # no blank line after a heading
    edited = re.sub(r'\n\n+==', '\n==', edited)   # no blank line before a heading
    # Demote the listed level-2 headings to level 3.
    edited = re.sub(r'==(參閱|参阅|參考文獻|参考文献|外部鏈接|外部連接|外部链接)==\n', r'===\1===\n', edited)
    edited = edited.replace('==英文==', '==英語==')
    if '=中文=' not in edited and '= 中文 =' not in edited:
        edited = edited.replace('=={{拉丁形素}}==\n', '')
    else:
        fail.append((10, i))
    # NOTE(review): the edit is attempted even when failure 10 was recorded
    # above - confirm this is intended.
    tryedit(zpg, edited, summary = defaultsummary, SLP = SLP, fail = fail)