from prelude import *
from botaccount import *
# Logged-in clients for the two wikis. The functions in this file edit pages
# on zh.wiktionary and only read pages from en.wiktionary.
zh = mwc.Site('zh.wiktionary.org', clients_useragent = UA)
en = mwc.Site('en.wiktionary.org', clients_useragent = UA)
zh.login(UN, PWD)
en.login(UN, PWD)
SLP = 0  # not used in this section — presumably a sleep interval read elsewhere; confirm

# Work list: whatever focustemp.embed(namespace = 0) yields on zh for the
# template page below, plus the matching list of page titles.
focustemp = zh.Pages['Template:la-noun']
zpgl = list(focustemp.embed(namespace = 0))
# Alternative work list (category title means "pages with script errors"):
#zpgl = list(zh.categories['有脚本错误的页面'])
pn = [p.name for p in zpgl]
n = len(zpgl)
#epgl = list(map(getepg, zpgl))

# Result / log accumulators shared by the processing functions.
fail, success, purifylog, titlelog, successall = [], [], [], [], []
dbg, dbgout = False, None
fail1, fail2 = [], []  # fail1: several calls on the zh page; fail2: no single en match
i, j = 0, 0  # i: index into pn read by the functions below; j is not a global there and is unused

focus = "la-noun"    # template name being repaired
subtitle = 'Latin'   # level-2 section of the en page to read templates from

# The template name is escaped so that a name containing regex metacharacters
# still builds a valid, literal pattern.
# re1: "{{" + focus with either case of the first letter, followed by | or }.
re1 = r'\{\{' + r'[%s%s]' % (re.escape(focus[0].upper()), re.escape(focus[0].lower())) + re.escape(focus[1:]) + r'(?=[\|\}])'
# re2: canonical spelling substituted for every re1 match.
re2 = '{{%s' % focus
# re3: "{{" + focus exactly, followed by | or } (the lookahead is not consumed).
re3 = r'\{\{' + re.escape(focus) + r'(?=[\|\}])'
# re4: a whole call with no nested braces, up to the closing "}}".
re4 = re3 + r'[^{}]*\}\}'
defaultsummary = 'Bot: 自enwikt修%s' % focus
nslist = [0]  # namespaces the processing functions are allowed to touch
def process():
    """Repair the single focus-template call on zh page ``pn[i]`` from enwikt.

    Reads the module-level index ``i``. The page is skipped when its
    namespace is not in ``nslist`` or when it contains no call matching
    ``re3``. Pages with more than one call are recorded in ``fail1``.

    Otherwise the ``subtitle`` section of the same-titled en page is scanned
    for templates named ``focus``. If at least one is found and all are
    textually equal, the opening ``{{<focus>`` on the zh page is replaced by
    ``{{subst:realtemplate|faketemplate=<en template text>`` and the result is
    handed to ``tryedit``. If not, the page is recorded in ``fail2`` and the
    candidates are printed.
    """
    zpg = zh.Pages[pn[i]]
    epg = en.Pages[pn[i]]
    if zpg.namespace not in nslist:
        return
    # Normalise the first-letter case of the template name so re3 can match.
    ztext = re.sub(re1, re2, zpg.text())
    it = list(re.finditer(re3, ztext))
    if not it:
        return
    if len(it) > 1:
        print((i, pn[i]))
        fail1.append((i, pn[i]))
        return
    east = wtp.parse(epg.text())
    etmatch = []
    for esec in east.get_sections(level=2):
        esectitle = esec.title.strip(' []')  # author's note: not fully confident in this clean-up
        if esectitle != subtitle:
            continue
        for et in esec.templates:
            if et.name == focus:
                etmatch.append(et.string)
    if etmatch and its.all_equal(etmatch):
        etm = etmatch[0]
        print((0, i, etm))
        replacement = '{{subst:realtemplate|faketemplate=%s' % etm
        # A callable replacement inserts the text verbatim; a plain string
        # would have any backslash in the en wikitext read as a regex escape.
        ntxt = re.sub(re3, lambda _m: replacement, ztext, count=1)
        tryedit(zpg, ntxt, summary=defaultsummary, fail=fail)
    else:
        print((2, i, pn[i]))
        fail2.append((i, pn[i]))
        for x in etmatch:
            print(x)
def dryprocess():
    """Print the focus-template calls on both wikis for page ``pn[i]``; edit nothing.

    Reads the module-level index ``i``. Returns without output when the zh
    page's namespace is not in ``nslist``. Otherwise prints a ``zh`` header
    and every ``re4`` match in the case-normalised zh text, then an ``en``
    header and the text of every template named ``focus`` in the
    ``subtitle`` section of the en page.
    """
    title = pn[i]
    zpg = zh.Pages[title]
    epg = en.Pages[title]
    if zpg.namespace not in nslist:
        return

    # zh side: complete template calls as they currently stand.
    normalized = re.sub(re1, re2, zpg.text())
    zh_calls = list(re.finditer(re4, normalized))
    print('zh %d [[%s]]' % (i, title))
    for call in zh_calls:
        print(call.group(0))

    # en side: candidate templates from the language section.
    parsed = wtp.parse(epg.text())
    candidates = [
        tpl.string
        for section in parsed.get_sections(level=2)
        if section.title.strip(' []') == subtitle  # author's note: not fully confident in this clean-up
        for tpl in section.templates
        if tpl.name == focus
    ]
    print('en [[:en:%s]]' % title)
    for text in candidates:
        print(text)
def forceprocess(dest = ""):
    """Rewrite every focus-template call on zh page ``pn[i]`` without checks.

    Reads the module-level index ``i``. The page is skipped when its
    namespace is not in ``nslist`` or when it has no call matching ``re3``.

    Args:
        dest: Wikitext to insert after ``faketemplate=``. When empty, the
            first template named ``focus`` in the ``subtitle`` section of
            the en page is used; if none exists, nothing is edited.

    Unlike ``process``, this does not require a single call on the zh page
    or agreement among the en candidates: every ``re3`` match is replaced
    with the same text and the page is saved with ``zpg.edit``.
    """
    zpg = zh.Pages[pn[i]]
    epg = en.Pages[pn[i]]
    if zpg.namespace not in nslist:
        return
    # Normalise the first-letter case of the template name so re3 can match.
    ztext = re.sub(re1, re2, zpg.text())
    it = list(re.finditer(re3, ztext))
    if not it:
        return
    if dest:
        replacement = '{{subst:realtemplate|faketemplate=%s' % dest
        # A callable replacement inserts the text verbatim; a plain string
        # would have any backslash in it read as a regex escape.
        zpg.edit(re.sub(re3, lambda _m: replacement, ztext), defaultsummary)
        return
    east = wtp.parse(epg.text())
    etmatch = []
    for esec in east.get_sections(level=2):
        esectitle = esec.title.strip(' []')  # author's note: not fully confident in this clean-up
        if esectitle != subtitle:
            continue
        for et in esec.templates:
            if et.name == focus:
                etmatch.append(et.string)
    if etmatch:
        replacement = '{{subst:realtemplate|faketemplate=%s' % etmatch[0]
        zpg.edit(re.sub(re3, lambda _m: replacement, ztext), defaultsummary)
# TODO: preferably also output the zh content
def concordprocess():
    """Pair zh focus-template calls with en ones by position on page ``pn[i]``.

    Reads the module-level index ``i``. The page is skipped when its
    namespace is not in ``nslist`` or when it has no call matching ``re3``.

    When the number of templates named ``focus`` in the ``subtitle`` section
    of the en page equals the number of ``re3`` matches on the zh page, the
    k-th zh call opening is replaced by
    ``{{subst:realtemplate|faketemplate=<fake(k-th en template text)>`` and
    the page is saved with ``zpg.edit``. Otherwise a "different length"
    notice is printed and nothing is edited.
    """
    zpg = zh.Pages[pn[i]]
    epg = en.Pages[pn[i]]
    if zpg.namespace not in nslist:
        return
    # Normalise the first-letter case of the template name so re3 can match.
    ztext = re.sub(re1, re2, zpg.text())
    it = list(re.finditer(re3, ztext))
    if not it:
        return
    east = wtp.parse(epg.text())
    etmatch = []
    for esec in east.get_sections(level=2):
        esectitle = esec.title.strip(' []')  # author's note: not fully confident in this clean-up
        if esectitle != subtitle:
            continue
        for et in esec.templates:
            if et.name == focus:
                etmatch.append(et.string)
    if len(etmatch) == len(it):
        replacements = [
            '{{subst:realtemplate|faketemplate=%s' % fake(et) for et in etmatch
        ]
        pending = iter(replacements)
        # One left-to-right pass: each original match takes the next
        # replacement. Repeating re.sub(..., count=1) would rescan from the
        # start and could hit text inserted by an earlier replacement. The
        # callable also inserts the text verbatim, so backslashes in it are
        # not read as regex escapes.
        ztext = re.sub(re3, lambda _m: next(pending), ztext, count=len(replacements))
        zpg.edit(ztext, defaultsummary)
    else:
        print((i, "different length"))