工作用数据

编辑
  • 代码:
# Extract every page of one namespace from a MediaWiki XML dump.
#   $1 - namespace prefix, colon included (e.g. "Template:")
#   $2 - path of the dump file
# Writes to stdout the <title> line of each page whose title starts with the
# prefix, followed by the lines of that page from <text ...> to </text>.
# Needs gawk: the three-argument form of match() is a GNU extension.
extract_namespace() {
    awk -v prefix="$1" '
        match($0, /^    <title>([^<]*)/, a) {
            # A new page starts here: remember whether it is in the namespace.
            keep = (substr(a[1], 1, length(prefix)) == prefix)
            if (keep) print
            next
        }
        /^      <text/, /<[/]text>/ { if (keep) print; next }
    ' "$2"
}

dump=zhwiktionary-20210720-pages-articles.xml
extract_namespace 'Template:' "$dump" > zhwiktionary-templates.xml
extract_namespace 'Module:' "$dump" > zhwiktionary-modules.xml

# Show template lines containing a run of exactly five opening braces that is
# followed by neither "{" nor "|".
grep -e '[^{]{{{{{[^{|]' zhwiktionary-templates.xml
# Show template lines that call one of the single-letter templates
# m/f/n/c/s/p (either case).
grep -e '{{[MmFfNnCcSsPp][}|]' zhwiktionary-templates.xml

# Keep only title lines and lines with a single-letter template call that is
# not part of a triple-brace parameter; then keep each hit together with the
# line just before it (-B 1), so a title survives only when a hit follows it.
grep -e '^    <title>\|^{{[MmFfNnCcSsPp][|}]\|[^{]{{[MmFfNnCcSsPp][|}]' zhwiktionary-templates.xml | grep -B 1 -e '{{[MmFfNnCcSsPp][|}]' > /tmp/tmpt1
# Reduce the surviving title lines to the bare page titles.
grep '<title>' /tmp/tmpt1 | awk -F '[<>]' '{print $3}' > /tmp/tmpt2

源代码

编辑

实际运行中并没有处理 c、s、p 三个模板

def _gender_template_name(token):
    """Map a legacy one-character gender code to its ``g``-prefixed name.

    Latin codes (m, f, n, c, s, p in either case) become ``'g'`` plus the
    lower-cased letter; 陰/阴 become ``'gf'`` and 陽/阳 become ``'gm'``.
    Any other token, including the empty string, is returned unchanged.
    """
    if len(token) != 1:
        return token
    if token in "MmFfNnCcSsPp":
        return 'g' + token.lower()
    if token in "陰阴":
        return 'gf'
    if token in "陽阳":
        return 'gm'
    return token


def _upgrade_gender_templates(text):
    """Return *text* with every legacy gender template call rewritten.

    A call such as ``{{m|f}}`` becomes ``{{gm|gf}}``: the template name and
    each ``|``-separated argument are passed through
    ``_gender_template_name``.  Triple-brace parameters (``{{{m}}}``) are
    left alone.  Each rewritten call is printed before and after the change.
    """
    def rewrite(m):
        name = _gender_template_name(m[1])
        # m[2] is either empty or starts with '|', so splitting and
        # re-joining on '|' preserves the leading separator.
        args = '|'.join(map(_gender_template_name, m[2].split('|')))
        replacement = '{{%s%s}}' % (name, args)
        print(m[0])
        print(replacement)
        return replacement

    # The "not preceded by '{'" check is a zero-width lookbehind on purpose:
    # consuming the preceding character would swallow the closing '}' of an
    # immediately preceding template, and since re.sub matches never overlap
    # the second template of "{{m}}{{f}}" would then be skipped.
    return re.sub(
        r'(?<!\{)\{\{([MmFfNnCcSsPp陰阴陽阳])(?=[}|])([^{}]*)\}\}',
        rewrite,
        text,
    )


def process():
    """Rewrite the legacy gender templates on one page and submit the edit.

    The page is looked up as ``zh.Pages[pn[i]]``; ``zh``, ``pn`` and ``i``
    come from the surrounding scope (presumably the wiki site object, the
    list of page names and the current index -- confirm against the caller).
    The rewritten text is handed to ``tryedit`` as a minor edit.
    """
    zpg = zh.Pages[pn[i]]
    ztxt = _upgrade_gender_templates(zpg.text())
    tryedit(zpg, ztxt, summary='Bot: 淘汰模板cfmnps', minor=True)