import mwclient as mwc
import wikitextparser as wtp
import zhconv as zhc
import os, sys, time, re, inspect
import subprocess as subp
from contextlib import redirect_stdout
# with open('/tmp/tmp', 'w') as f, redirect_stdout(f): print('123\n456')
from fishhook import *
import itertools as its
import more_itertools as mits
# Make more_itertools' helpers reachable through the `its` alias as well.
# Only public names are copied: updating with the whole module __dict__ would
# also overwrite itertools' own module metadata (__name__, __spec__, __doc__,
# __file__, ...) with more_itertools' values.
its.__dict__.update(
    (name, obj) for name, obj in vars(mits).items() if not name.startswith('_')
)
import operator
import functools as fct
import bisect
import collections
from collections import defaultdict
import copy
import csv
import pickle
# Short alias: Page.embed is bound to the same method as Page.embeddedin.
mwc.page.Page.embed = mwc.page.Page.embeddedin
#@hook(int)
#def __add__(self, other):
# return orig(self, other)
@hook_cls(str)
class str_hook:
    """Convenience methods attached to the built-in ``str`` via ``hook_cls``.

    Inside every method ``self`` is the string the method was called on.
    Naming scheme used by the ``find*`` / ``wipe*`` helpers:
    ``o`` = literal text, ``r`` = regular expression, ``s`` = match either the
    zh-cn or the zh-tw conversion, ``l`` = a list of needles, ``c`` = a set
    of single characters.
    """

    def replacen(self, pattern, repl, count=0):
        r"""Literal-substring ``re.subn``.

        ``pattern`` is escaped and therefore matched verbatim, while ``repl``
        is still a regex replacement template, so ``\g<0>`` refers to the
        matched text.

        Returns:
            tuple: ``(new_string, number_of_replacements)``.
        """
        return re.subn(re.escape(pattern), repl, self, count=count)

    def replacebang(self, a, b):
        """Return the string with every ``a`` replaced by ``b``.

        Strings are immutable, so an in-place replace is impossible; the
        previous body only rebound the local ``self`` and returned ``None``.
        The result is now returned so the call is actually useful.
        """
        return self.replace(a, b)

    def subbang(self, pattern, repl, count=0, flags=0):
        """Return ``re.sub(pattern, repl, self)``.

        As with ``replacebang``, the result is returned because the string
        itself cannot be modified. ``count`` and ``flags`` are passed by
        keyword (passing them positionally to ``re.sub`` is deprecated).
        """
        return re.sub(pattern, repl, self, count=count, flags=flags)

    def wipec(self, s):
        """Remove every character contained in ``s``."""
        if not s:
            # An empty character class "[]" is not a valid pattern.
            return self
        return re.sub(r'[' + re.escape(s) + r']', '', self)

    def wipel(self, l):
        """Remove every occurrence of each literal string in ``l``."""
        return re.sub(r'(' + '|'.join(map(re.escape, l)) + r')', '', self)

    def wipecl(self, s, l):
        """Remove the characters in ``s`` and the literal strings in ``l``."""
        alternatives = [re.escape(item) for item in l]
        if s:
            # Only add the character class when it is non-empty ("[]" is
            # rejected by the regex compiler).
            alternatives.insert(0, r'[' + re.escape(s) + r']')
        if not alternatives:
            return self
        return re.sub(r'(' + '|'.join(alternatives) + r')', '', self)

    def findo(self, s):
        """Return True when the literal ``s`` occurs in the string."""
        return self.find(s) != -1

    def findom(self, s):
        """Return the match object for the literal ``s``, or None."""
        return re.search(re.escape(s), self)

    def findr(self, s):
        """Return the match object for the regular expression ``s``, or None."""
        return re.search(s, self)

    def finds(self, s):
        """Match the zh-cn or zh-tw conversion of the literal ``s``.

        Both variants are escaped so that metacharacters in ``s`` (for
        example the parentheses in "Foo (bar)") are matched verbatim, the
        same way ``findsl`` treats its needles.
        """
        return re.search(
            r'(%s|%s)' % (re.escape(simp(s)), re.escape(comp(s))), self
        )

    def findsl(self, l):
        """Match the zh-cn or zh-tw conversion of any literal in ``l``.

        Returns None for an empty list (the empty alternation would
        otherwise match the empty string at position 0).
        """
        variants = [simp(s) for s in l] + [comp(s) for s in l]
        if not variants:
            return None
        return re.search(r'(' + '|'.join(map(re.escape, variants)) + r')', self)

    def findol(self, l):
        """Match any literal in ``l``; returns None for an empty list."""
        needles = [re.escape(s) for s in l]
        if not needles:
            return None
        return re.search(r'(' + '|'.join(needles) + r')', self)

    def simp(self):
        """Return ``zhc.convert(self, 'zh-cn')``."""
        return zhc.convert(self, 'zh-cn')

    def comp(self):
        """Return ``zhc.convert(self, 'zh-tw')``."""
        return zhc.convert(self, 'zh-tw')
# Module-level names i and j, both initialised to 0. Nothing in the visible
# part of the file reads them -- presumably scratch counters for interactive
# use (TODO confirm).
i, j = 0, 0
def search(site, search="", namespace=0, limit=None, generator=True):
    """Run a full-text search.

    API doc: https://www.mediawiki.org/wiki/API:Search

    Args:
        site: Site object the listing is created for.
        search (str): Search string (Default: "").
        namespace (int): Restricts search to a given namespace (Default: 0)
        limit (int): Maximum amount of pages to return per request
        generator (bool): Return generator (Default: True)

    Returns:
        An instance of the class chosen by
        ``mwclient.listing.List.get_list(generator)``.
    """
    listing = mwc.listing.List
    api_prefix = listing.get_prefix('sr', generator)
    query_args = dict(
        listing.generate_kwargs(
            api_prefix, namespace=namespace, search=search, prop=None
        )
    )
    list_cls = listing.get_list(generator)
    return list_cls(
        site, 'search', 'sr', limit=limit, return_values='title', **query_args
    )
def prefixsearch(site, search="", namespace=0, profile='engine_autoselect', limit=None, generator=True):
    """Run a prefix search over page titles.

    API doc: https://www.mediawiki.org/wiki/API:Prefixsearch

    Args:
        site: Site object the listing is created for.
        search (str): Title prefix to look for (Default: "").
        namespace (int): Restricts search to a given namespace (Default: 0)
        profile (str): Search profile forwarded to the API
            (Default: 'engine_autoselect')
        limit (int): Maximum amount of pages to return per request
        generator (bool): Return generator (Default: True)

    Returns:
        An instance of the class chosen by
        ``mwclient.listing.List.get_list(generator)``.
    """
    listing = mwc.listing.List
    api_prefix = listing.get_prefix('ps', generator)
    query_args = dict(
        listing.generate_kwargs(
            api_prefix, search=search, namespace=namespace, profile=profile
        )
    )
    list_cls = listing.get_list(generator)
    return list_cls(
        site, 'prefixsearch', 'ps',
        limit=limit, return_values='title', **query_args
    )
def categorylist(site, title, namespace=0, sort="sortkey", starthexsortkey=None, endhexsortkey=None, startsortkeyprefix=None, endsortkeyprefix=None, limit=None, generator=True):
    """List all pages in the given category.

    API doc: https://www.mediawiki.org/wiki/API:Categorymembers

    Args:
        site: Site object the listing is created for.
        title (str): Category title forwarded to the API.
        namespace (int): Restricts the listing to a given namespace
            (Default: 0)
        sort (str): Sort property forwarded to the API (Default: "sortkey")
        starthexsortkey, endhexsortkey, startsortkeyprefix, endsortkeyprefix:
            Optional sort-key bounds forwarded to the API unchanged.
        limit (int): Maximum amount of pages to return per request
        generator (bool): Return generator (Default: True)

    Returns:
        An instance of the class chosen by
        ``mwclient.listing.List.get_list(generator)``.
    """
    listing = mwc.listing.List
    api_prefix = listing.get_prefix('cm', generator)
    query_args = dict(
        listing.generate_kwargs(
            api_prefix,
            prop=None,
            title=title,
            namespace=namespace,
            sort=sort,
            starthexsortkey=starthexsortkey,
            endhexsortkey=endhexsortkey,
            startsortkeyprefix=startsortkeyprefix,
            endsortkeyprefix=endsortkeyprefix,
        )
    )
    list_cls = listing.get_list(generator)
    return list_cls(
        site, 'categorymembers', 'cm', limit=limit, return_values='title',
        **query_args
    )
# Alternative name bound to the same function object as categorylist.
catcat = categorylist
def tryedit(page, text, summary="", SLP=0, fail=None, failformat="(99, %s)", origin=None, minor=True, bot=True, retries=5):
    """Save ``text`` to ``page``, retrying when the edit raises.

    Args:
        page: Object providing ``edit(text, summary, minor=..., bot=...)`` and
            a ``name`` attribute.
        text: New page content.
        summary: Edit summary.
        SLP: Seconds to sleep between two attempts.
        fail: What to do once every attempt has failed. ``None`` does nothing,
            a callable is called without arguments, anything else must
            support ``append`` and receives ``failformat % (page.name,)``.
        failformat: Format string used for the ``fail.append`` entry.
        origin: Current page content; when it equals ``text`` no edit is made.
        minor: Forwarded to ``page.edit``.
        bot: Forwarded to ``page.edit``.
        retries: Maximum number of attempts (Default: 5).

    Returns:
        bool: True when nothing had to change or an attempt succeeded,
        False when all attempts failed.
    """
    if text == origin:
        return True
    for attempt in range(retries):
        try:
            page.edit(text, summary, minor=minor, bot=bot)
        except Exception:
            # Only Exception is caught: KeyboardInterrupt / SystemExit must
            # still be able to stop a long-running batch.
            if attempt + 1 < retries:
                # No point in sleeping after the last attempt.
                time.sleep(SLP)
        else:
            return True
    if fail is not None:
        if callable(fail):
            fail()
        else:
            fail.append(failformat % (page.name,))
    return False
# reliable
def getepg(pg):
    """Return the page that ``pg``'s 'en' interlanguage link points to.

    ``en`` is a module-level name that is not defined in the visible part of
    this file -- presumably the English-wiki site object (TODO confirm).

    Args:
        pg: Object whose ``langlinks()`` yields ``(language, title)`` pairs.

    Returns:
        ``en.Pages[title]`` for the 'en' link, or None when there is no such
        link or the lookup fails.
    """
    try:
        # Resolve the link first so a page without an 'en' link never
        # touches the remote site object.
        title = dict(pg.langlinks())['en']
        return en.Pages[title]
    except Exception:
        # Best-effort lookup; KeyboardInterrupt / SystemExit still propagate.
        return None
def simp(s):
    """Return ``s`` converted by zhconv to the 'zh-cn' locale."""
    target_locale = 'zh-cn'
    return zhc.convert(s, target_locale)
def comp(s):
    """Return ``s`` converted by zhconv to the 'zh-tw' locale."""
    target_locale = 'zh-tw'
    return zhc.convert(s, target_locale)
def fake(s):
    """Swap the characters ``{``, ``|``, ``=`` and ``}`` for look-alikes.

    The mapping is ``{`` -> ``⦃``, ``|`` -> ``¦``, ``=`` -> ``≈`` and
    ``}`` -> ``⦄``; every other character is left untouched.
    """
    lookalikes = str.maketrans({"{": "⦃", "|": "¦", "=": "≈", "}": "⦄"})
    return s.translate(lookalikes)
def savedata(d, file="data.py"):
    """Write the named module globals to ``file`` as Python assignments.

    The file starts with a comment line listing the names, followed by one
    ``name = value`` line per variable. Values are written with ``repr`` so
    that string values come out as valid Python literals.

    NOTE(review): names are looked up in this module's ``globals()``, whereas
    ``savepckl`` uses ``superglobals()`` -- confirm which scope is intended.

    Args:
        d: A variable name or a list of variable names.
        file: Output path (Default: "data.py").

    Returns:
        None. When ``d`` contains a non-string, a message is printed and
        nothing is written.

    Raises:
        KeyError: A name in ``d`` is not defined; the file is left untouched.
    """
    if not isinstance(d, list):
        d = [d]
    if any(not isinstance(s, str) for s in d):
        print('data should be a string or a list of strings')
        return
    g = globals()
    # Resolve every value before opening the file so that an unknown name
    # cannot truncate a previously written file.
    values = [(s, g[s]) for s in d]
    # Python source files are UTF-8 by default, independent of the locale.
    with open(file, 'w', encoding='utf-8') as f:
        print('# %s' % d, file=f)
        for s, value in values:
            print('%s = %r' % (s, value), file=f)
def postlist(pg, pn, summary='', bot=False):
    """Write ``pn`` to ``pg`` as a numbered wikitext list starting at 0.

    The first entry is emitted as an ``<li value="0">`` item so numbering
    starts at zero; the remaining entries are ordinary ``#`` items. Every
    entry is wrapped in ``[[...]]``. The text is saved through ``tryedit``.

    Args:
        pg: Page passed on to ``tryedit``.
        pn: Non-empty sequence of page names.
        summary: Edit summary.
        bot: Forwarded to ``tryedit``.
    """
    first_item = '# <li value="0">[[%s]]</li>\n' % pn[0]
    other_items = ''.join('# [[%s]]\n' % name for name in pn[1:])
    tryedit(pg, first_item + other_items, summary, bot=bot)
def createjs(site, title, text, prefix='User:CrowleyBot/t/'):
    """Create ``prefix + title`` by editing a template page and moving it.

    ``text`` is first saved to 'User:CrowleyBot/template.js' through
    ``tryedit``; on success that page is moved to ``prefix + title`` without
    leaving a redirect.

    Args:
        site: Site object; the page is fetched via ``site.Pages[...]``, the
            same accessor ``getepg`` uses.
        title: Title suffix of the page to create.
        text: Page content.
        prefix: Title prefix (Default: 'User:CrowleyBot/t/').

    Returns:
        bool: True when the edit succeeded and the move was issued, False
        when the edit failed.
    """
    p = site.Pages['User:CrowleyBot/template.js']
    if not tryedit(p, text):
        return False
    p.move(prefix + title, no_redirect=True)
    return True
# https://stackoverflow.com/questions/15959534/visibility-of-global-variables-in-imported-modules/69029612
def superglobals():
    """Return the globals dict of the outermost frame on the call stack.

    The frame chain is followed through ``f_back`` until the frame that has
    no caller is reached; that frame's ``f_globals`` is returned.
    """
    frame = inspect.currentframe()
    while frame.f_back is not None:
        frame = frame.f_back
    return frame.f_globals
def fromclip(s, t='str'):
    """Turn pasted multi-line text into a list of cleaned entries.

    Each line is stripped, blank lines are dropped, and the markers
    ``<title>``, ``</title>`` and the "what links here" suffix are removed.

    Args:
        s (str): Text with one entry per line.
        t (str): 'int' converts every entry with ``int``; any other value
            keeps the entries as strings (Default: 'str').

    Returns:
        list: The cleaned entries in their original order.
    """
    entries = []
    for raw_line in s.split('\n'):
        entry = raw_line.strip()
        if entry == '':
            continue
        entry = entry.replace('<title>', '').replace('</title>', '')
        entry = entry.replace(' (嵌入) (←链接 | 编辑)', '')
        entries.append(entry)
    if t == 'int':
        return [int(entry) for entry in entries]
    return entries
def fromfile(s, t='str'):
    """Read the file at path ``s`` and parse its content with ``fromclip``.

    Args:
        s: Path of the text file to read.
        t: Entry type forwarded to ``fromclip``.

    Returns:
        list: Whatever ``fromclip`` returns for the file's content.
    """
    # The with-block closes the file automatically.
    with open(s, 'r') as handle:
        contents = handle.read()
    return fromclip(contents, t)
# https://stackoverflow.com/questions/6568007/how-do-i-save-and-restore-multiple-variables-in-python/6568495
def savepckl(d, file="data.pckl"):
    """Pickle the named variables from ``superglobals()`` into ``file``.

    The pickle holds a two-element list: the list of names and a dict
    mapping each name to its value.

    Args:
        d: A variable name or a list of variable names.
        file: Output path (Default: "data.pckl").

    Returns:
        None. When ``d`` contains a non-string, a message is printed and
        nothing is written.
    """
    names = d if isinstance(d, list) else [d]
    if not all(isinstance(name, str) for name in names):
        print('data should be a string or a list of strings')
        return
    scope = superglobals()
    snapshot = {name: scope[name] for name in names}
    with open(file, 'wb') as out:
        pickle.dump([names, snapshot], out)
def loadpckl(file="data.pckl"):
    """Restore variables written by ``savepckl`` into ``superglobals()``.

    The list of restored names is printed. Unpickling can execute arbitrary
    code, so only load files you created yourself.

    Args:
        file: Path of the pickle to read (Default: "data.pckl").
    """
    with open(file, 'rb') as src:
        names, snapshot = pickle.load(src)
    print(names)
    scope = superglobals()
    for name in names:
        scope[name] = snapshot[name]
# import readline
# readline.write_history_file('python_history.txt')
# https://stackoverflow.com/questions/6558765/how-do-you-see-the-entire-command-history-in-interactive-python