# User:CrowleyBot/lib/prelude.py

import mwclient as mwc
import wikitextparser as wtp
import zhconv as zhc

import os, sys, time, re, inspect
import subprocess as subp
from contextlib import redirect_stdout
# with open('/tmp/tmp', 'w') as f, redirect_stdout(f): print('123\n456')

from fishhook import *
import itertools as its
import more_itertools as mits
# make every more_itertools function reachable through the its.* namespace as well
its.__dict__.update(mits.__dict__)
import operator
import functools as fct
import bisect

import collections
from collections import defaultdict
import copy
import csv
import pickle

# shorthand: page.embed() == page.embeddedin()
mwc.page.Page.embed = mwc.page.Page.embeddedin
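
# Example (assuming `site` is a logged-in mwclient.Site created elsewhere, e.g.
# site = mwc.Site('zh.wikipedia.org')): iterate the pages that transclude a template.
# for p in site.Pages['Template:Infobox'].embed():
#     print(p.name)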

#@hook(int)
#def __add__(self, other):
#    return orig(self, other)

# fishhook: graft the methods below directly onto the built-in str type
@hook_cls(str)
class str_hook:
    def replacen(self, pattern, repl, count=0):
        # literal find-and-replace via re.subn; repl may still use backreferences such as \g<0>
        return re.subn(re.escape(pattern), repl, self, count=count)
    
    def replacebang(self, a, b):
        # str is immutable, so rebinding self has no effect; return the result instead
        return self.replace(a, b)
    
    def subbang(self, pattern, repl, count=0, flags=0):
        # same caveat as replacebang: the caller must keep the returned string
        return re.sub(pattern, repl, self, count=count, flags=flags)
    
    def wipec(self, s):
        # remove every character that appears in s
        return re.sub(r'[' + re.escape(s) + r']', '', self)
    
    def wipel(self, l):
        # remove every substring that appears in the list l
        return re.sub(r'(' + '|'.join(map(re.escape, l)) + r')', '', self)
    
    def wipecl(self, s, l):
        # remove characters from s and substrings from l in one pass
        return re.sub(r'([' + re.escape(s) + r']|' + '|'.join(map(re.escape, l)) + r')', '', self)
    
    def findo(self, s):
        # True if the literal substring occurs
        return self.find(s) != -1
    
    def findom(self, s):
        # like findo, but return the re.Match object (or None)
        return re.search(re.escape(s), self)
    
    def findr(self, s):
        # search with s treated as a regular expression
        return re.search(s, self)
    
    def finds(self, s):
        # search for either the simplified or the traditional form of s
        return re.search(r'(%s|%s)' % (re.escape(simp(s)), re.escape(comp(s))), self)
    
    def findsl(self, l):
        # like finds, but for a list of strings
        l = [simp(s) for s in l] + [comp(s) for s in l]
        return re.search(r'(' + '|'.join(map(re.escape, l)) + r')', self)
    
    def findol(self, l):
        # search for any of the literal strings in l
        return re.search(r'(' + '|'.join(map(re.escape, l)) + r')', self)
    
    def simp(self):
        # convert to simplified Chinese (zh-cn)
        return zhc.convert(self, 'zh-cn')
    
    def comp(self):
        # convert to traditional Chinese (zh-tw)
        return zhc.convert(self, 'zh-tw')
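
# A quick sketch of the hooked str methods above (available on every str once this
# module is imported):
# '漢字轉換'.simp()        -> simplified form via zhconv
# 'abc def'.findo('def')   -> True if the literal substring occurs
# 'a|b=c'.wipec('|=')      -> 'abc'  (every listed character removed)
# 'foo bar'.findr(r'b.r')  -> re.Match object or None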

i, j = 0, 0
def search(site, search="", namespace=0, limit=None, generator=True):
    """执行一次全文本搜索。

    API doc: https://www.mediawiki.org/wiki/API:Search

    Args:
        search (str): The query string
        namespace (int): Restricts search to a given namespace (Default: 0)
        limit (int): Maximum amount of pages to return per request
        generator (bool): Return generator (Default: True)

    Returns:
        mwclient.listing.List: Page iterator
    """
    pfx = mwc.listing.List.get_prefix('sr', generator)
    kwargs = dict(mwc.listing.List.generate_kwargs(pfx,
        namespace=namespace, search=search, prop=None))
    return mwc.listing.List.get_list(generator)(
        site, 'search', 'sr', limit=limit, return_values='title',
        **kwargs
    )
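
# Example (assuming `site` is a logged-in mwclient.Site): iterate full-text search
# hits in the main namespace.
# for p in search(site, 'insource:"Infobox"', namespace=0):
#     print(p.name)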

def prefixsearch(site, search="", namespace=0, profile='engine_autoselect', limit=None, generator=True):
    """为页面标题执行前缀搜索。

    API doc: https://www.mediawiki.org/wiki/API:Prefixsearch

    Args:
        search (str): The title prefix to search for
        namespace (int): Restricts search to a given namespace (Default: 0)
        profile (str): Search profile to use (Default: 'engine_autoselect')
        limit (int): Maximum amount of pages to return per request
        generator (bool): Return generator (Default: True)

    Returns:
        mwclient.listing.List: Page iterator
    """
    pfx = mwc.listing.List.get_prefix('ps', generator)
    kwargs = dict(mwc.listing.List.generate_kwargs(
        pfx, search=search, namespace=namespace, profile=profile
    ))
    return mwc.listing.List.get_list(generator)(
        site, 'prefixsearch', 'ps', 
        limit=limit, return_values='title',  **kwargs
    )
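
# Example (assuming `site` is a logged-in mwclient.Site): page titles starting with a prefix.
# for p in prefixsearch(site, '上海'):
#     print(p.name)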

def categorylist(site, title, namespace=0, sort="sortkey", starthexsortkey=None, endhexsortkey=None, startsortkeyprefix=None, endsortkeyprefix=None, limit=None, generator=True):
    """在指定的分类中列出所有页面。

    API doc: https://www.mediawiki.org/wiki/API:Categorymembers

    Args:
        title (str): Full title of the category, including the namespace prefix
        namespace (int): Restricts search to a given namespace (Default: 0)
        sort (str): Property to sort by, 'sortkey' or 'timestamp' (Default: 'sortkey')
        limit (int): Maximum amount of pages to return per request
        generator (bool): Return generator (Default: True)

    Returns:
        mwclient.listing.List: Page iterator
    """
    pfx = mwc.listing.List.get_prefix('cm', generator)
    kwargs = dict(mwc.listing.List.generate_kwargs(pfx, prop=None,
        title=title, namespace=namespace, sort=sort, 
        starthexsortkey=starthexsortkey, endhexsortkey=endhexsortkey,
        startsortkeyprefix=startsortkeyprefix, endsortkeyprefix=endsortkeyprefix))
    return mwc.listing.List.get_list(generator)(
        site, 'categorymembers', 'cm', limit=limit, return_values='title',
        **kwargs
    )

catcat = categorylist
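
# Example (assuming `site` is a logged-in mwclient.Site): members of a category;
# the category name here is only an illustration.
# for p in catcat(site, 'Category:物理学'):
#     print(p.name)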

def tryedit(page, text, summary="", SLP=0, fail=None, failformat="(99, %s)", origin=None, minor=True, bot=True):
    # Try to save `text` to `page` up to five times, sleeping SLP seconds between attempts.
    # If `origin` is given and equals `text`, the edit is skipped as a no-op.
    # On failure, `fail` is either called (if callable) or appended to (if a list).
    if text == origin:
        return True
    for _ in range(5):
        try:
            page.edit(text, summary, minor=minor, bot=bot)
            return True
        except Exception:
            time.sleep(SLP)
    if fail is not None:
        if callable(fail):
            fail()
            return False
        fail.append(failformat % (page.name,))
    return False
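
# Example (assuming `site` is a logged-in mwclient.Site): retry a save and collect
# failures for later inspection.
# failed = []
# pg = site.Pages['User:CrowleyBot/sandbox']
# tryedit(pg, pg.text() + '\ntest', summary='test edit', SLP=5, fail=failed)
# print(failed)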

# reliable
def getepg(pg):
    # follow pg's interlanguage link to English Wikipedia; expects a global `en`
    # (an mwclient.Site for en.wikipedia.org) to be defined by the calling session
    try:
        return en.Pages[dict(pg.langlinks())['en']]
    except Exception:
        return None

def simp(s):
    return zhc.convert(s, 'zh-cn')

def comp(s):
    return zhc.convert(s, 'zh-tw')

def fake(s):
    # swap wikitext-active characters for visually similar stand-ins so the string
    # cannot break template syntax
    return s.replace("{", "⦃").replace("|", "¦").replace("=", "≈").replace("}", "⦄")

def savedata(d, file="data.py"):
    if not isinstance(d, list):
        d = [d]
    if any(not isinstance(s, str) for s in d):
        print('data should be a string or a list of strings')
        return
    g = globals()
    with open(file, 'w') as f:
        print('# %s' % d, file=f)
        for s in d:
            print('%s = %s' % (s, g[s]), file=f)
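
# Example: persist selected globals of the current session to an importable file.
# pages = ['北京', '上海']
# savedata('pages')          # data.py then contains: pages = ['北京', '上海']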

def postlist(pg, pn, summary='', bot=False):
    # post the page names in `pn` to page `pg` as an ordered wikitext list numbered from 0
    s = '# <li value="0">[[%s]]</li>\n' % pn[0]
    for i in range(1, len(pn)):
        s += '# [[%s]]\n' % pn[i]
    tryedit(pg, s, summary, bot=bot)
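
# Example (assuming `site` is a logged-in mwclient.Site): publish a worklist page.
# postlist(site.Pages['User:CrowleyBot/worklist'], ['页面一', '页面二'], summary='update list')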

def createjs(site, title, text, prefix='User:CrowleyBot/t/'):
    # write `text` to the staging page, then move it to the prefixed title
    p = site.Pages['User:CrowleyBot/template.js']
    if tryedit(p, text):
        p.move(prefix + title, no_redirect=True)
        return True
    else:
        return False

# https://stackoverflow.com/questions/15959534/visibility-of-global-variables-in-imported-modules/69029612
def superglobals():
    # globals of the outermost frame, i.e. of the interactive session or script that
    # (directly or indirectly) called into this module
    _globals = dict(inspect.getmembers(inspect.stack()[-1][0]))["f_globals"]
    return _globals

def fromclip(s, t='str'):
    # turn pasted text (e.g. from a browser) into a list of clean titles: one per
    # non-empty line, stripping <title> tags and the zhwiki "WhatLinksHere" suffix
    s = s.split('\n')
    s = [x.strip() for x in s if not x.strip() == '']
    s = [x.replace('<title>', '').replace('</title>', '') for x in s]
    s = [x.replace(' (嵌入) ‎ (←链接 | 编辑)', '') for x in s]
    if t == 'int':
        s = [int(x) for x in s]
    return s
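
# Example: clean a pasted list of titles, one per line, possibly wrapped in <title> tags.
# fromclip('<title>北京</title>\n 上海 \n')  ->  ['北京', '上海']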

def fromfile(s, t='str'):
    with open(s, 'r') as f:
        # the with-statement closes the file automatically
        return fromclip(f.read(), t)

# https://stackoverflow.com/questions/6568007/how-do-i-save-and-restore-multiple-variables-in-python/6568495
def savepckl(d, file="data.pckl"):
    if not isinstance(d, list):
        d = [d]
    if any(not isinstance(s, str) for s in d):
        print('data should be a string or a list of strings')
        return
    g = superglobals()
    dct = {}
    for s in d:
        dct[s] = g[s]
    with open(file, 'wb') as f:
        pickle.dump([d, dct], f)

def loadpckl(file="data.pckl"):
    with open(file, 'rb') as f:
        d, dct = pickle.load(f)
        print(d)
        g = superglobals()
        for s in d:
            g[s] = dct[s]
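
# Example: round-trip selected globals through a pickle between sessions.
# done, queue = {'A'}, ['B', 'C']
# savepckl(['done', 'queue'], file='state.pckl')
# # later, in a fresh session:
# loadpckl('state.pckl')     # restores `done` and `queue` into the session globals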

# import readline
# readline.write_history_file('python_history.txt')
# https://stackoverflow.com/questions/6558765/how-do-you-see-the-entire-command-history-in-interactive-python