local export = {}
local pos_functions = {}
local force_cat = false -- for testing; if true, categories appear in non-mainspace pages
local m_links = require("Module:links")
local table_module = "Module:table"
local headword_utilities_module = "Module:headword utilities"
local string_utilities_module = "Module:string utilities"
local lang = require("Module:languages").getByCode("en")
local langname = lang:getCanonicalName()
local rsplit = mw.text.split
-- Build a wikilink into Appendix:Glossary.
-- `entry` is the anchor within the glossary page; `text` is the displayed text and falls back to `entry`
-- when omitted.
local function glossary_link(entry, text)
	local display = text or entry
	local target = "Appendix:Glossary#" .. entry
	return "[[" .. target .. "|" .. display .. "]]"
end
-- Record a tracking hit for this module under the "en-headword/" namespace via Module:debug/track.
-- Always returns true.
local function track(page)
	local record = require("Module:debug/track")
	record("en-headword/" .. page)
	return true
end
-- The main entry point.
-- This is the only function that can be invoked from a template.
--
-- The part of speech is taken from the first argument of the module invocation (`frame.args[1]`); all other
-- parameters are read from the parent (template) frame and validated through Module:parameters, using the
-- common parameters below merged with the part-of-speech-specific ones registered in `pos_functions`.
-- Returns the output of Module:headword's full_headword() followed by any extra formatted categories, or the
-- JSON serialization of the headword data when json= is set.
function export.show(frame)
	local poscat = frame.args[1] or error("Part of speech has not been specified. Please pass parameter 1 to the module invocation.")

	local params = {
		["head"] = {list = true},
		["id"] = {},
		["json"] = {type = "boolean"},
		["sort"] = {},
		["splithyph"] = {type = "boolean"},
		-- Read as args.nosplithyph by en_split_hyphen_when_space() below. This entry used to be a second
		-- ["nolinkhead"] key, which both left nosplithyph undeclared and overwrote the alias that follows.
		["nosplithyph"] = {type = "boolean"},
		["nolink"] = {type = "boolean"},
		["nolinkhead"] = {type = "boolean", alias_of = "nolink"},
		["hyphspace"] = {type = "boolean"},
		["nosuffix"] = {type = "boolean"},
		["nomultiwordcat"] = {type = "boolean"},
		["pagename"] = {}, -- for testing
	}

	-- Merge in the parameters specific to this part of speech, if any are registered.
	local pos_data = pos_functions[poscat]
	if pos_data then
		for key, val in pairs(pos_data.params) do
			params[key] = val
		end
	end

	local args = require("Module:parameters").process(frame:getParent().args, params, nil, "en-headword", "show")

	local pagename = args.pagename or mw.loadData("Module:headword/data").pagename -- Accounts for unsupported titles.
	local user_specified_heads = args.head
	local heads = user_specified_heads

	-- Compute the automatic head: the bare pagename when linking is disabled or there is nothing to split on
	-- (no space, apostrophe or hyphen), otherwise the pagename with its component words linked.
	local autohead
	if args.nolink or not pagename:find("[ '%-]") then
		autohead = pagename
	else
		local m_headutil = require(headword_utilities_module)
		local en_no_split_apostrophe_words = require("Module:table/listToSet") {
			"one's",
			"someone's",
			"he's",
			"she's",
			"it's",
		}
		local en_include_hyphen_prefixes = require("Module:table/listToSet") {
			-- We don't include things that are also words even though they are often (perhaps mostly) prefixes, e.g.
			-- "be", "counter", "cross", "extra", "half", "mid", "over", "pan", "under".
			"acro",
			"acousto",
			"Afro",
			"agro",
			"anarcho",
			"angio",
			"Anglo",
			"ante",
			"anti",
			"arch",
			"auto",
			"bi",
			"bio",
			"cis",
			"co",
			"cryo",
			"crypto",
			"de",
			"demi",
			"eco",
			"electro",
			"Euro",
			"ex",
			"Greco",
			"hemi",
			"hydro",
			"hyper",
			"hypo",
			"infra",
			"Indo",
			"inter",
			"intra",
			"Judeo",
			"macro",
			"meta",
			"micro",
			"mini",
			"multi",
			"neo",
			"neuro",
			"non",
			"para",
			"peri",
			"post",
			"pre",
			"pro",
			"proto",
			"pseudo",
			"re",
			"semi",
			"sub",
			"super",
			"trans",
			"un",
			"vice",
		}

		-- True if a page named `term` exists and its wikitext contains the literal "==English==\n".
		-- NOTE(review): this looks for an English-language section heading by its English name; confirm that
		-- this is the heading text actually used on the wiki this module runs on.
		local function is_english(term)
			local title = mw.title.new(term)
			if title and title.exists then
				local content = title:getContent()
				if content and content:find("==English==\n") then
					return true
				end
			end
			return false
		end

		-- Decide how to link a hyphenated word inside a multiword term. Returns a complete link, or nil to
		-- let the caller apply its default handling.
		local function en_split_hyphen_when_space(word)
			if not word:find("%-") then
				return nil
			end
			if args.hyphspace then
				-- Link to the space-separated spelling but display the hyphenated one.
				return "[[" .. word:gsub("%-", " ") .. "|" .. word .. "]]"
			end
			if args.nosplithyph then
				return "[[" .. word .. "]]"
			end
			if not args.splithyph then
				-- Prefer an existing English entry: first the space-separated spelling, then the word as is.
				local space_word = word:gsub("%-", " ")
				if is_english(space_word) then
					return "[[" .. space_word .. "|" .. word .. "]]"
				end
				if is_english(word) then
					return "[[" .. word .. "]]"
				end
			end
			return nil
		end

		-- Link a word containing an apostrophe: split off possessive -'s and -' as separate links.
		local function en_split_apostrophe(word)
			local base = word:match("^(.*)'s$")
			if base then
				return "[[" .. base .. "]][[-'s|'s]]"
			end
			base = word:match("^(.*)'$")
			if base then
				if base:find("s$") then
					-- Plural possessive: link to the singular if it has an English entry.
					local sg = require(string_utilities_module).singularize(base)
					if is_english(sg) then
						return "[[" .. sg .. "|" .. base .. "]][[-'|']]"
					end
				end
				return "[[" .. base .. "]][[-'|']]"
			end
			return "[[" .. word .. "]]"
		end

		autohead = m_headutil.add_links_to_multiword_term(pagename, {
			split_hyphen_when_space = en_split_hyphen_when_space,
			split_apostrophe = en_split_apostrophe,
			no_split_apostrophe_words = en_no_split_apostrophe_words,
			include_hyphen_prefixes = en_include_hyphen_prefixes,
		})
	end

	if #heads == 0 then
		heads = {autohead}
	else
		for i, head in ipairs(heads) do
			-- A head beginning with ~ is a set of link modifiers to apply to the automatic head.
			if head:find("^~") then
				head = require(headword_utilities_module).apply_link_modifiers(autohead, head:sub(2))
				heads[i] = head
			end
			if head == autohead then
				track("redundant-head")
			end
		end
	end

	local data = {
		lang = lang,
		pos_category = poscat,
		categories = {},
		heads = heads,
		user_specified_heads = user_specified_heads,
		no_redundant_head_cat = #user_specified_heads == 0,
		inflections = {},
		nomultiwordcat = args.nomultiwordcat,
		sort_key = args.sort,
		pagename = args.pagename,
		-- This is always set, and in the case of unsupported titles, it's the displayed version (e.g. 'C|N>K' instead of
		-- 'Unsupported titles/C through N to K').
		displayed_pagename = pagename,
		id = args.id,
		force_cat_output = force_cat,
	}

	-- Pages starting with a single hyphen are categorized as suffixes forming the given part of speech,
	-- unless nosuffix= is set or the part of speech is already "suffix forms".
	local is_suffix = false
	if not args.nosuffix and pagename:find("^%-") and not pagename:find("^%-%-") and poscat ~= "suffix forms" then
		is_suffix = true
		data.pos_category = "後綴"
		local singular_poscat = require(string_utilities_module).singularize(poscat)
		table.insert(data.categories, "構成" .. singular_poscat .. "的" .. langname .. "後綴")
		table.insert(data.inflections, {label = "構成" .. singular_poscat .. "的後綴"})
	end

	if pos_data then
		pos_data.func(args, data, is_suffix)
	end

	local extra_categories = {}

	if pagename:find("[Qq][^Uu]") or pagename:find("[Qq]$") then
		table.insert(data.categories, "Q後不接U的" .. langname .. "詞")
	end
	-- mw.ustring.toNFD performs decomposition, so letters that decompose
	-- to an ASCII vowel and a diacritic, such as é, are counted as vowels and
	-- do not need to be included in the pattern.
	if not mw.ustring.find(mw.ustring.lower(mw.ustring.toNFD(pagename)), "[aeiouyæœø]") then
		table.insert(data.categories, "無元音的" .. langname .. "詞")
	end
	if pagename:find("yre$") then
		table.insert(data.categories, "以-yre結尾的" .. langname .. "詞")
	end
	if not pagename:find(" ") and mw.ustring.len(pagename) >= 25 then
		table.insert(extra_categories, langname .. '長單詞')
	end
	if pagename:find("^[^aeiou ]*a[^aeiou ]*e[^aeiou ]*i[^aeiou ]*o[^aeiou ]*u[^aeiou ]*$") then
		table.insert(data.categories, "按字母順序使用了所有元音的" .. langname .. "詞")
	end

	if args.json then
		return require("Module:JSON").toJSON(data)
	end

	return require("Module:headword").full_headword(data)
		.. (#extra_categories > 0
			and require("Module:utilities").format_categories(extra_categories, lang, args.sort)
			or "")
end
-- Common work for adjectives and adverbs: convert the gathered comparative/superlative specs into two
-- inflection lists appended to `data.inflections` (comparatives first, then superlatives).
--
-- `params` is a list of {comp, comp_qual, sup, sup_qual} entries; when it is empty a single {"more"} entry is
-- added to it in place. `data.displayed_pagename` supplies the lemma.
-- Raises an error when a full comparative not ending in -er is given without a superlative.
local function make_comparatives(params, data)
	local comparatives = {label = glossary_link("比較級"), accel = {form = "comparative"}}
	local superlatives = {label = glossary_link("最高級"), accel = {form = "superlative"}}
	local lemma = data.displayed_pagename

	if #params == 0 then
		params[1] = {"more"}
	end

	-- Stem for the "er" shorthand: -(e)y after a non-vowel letter becomes -i, then a final -e is dropped.
	local stem = lemma:gsub("([^aeiou])e?y$", "%1i")
	stem = stem:gsub("e$", "")

	-- Each entry yields at most one comparative and one superlative form.
	for i, entry in ipairs(params) do
		local comp, comp_qual, sup, sup_qual = entry[1], entry[2], entry[3], entry[4]
		local comp_term, sup_term

		if comp == "more" and lemma ~= "many" and lemma ~= "much" then
			comp_term = "more [[" .. lemma .. "]]"
			sup_term = "most [[" .. lemma .. "]]"
		elseif comp == "further" and lemma ~= "far" then
			comp_term = "further [[" .. lemma .. "]]"
			sup_term = "furthest [[" .. lemma .. "]]"
		elseif comp == "er" then
			comp_term = stem .. "er"
			sup_term = stem .. "est"
		elseif comp == "-" or sup == "-" then
			-- A "-" suppresses that form; whatever was given for the other one is used as is.
			if comp ~= "-" then
				comp_term = comp
			end
			if sup ~= "-" then
				sup_term = sup
			end
		else
			-- Full comparative given without a superlative: derive it by turning the final -er into -est.
			if not sup then
				if not comp:find("er$") then
					error("The superlative of \"" .. comp .. "\" cannot be generated automatically. Please provide it with the \"sup" .. (i == 1 and "" or i) .. "=\" parameter.")
				end
				sup = comp:gsub("er$", "est")
			end
			comp_term = comp
			sup_term = sup
		end

		if comp_term then
			comparatives[#comparatives + 1] = {term = comp_term, q = {comp_qual}}
		end
		if sup_term then
			superlatives[#superlatives + 1] = {term = sup_term, q = {sup_qual}}
		end
	end

	local inflections = data.inflections
	table.insert(inflections, comparatives)
	table.insert(inflections, superlatives)
end
-- Add the definite article to the heads in `data.heads`.
-- With def=~ every head is kept and followed by its "the "-prefixed variant (a new list replaces
-- `data.heads`); with any other value each head is replaced in place by its "the "-prefixed variant.
local function make_heads_definite(args, data)
	local heads = data.heads

	if args.def ~= "~" then
		for idx, head in ipairs(heads) do
			heads[idx] = "the " .. head
		end
		return
	end

	local expanded = {}
	for _, head in ipairs(heads) do
		expanded[#expanded + 1] = head
		expanded[#expanded + 1] = "the " .. head
	end
	data.heads = expanded
end
-- Adjectives. 1=, 2=, ... give comparatives ("more", "further", "er", a full form, or "-"), sup=, sup2=, ...
-- give superlatives, and compN_qual= / supN_qual= give qualifiers. A first parameter of "?" suppresses all
-- output, "-" marks the adjective as not (generally) comparable, and a lone "+" as comparative-only.
pos_functions["adjectives"] = {
	params = {
		[1] = {list = true, allow_holes = true},
		["def"] = {},
		["the"] = {alias_of = "def"},
		["comp_qual"] = {list = "comp\1_qual", allow_holes = true},
		["sup"] = {list = true, allow_holes = true},
		["sup_qual"] = {list = "sup\1_qual", allow_holes = true},
	},
	func = function(args, data)
		if args.def then
			make_heads_definite(args, data)
		end

		local first = args[1][1]
		-- "?" means: show nothing at all.
		if first == "?" then
			return
		end

		-- "-" (not comparable) and a lone "+" (comparative only) both occupy the first slot, so the real
		-- comparatives start one position later.
		local is_not_comparable = first == "-"
		local is_comparative_only = not is_not_comparable and first == "+" and args[1].maxindex == 1
		local shift = (is_not_comparable or is_comparative_only) and 1 or 0

		-- Gather all the comparative and superlative parameters.
		-- NOTE(review): sup is read at index i while the other three lists are read at i + shift; confirm
		-- that this asymmetry is intended.
		local params = {}
		for i = 1, args[1].maxindex - shift do
			local shifted = i + shift
			local comp = args[1][shifted]
			local sup = args["sup"][i]
			if comp or sup then
				params[#params + 1] = {comp, args["comp_qual"][shifted], sup, args["sup_qual"][shifted]}
			end
		end

		if shift == 1 then
			if #params > 0 then
				-- Forms were given after the marker: "not generally comparable", followed by the forms.
				table.insert(data.inflections, {label = "一般不用" .. glossary_link("比較級")})
			elseif is_not_comparable then
				-- Only "-" was given: show "not comparable" and stop.
				table.insert(data.inflections, {label = "無" .. glossary_link("比較級")})
				table.insert(data.categories, langname .. "無比較級形容詞")
				return
			elseif is_comparative_only then
				table.insert(data.inflections, {label = "僅用" .. glossary_link("比較級")})
				table.insert(data.categories, langname .. "僅比較級形容詞")
				return
			end
		end

		-- Process the parameters
		make_comparatives(params, data)
	end
}
pos_functions["形容詞"] = pos_functions["adjectives"]
-- Adverbs. Same comparative/superlative parameters as adjectives, without def= and without the
-- comparative-only "+" marker. A first parameter of "?" suppresses all output and "-" marks the adverb as
-- not (generally) comparable.
pos_functions["adverbs"] = {
	params = {
		[1] = {list = true, allow_holes = true},
		["comp_qual"] = {list = "comp\1_qual", allow_holes = true},
		["sup"] = {list = true, allow_holes = true},
		["sup_qual"] = {list = "sup\1_qual", allow_holes = true},
	},
	func = function(args, data)
		local first = args[1][1]
		-- "?" means: show nothing at all.
		if first == "?" then
			return
		end

		-- A leading "-" occupies the first slot, so the real comparatives start one position later.
		local shift = first == "-" and 1 or 0

		-- Gather all the comparative and superlative parameters.
		-- NOTE(review): sup is read at index i while the other three lists are read at i + shift; confirm
		-- that this asymmetry is intended.
		local params = {}
		for i = 1, args[1].maxindex - shift do
			local shifted = i + shift
			local comp = args[1][shifted]
			local sup = args["sup"][i]
			if comp or sup then
				params[#params + 1] = {comp, args["comp_qual"][shifted], sup, args["sup_qual"][shifted]}
			end
		end

		if shift == 1 then
			if #params == 0 then
				-- Only "-" was given: show "not comparable" and stop.
				table.insert(data.inflections, {label = "不" .. glossary_link("可比")})
				table.insert(data.categories, langname .. "無比較級副詞")
				return
			end
			-- Forms were given after "-": "not generally comparable", followed by the forms.
			table.insert(data.inflections, {label = "一般不" .. glossary_link("可比")})
		end

		-- Process the parameters
		make_comparatives(params, data)
	end
}
pos_functions["副詞"] = pos_functions["adverbs"]
-- Conjunctions: no inflections; the only extra parameter is 1=, declared as an alias of head=.
do
	local conjunction_data = {}
	conjunction_data.params = {
		[1] = {alias_of = "head"},
	}
	conjunction_data.func = function(args, data)
		-- Nothing specific to this part of speech.
	end
	pos_functions["conjunctions"] = conjunction_data
	pos_functions["連詞"] = conjunction_data
end
-- Interjections: no inflections; the only extra parameter is 1=, declared as an alias of head=.
do
	local interjection_data = {}
	interjection_data.params = {
		[1] = {alias_of = "head"},
	}
	interjection_data.func = function(args, data)
		-- Nothing specific to this part of speech.
	end
	pos_functions["interjections"] = interjection_data
	pos_functions["感嘆詞"] = interjection_data
end
-- Form the regular English plural of `noun`: -es after s, x, z, ch or sh; -ies in place of a final y that
-- follows a non-vowel letter; otherwise -s. In the result every ":" and "#" is prefixed with a backslash
-- (and a backslash already standing before one of them is doubled first).
local function default_plural(noun)
	local plural
	local takes_es = noun:find("[sxz]$") ~= nil or noun:find("[cs]h$") ~= nil
	if takes_es then
		plural = noun .. "es"
	elseif noun:find("[^aeiou]y$") then
		plural = noun:sub(1, -2) .. "ies"
	else
		plural = noun .. "s"
	end

	-- Two passes; each assignment keeps only the string and drops gsub's match count.
	local escaped = plural:gsub("\\([:#])", "\\\\%1")
	escaped = escaped:gsub("[:#]", "\\%0")
	return escaped
end
-- Expand a plural shorthand. "s" and "es" are appended to `stem`, and the result gets the same ":"/"#"
-- backslash escaping that default_plural() applies; "+" yields default_plural(pagename). Anything else
-- (including non-string values) returns nil, meaning the value is not a shorthand.
local function canonicalize_plural(pl, stem, pagename)
	if pl == "+" then
		return default_plural(pagename)
	end
	if pl ~= "s" and pl ~= "es" then
		return nil
	end

	-- Two passes; each assignment keeps only the string and drops gsub's match count.
	local escaped = (stem .. pl):gsub("\\([:#])", "\\\\%1")
	escaped = escaped:gsub("[:#]", "\\%0")
	return escaped
end
-- Shared headword logic for nouns and proper nouns.
-- `args` is the processed template arguments (1= list of plurals or markers, plqual, sg/sgqual, attr/attrqual,
-- def); `data` is the headword data table, whose `heads`, `genders`, `inflections` and `categories` are
-- modified in place. A truthy `is_proper` makes the noun uncountable by default and suppresses the
-- automatically generated plural.
-- Recognized markers in the first plural slot: "p" (plurale tantum), "-" (uncountable), "~" (countable and
-- uncountable), "?" (plural unknown), "!" (plural unattested). "?" and "!" are checked after "-"/"~" have
-- been removed, so they may also follow one of those.
-- Raises an error when explicit plurals are combined with "p", "?" or "!".
local function do_nouns(args, data, is_proper)
local pagename = data.displayed_pagename
-- Collect the values of list parameter `infl_field`; a value with a qualifier at the same index in
-- `qual_field` becomes {term = ..., q = {qualifier}}, otherwise it stays a plain string. `label`, when
-- given, is stored on the returned list.
local function gather_inflections_with_quals(infl_field, qual_field, label)
-- Gather all the inflections of this field from the corresponding list parameter.
local infls = {}
if label then
infls.label = label
end
for i, infl in ipairs(args[infl_field]) do
local qual = args[qual_field][i]
if qual then
table.insert(infls, {term = infl, q = {qual}})
else
table.insert(infls, infl)
end
end
return infls
end
if args.def then
make_heads_definite(args, data)
end
local plurals = gather_inflections_with_quals(1, "plqual")
if plurals[1] == "p" then
-- plurale tantum
if #plurals > 1 then
error("With plurale tantum noun, can't specify more than one plural")
end
data.genders = {"p"} -- this should auto-insert the correct 'pluralia tantum' category
-- With singular forms given the noun is only "usually plural"; otherwise it is plural-only.
if #args.sg > 0 then
table.insert(data.inflections, {label = "通常複數"})
table.insert(data.inflections, gather_inflections_with_quals("sg", "sgqual", "singular"))
else
table.insert(data.inflections, {label = "唯複"})
end
if #args.attr > 0 then
table.insert(data.inflections, gather_inflections_with_quals("attr", "attrqual", "attributive"))
end
return
end
-- Proper nouns never get an automatically generated plural.
local need_default_plural = not is_proper
if plurals[1] == "-" then
-- Uncountable noun; may occasionally have a plural
table.remove(plurals, 1) -- Remove the "-"
table.insert(data.categories, langname .. "不可數名詞")
-- If plural forms were given explicitly, then show "usually"
if #plurals > 0 then
table.insert(data.inflections, {label = "通常" .. glossary_link("不可數")})
table.insert(data.categories, langname .. "可數名詞")
else
table.insert(data.inflections, {label = glossary_link("不可數")})
end
need_default_plural = false
elseif plurals[1] == "~" then
-- Mixed countable/uncountable noun, always has a plural
table.remove(plurals, 1) -- Remove the "~"
table.insert(data.inflections, {label = glossary_link("可數") .. " <small>和</small> " .. glossary_link("不可數")})
table.insert(data.categories, langname .. "不可數名詞")
table.insert(data.categories, langname .. "可數名詞")
-- If no plural was given, add a default one now
if #plurals == 0 then
plurals = {default_plural(pagename)}
end
elseif is_proper then
-- For proper nouns, the default is uncountable
table.insert(data.categories, langname .. "不可數名詞")
else
-- For common nouns, the default is countable, has a plural
table.insert(data.categories, langname .. "可數名詞")
end
-- Plural is unknown
if plurals[1] == "?" then
table.remove(plurals, 1) -- Remove the "?"
-- Not desired; see [[Wiktionary:Tea_room/2021/August#"Plural unknown or uncertain"]]
-- table.insert(data.inflections, {label = "plural unknown or uncertain"})
table.insert(data.categories, "複數形式不確定或無紀錄的" .. langname .. "名詞")
if #plurals > 0 then
error("Can't specify explicit plurals along with '?' for unknown/uncertain plural")
end
return
end
-- Plural is not attested
if plurals[1] == "!" then
table.remove(plurals, 1) -- Remove the "!"
table.insert(data.inflections, {label = "複數無記錄"})
table.insert(data.categories, "複數形式無記錄的" .. langname .. "名詞")
if #plurals > 0 then
error("Can't specify explicit plurals along with '!' for unattested plural")
end
return
end
-- If no plural was given, maybe add a default one, otherwise (when "-" was given) return
if #plurals == 0 then
if need_default_plural then
plurals = {default_plural(pagename)}
else
return
end
end
-- There are plural forms to show, so show them
local pl_parts = {label = "複數", accel = {form = "p"}}
-- True if `pl` is `stem` with a final y after a non-vowel letter replaced by -ies.
local function check_ies(pl, stem)
local newplural, nummatches = stem:gsub("([^aeiou])y$","%1ies")
return nummatches > 0 and pl == newplural
end
local stem = pagename
local irregular = false
for i, pl in ipairs(plurals) do
-- Expand the shorthands "s", "es" and "+"; `pl` may be a plain string or a {term =, q =} table.
local canon_pl = canonicalize_plural(pl, stem, pagename)
if canon_pl then
table.insert(pl_parts, canon_pl)
elseif type(pl) == "table" then
canon_pl = canonicalize_plural(pl.term, stem, pagename)
if canon_pl then
table.insert(pl_parts, {term = canon_pl, q = pl.q})
end
end
if not canon_pl then
-- An explicitly spelled-out plural: show it as given and check whether it is regular.
table.insert(pl_parts, pl)
if type(pl) == "table" then
pl = pl.term
end
local check_pl = m_links.get_link_page(pl, lang)
-- Only single-word lemmas are checked: irregular means not stem+s, stem+es or the y -> ies pattern.
if not stem:find(" ") and not (check_pl == stem .. "s" or check_pl == stem .. "es" or check_ies(check_pl, stem)) then
irregular = true
-- A plural identical to the lemma additionally gets the indeclinable-noun category.
if check_pl == stem then
table.insert(data.categories, langname .. "無變化名詞")
end
end
end
end
if irregular then
table.insert(data.categories, "複數不規則的" .. langname .. "名詞")
end
table.insert(data.inflections, pl_parts)
end
-- Build a fresh parameter spec table for nouns and proper nouns. The two currently share the same spec, so
-- `is_proper` is accepted but not consulted.
local function get_noun_params(is_proper)
	local noun_params = {}
	noun_params[1] = {list = true, disallow_holes = true}
	noun_params["def"] = {}
	noun_params["the"] = {alias_of = "def"}
	noun_params["pl\1qual"] = {list = true, allow_holes = true}
	-- The following four are only used for pluralia tantum (1=p).
	for _, field in ipairs({"sg", "attr"}) do
		noun_params[field] = {list = true, disallow_holes = true}
		noun_params[field .. "\1qual"] = {list = true, allow_holes = true}
	end
	return noun_params
end
-- Nouns and proper nouns share do_nouns(); each gets its own freshly built parameter table.
-- NOTE(review): export.show calls func(args, data, is_suffix). For "nouns" func is do_nouns itself, so
-- is_suffix arrives as do_nouns's third parameter (is_proper); the proper-noun wrapper discards it. Confirm
-- whether treating suffix pages as proper nouns is intended.
local common_noun_data = {}
common_noun_data.params = get_noun_params(false)
common_noun_data.func = do_nouns

local proper_noun_data = {}
proper_noun_data.params = get_noun_params("is proper")
proper_noun_data.func = function(args, data)
	return do_nouns(args, data, "is proper")
end

pos_functions["nouns"] = common_noun_data
pos_functions["proper nouns"] = proper_noun_data
pos_functions["名詞"] = common_noun_data
pos_functions["專有名詞"] = proper_noun_data
-- Compute the default third-person singular, present participle and past forms of a single-word verb.
-- Returns s_form, ing_form, ed_form. The s-form comes from default_plural(); the other two get the same
-- ":"/"#" backslash escaping applied at the end.
local function base_default_verb_forms(verb)
	local umatch = mw.ustring.match
	local vowel = "aeiouáéíóúàèìòùâêîôûäëïöüæœø"
	local ulvowel = vowel .. "AEIOUÁÉÍÓÚÀÈÌÒÙÂÊÎÔÛÄËÏÖÜÆŒØ"

	local s_form = default_plural(verb)

	-- Backslash-escape ":" and "#"; each assignment keeps only the string and drops gsub's match count.
	local function escape(form)
		local escaped = form:gsub("\\([:#])", "\\\\%1")
		escaped = escaped:gsub("[:#]", "\\%0")
		return escaped
	end

	-- (1) Check for C*VC verbs.
	--
	-- flip -> flipping/flipped, strum -> strumming/strummed, nag -> nagging/nagged, etc.
	-- Do not include words with final -y, e.g. 'stay' (staying/stayed), 'toy' (toying/toyed),
	-- or with final -w, e.g. 'flow' (flowing/flowed), or with final -h, e.g. 'ah' (ahing/ahed),
	-- or with final -x, e.g. 'box' (boxing/boxed), or ending in an uppercase consonant,
	-- e.g. 'XOR' (XORing/XORed), 'OK' (OKing/OKed). Check specially for initial y- as a consonant,
	-- e.g. 'yip' (yipping/yipped), otherwise treat y as a vowel, so we don't trigger on 'hyphen'
	-- but do trigger on 'gyp'.
	local doubled = umatch(verb, "^[Yy][" .. vowel .. "y]([^A-Z" .. vowel .. "ywxh])$")
		or umatch(verb, "^[^" .. ulvowel .. "yY]*[" .. ulvowel .. "yY]([^A-Z" .. vowel .. "ywxh])$")
	if doubled then
		return s_form, escape(verb .. doubled .. "ing"), escape(verb .. doubled .. "ed")
	end

	-- (2) Generate -ing form.
	local function build_ing()
		-- (2a) lie -> lying, untie -> untying, etc.
		local stem = verb:match("^(.*)ie$")
		if stem then
			return stem .. "ying"
		end
		-- (2b) argue -> arguing, sprue -> spruing, dialogue -> dialoguing, etc.
		stem = verb:match("^(.*)ue$")
		if stem then
			return stem .. "uing"
		end
		-- (2c) baptize -> baptizing, rake -> raking, type -> typing, parse -> parsing, etc.
		-- (ending in vowel + consonant(s) + -e); but not referee -> refereeing,
		-- backhoe -> backhoeing, redye -> redyeing (ending in some other vowel + -e or in -ye);
		-- and not be -> being (no vowel before the consonant preceding the -e)
		stem = umatch(verb, "^(.*[" .. ulvowel .. "yY][^" .. vowel .. "y]+)e$")
		if stem then
			return stem .. "ing"
		end
		-- (2d) regular verbs
		return verb .. "ing"
	end

	-- (3) Generate -ed form.
	local function build_ed()
		-- (3a) baptize -> baptized, rake -> raked, parse -> parsed, free -> freed, hoe -> hoed
		if verb:find("e$") then
			return verb .. "d"
		end
		-- (3b) marry -> married, levy -> levied, try -> tried, etc.; but not toy -> toyed
		local stem = umatch(verb, "^(.*[^" .. ulvowel .. "yY])y$")
		if stem then
			return stem .. "ied"
		end
		-- (3c) regular verbs
		return verb .. "ed"
	end

	local ing_form = build_ing()
	local ed_form = build_ed()
	return s_form, escape(ing_form), escape(ed_form)
end
-- Compute default verb forms for `verb`, which may be a multiword expression.
-- Returns six values: the s/-ing/-ed forms obtained by inflecting the whole string, followed by the
-- s/-ing/-ed forms obtained by inflecting only the first word (everything from the first space onward is
-- appended unchanged). The last three are nil when `verb` contains no space.
local function default_verb_forms(verb)
	local whole_s, whole_ing, whole_ed = base_default_verb_forms(verb)
	if not verb:find(" ") then
		return whole_s, whole_ing, whole_ed, nil, nil, nil
	end

	local first_word, remainder = verb:match("^(.-)( .*)$")
	local head_s, head_ing, head_ed = base_default_verb_forms(first_word)
	return whole_s, whole_ing, whole_ed, head_s .. remainder, head_ing .. remainder, head_ed .. remainder
end
pos_functions["verbs"] = {
params = {
[1] = {list = "pres_3sg", allow_holes = true},
["pres_3sg_qual"] = {list = "pres_3sg\1_qual", allow_holes = true},
[2] = {list = "pres_ptc", allow_holes = true},
["pres_ptc_qual"] = {list = "pres_ptc\1_qual", allow_holes = true},
[3] = {list = "past", allow_holes = true},
["past_qual"] = {list = "past\1_qual", allow_holes = true},
[4] = {list = "past_ptc", allow_holes = true},
["past_ptc_qual"] = {list = "past_ptc\1_qual", allow_holes = true},
["noautolinkverb"] = {type = "boolean"},
},
func = function(args, data)
-- Get parameters
local par1 = args[1][1]
local par2 = args[2][1]
local par3 = args[3][1]
local par4 = args[4][1]
local pres_3sgs, pres_ptcs, pasts, past_ptcs
local pagename = data.displayed_pagename
------------------------------------------- UTILITY FUNCTIONS #1 ------------------------------------------
-- These functions are used directly in the <> format as well as in the utility functions #2 below.
local function compute_double_last_cons_stem(verb)
local last_cons = verb:match("([bcdfghjklmnpqrstvwxyzBCDFGHJKLMNPQRSTVWXYZ])$")
if not last_cons then
error("Verb stem '" .. verb .. "' must end in a consonant to use ++")
end
return verb .. last_cons
end
local function compute_plusplus_s_form(verb, default_s_form)
if verb:find("[sz]$") then
-- regas -> regasses, derez -> derezzes
return compute_double_last_cons_stem(verb) .. "es"
else
return default_s_form
end
end
------------------------------------------- UTILITY FUNCTIONS #2 ------------------------------------------
-- These functions are used in both in the separate-parameter format and in the override params such as past_ptc2=.
local new_default_s, new_default_ing, new_default_ed, split_default_s, split_default_ing, split_default_ed =
default_verb_forms(pagename)
local function compute_double_last_cons_stem_of_split_verb(verb, ending)
local first, rest = verb:match("^(.-)( .*)$")
if not first then
error("Verb '" .. verb .. "' must have a space in it to use ++*")
end
local last_cons = first:match("([bcdfghjklmnpqrstvwxyzBCDFGHJKLMNPQRSTVWXYZ])$")
if not last_cons then
error("First word '" .. first .. "' must end in a consonant to use ++*")
end
return first .. last_cons .. ending .. rest
end
local function check_non_nil_star_form(form)
if form == nil then
error("Verb '" .. pagename .. "' must have a space in it to use * or ++*")
end
return form
end
local function sub_tilde(form)
if not form then
return nil
end
local retval = form:gsub("~", pagename) -- discard second return value
return retval
end
local function canonicalize_s_form(form)
if form == "+" then
return new_default_s
elseif form == "*" then
return check_non_nil_star_form(split_default_s)
elseif form == "++" then
return compute_plusplus_s_form(pagename, new_default_s)
elseif form == "++*" then
if pagename:find("^[^ ]*[sz] ") then
return compute_double_last_cons_stem_of_split_verb(pagename, "es")
else
return check_non_nil_star_form(split_default_s)
end
else
return sub_tilde(form)
end
end
local function canonicalize_ing_form(form)
if form == "+" then
return new_default_ing
elseif form == "*" then
return check_non_nil_star_form(split_default_ing)
elseif form == "++" then
return compute_double_last_cons_stem(pagename) .. "ing"
elseif form == "++*" then
return compute_double_last_cons_stem_of_split_verb(pagename, "ing")
else
return sub_tilde(form)
end
end
local function canonicalize_ed_form(form)
if form == "+" then
return new_default_ed
elseif form == "*" then
return check_non_nil_star_form(split_default_ed)
elseif form == "++" then
return compute_double_last_cons_stem(pagename) .. "ed"
elseif form == "++*" then
return compute_double_last_cons_stem_of_split_verb(pagename, "ed")
else
return sub_tilde(form)
end
end
--------------------------------- MAIN PARSING/CONJUGATING CODE --------------------------------
local past_ptcs_given
if par1 and par1:find("<") then
-------------------------- ANGLE-BRACKET FORMAT --------------------------
if par2 or par3 or par4 then
error("Can't specify 2=, 3= or 4= when 1= contains angle brackets: " .. par1)
end
-- In the angle bracket format, we always copy the full past tense specs to the past participle
-- specs if none of the latter are given, so act as if the past participle is always given.
-- There is a separate check to see if the past tense and past participle are identical, in any case.
past_ptcs_given = true
local iut = require("Module:inflection utilities")
-- (1) Parse the indicator specs inside of angle brackets.
local function parse_indicator_spec(angle_bracket_spec)
local inside = angle_bracket_spec:match("^<(.*)>$")
assert(inside)
local segments = iut.parse_balanced_segment_run(inside, "[", "]")
local comma_separated_groups = iut.split_alternating_runs(segments, ",")
if #comma_separated_groups > 4 then
error("Too many comma-separated parts in indicator spec: " .. angle_bracket_spec)
end
local function fetch_qualifiers(separated_group)
local qualifiers
for j = 2, #separated_group - 1, 2 do
if separated_group[j + 1] ~= "" then
error("Extraneous text after bracketed qualifiers: '" .. table.concat(separated_group) .. "'")
end
if not qualifiers then
qualifiers = {}
end
table.insert(qualifiers, separated_group[j])
end
return qualifiers
end
local function fetch_specs(comma_separated_group)
if not comma_separated_group then
return {{}}
end
local specs = {}
local colon_separated_groups = iut.split_alternating_runs(comma_separated_group, ":")
for _, colon_separated_group in ipairs(colon_separated_groups) do
local form = colon_separated_group[1]
if form == "*" or form == "++*" then
error("* and ++* not allowed inside of indicator specs: " .. angle_bracket_spec)
end
if form == "" then
form = nil
end
table.insert(specs, {form = form, q = fetch_qualifiers(colon_separated_group)})
end
return specs
end
local s_specs = fetch_specs(comma_separated_groups[1])
local ing_specs = fetch_specs(comma_separated_groups[2])
local ed_specs = fetch_specs(comma_separated_groups[3])
local en_specs = fetch_specs(comma_separated_groups[4])
for _, spec in ipairs(s_specs) do
if spec.form == "++" and #ing_specs == 1 and not ing_specs[1].form and not ing_specs[1].q
and #ed_specs == 1 and not ed_specs[1].form and not ed_specs[1].q then
ing_specs[1].form = "++"
ed_specs[1].form = "++"
break
end
end
return {
forms = {},
s_specs = s_specs,
ing_specs = ing_specs,
ed_specs = ed_specs,
en_specs = en_specs,
}
end
local parse_props = {
parse_indicator_spec = parse_indicator_spec,
}
local alternant_multiword_spec = iut.parse_inflected_text(par1, parse_props)
-- (2) Check for user-specified brackets; remove any links from the lemma, but remember the original
-- form so we can use it below in the 'lemma_linked' form.
-- Check to see if there are brackets in the pre-text or post-text. If so, use the linked lemma (with the
-- verb autolinked unless noautolinkverb is given). Otherwise, use the default headword algorithm.
local function check_bracket(val)
if val:find("%[%[") then
alternant_multiword_spec.saw_bracket = true
end
end
for _, alternant_or_word_spec in ipairs(alternant_multiword_spec.alternant_or_word_specs) do
check_bracket(alternant_or_word_spec.before_text)
if alternant_or_word_spec.alternants then
for _, multiword_spec in ipairs(alternant_or_word_spec.alternants) do
for _, word_spec in ipairs(multiword_spec.word_specs) do
check_bracket(word_spec.before_text)
end
check_bracket(multiword_spec.post_text)
end
end
end
check_bracket(alternant_multiword_spec.post_text)
iut.map_word_specs(alternant_multiword_spec, function(base)
if base.lemma == "" then
base.lemma = pagename
end
base.orig_lemma = base.lemma
base.lemma = m_links.remove_links(base.lemma)
if args.noautolinkverb or base.orig_lemma:find("%[%[") then
base.linked_lemma = base.orig_lemma
else
base.linked_lemma = "[[" .. base.orig_lemma .. "]]"
end
end)
-- (3) Conjugate the verbs according to the indicator specs parsed above.
local all_verb_slots = {
lemma = "infinitive",
lemma_linked = "infinitive",
s_form = "3|s|pres",
ing_form = "pres|ptcp",
ed_form = "past",
en_form = "past|ptcp",
}
local function conjugate_verb(base)
local def_s_form, def_ing_form, def_ed_form = base_default_verb_forms(base.lemma)
local function process_specs(slot, specs, default_form, canonicalize_plusplus)
for _, spec in ipairs(specs) do
local form = spec.form
if not form or form == "+" then
form = default_form
elseif form == "++" then
form = canonicalize_plusplus()
end
-- If there's a ~ in the form, substitute it with the lemma,
-- but make sure to first replace % in the lemma with %% so that
-- it doesn't get interpreted as a capture replace expression.
if form:find("~") then
-- Assign to a var because gsub returns multiple values.
local subbed_lemma = base.lemma:gsub("%%", "%%%%")
form = form:gsub("~", subbed_lemma)
end
-- If the form is -, don't insert any forms, which will result
-- in there being no overall forms (in fact it will be nil).
-- We check for that down below and substitute a single "-" as
-- the form, which in turn gets turned into special labels like
-- "no present participle".
if form ~= "-" then
iut.insert_form(base.forms, slot, {form = form, footnotes = spec.q})
end
end
end
process_specs("s_form", base.s_specs, def_s_form,
function() return compute_plusplus_s_form(base.lemma, def_s_form) end)
process_specs("ing_form", base.ing_specs, def_ing_form,
function() return compute_double_last_cons_stem(base.lemma) .. "ing" end)
process_specs("ed_form", base.ed_specs, def_ed_form,
function() return compute_double_last_cons_stem(base.lemma) .. "ed" end)
-- If the -en spec is completely missing, substitute the -ed spec in its entirely.
-- Otherwise, if individual -en forms are missing or use +, we will substitute the
-- default -ed form, as with the -ed spec.
local en_specs = base.en_specs
if #en_specs == 1 and not en_specs[1].form and not en_specs[1].q then
en_specs = base.ed_specs
end
process_specs("en_form", en_specs, def_ed_form,
function() return compute_double_last_cons_stem(base.lemma) .. "ed" end)
iut.insert_form(base.forms, "lemma", {form = base.lemma})
-- Add linked version of lemma for use in head=. We write this in a general fashion in case
-- there are multiple lemma forms (which isn't possible currently at this level, although it's
-- possible overall using the ((...,...)) notation).
iut.insert_forms(base.forms, "lemma_linked", iut.map_forms(base.forms.lemma, function(form)
if form == base.lemma and base.linked_lemma:find("%[%[") then
return base.linked_lemma
else
return form
end
end))
end
local inflect_props = {
slot_table = all_verb_slots,
inflect_word_spec = conjugate_verb,
}
iut.inflect_multiword_or_alternant_multiword_spec(alternant_multiword_spec, inflect_props)
-- (4) Fetch the forms and put the conjugated lemmas in data.heads if not explicitly given.
-- Return the list of form objects stored for `slot` in `alternant_multiword_spec.forms`.
-- If the slot has no entry, or its list is empty, return a one-element list holding the
-- placeholder form "-" instead.
local function fetch_forms(slot)
	local slot_forms = alternant_multiword_spec.forms[slot]
	if slot_forms and #slot_forms ~= 0 then
		return slot_forms
	end
	-- See above. This should only occur if the user explicitly used - for a spec.
	return {{form = "-"}}
end
pres_3sgs = fetch_forms("s_form")
pres_ptcs = fetch_forms("ing_form")
pasts = fetch_forms("ed_form")
past_ptcs = fetch_forms("en_form")
-- If no head= was given explicitly and the user wrote brackets in one of the lemmas, build the
-- heads from the "linked" lemma forms. Otherwise data.heads is not touched here and the default
-- headword-linking algorithm is used.
if #data.user_specified_heads == 0 and alternant_multiword_spec.saw_bracket then
	local linked_heads = {}
	data.heads = linked_heads
	for _, linked_lemma in ipairs(alternant_multiword_spec.forms.lemma_linked) do
		local head = {term = linked_lemma.form}
		-- Footnotes attached to the lemma are converted into headword qualifiers and references.
		if linked_lemma.footnotes then
			head.q, head.refs = require("Module:inflection utilities").fetch_headword_qualifiers_and_references(linked_lemma.footnotes)
		end
		linked_heads[#linked_heads + 1] = head
	end
end
else
-------------------------- SEPARATE-PARAM FORMAT --------------------------
-- The principal parts come from the positional parameters par1..par4. Each part ends up as a
-- single-element form list in pres_3sgs, pres_ptcs, pasts and past_ptcs.
-- `past_ptc` is declared together with the other scratch values: nothing after this branch
-- reads it, and assigning it without a local declaration would write to a variable outside
-- this branch (potentially a global).
local pres_3sg, pres_ptc, past, past_ptc
if par1 and not par2 and not par3 then
	-- Use of a single parameter other than "++", "*" or "++*" is now the "legacy" format,
	-- and no longer supported.
	if par1 == "es" or par1 == "ies" or par1 == "d" then
		error("Legacy parameter 1=es/ies/d no longer supported, just use 'en-verb' without params")
	elseif par1 == "++" or par1 == "*" or par1 == "++*" then
		-- The same special code is canonicalized separately for each principal part.
		pres_3sg = canonicalize_s_form(par1)
		pres_ptc = canonicalize_ing_form(par1)
		past = canonicalize_ed_form(par1)
	else
		error("Legacy parameter 1=STEM no longer supported, just use 'en-verb' without params")
	end
else
	-- Tracking only; records whether a third or (failing that) a second parameter was supplied.
	if par3 then
		track("xxx3")
	elseif par2 then
		track("xxx2")
	end
end
if not pres_3sg or not pres_ptc or not past then
	-- Either all three should be set above, or none of them.
	assert(not pres_3sg and not pres_ptc and not past)
	-- Each part uses its own parameter when given, else the corresponding new_default_* value.
	if par1 then
		pres_3sg = canonicalize_s_form(par1)
	else
		pres_3sg = new_default_s
	end
	if par2 then
		pres_ptc = canonicalize_ing_form(par2)
	else
		pres_ptc = new_default_ing
	end
	if par3 then
		past = canonicalize_ed_form(par3)
	else
		past = new_default_ed
	end
end
if par4 then
	-- An explicit fourth parameter means the past participle was given separately.
	past_ptcs_given = true
	past_ptc = canonicalize_ed_form(par4)
else
	-- No explicit past participle: reuse the past tense form.
	past_ptc = past
end
pres_3sgs = {{form = pres_3sg}}
pres_ptcs = {{form = pres_ptc}}
pasts = {{form = past}}
past_ptcs = {{form = past_ptc}}
end
------------------------------------------- HANDLE OVERRIDES ------------------------------------------
local pres_3sg_infls, pres_ptc_infls, past_infls, past_ptc_infls
-- Remove the enclosing square brackets from each qualifier in the list `qualifiers`.
-- Returns nil when `qualifiers` is nil, otherwise a new list of the unbracketed strings.
-- Throws an internal error if some qualifier is not of the form "[...]".
local function strip_brackets(qualifiers)
	if qualifiers then
		local bare = {}
		for _, bracketed in ipairs(qualifiers) do
			-- The empty string is a valid (truthy) capture, so only a failed match reaches error().
			local inner = bracketed:match("^%[(.*)%]$") or
				error("Internal error: Qualifier should be surrounded by brackets at this stage: " .. bracketed)
			bare[#bare + 1] = inner
		end
		return bare
	end
	return nil
end
-- Build the inflection list for one principal part.
--   label, accel_form:   display label and acceleration form code stored on the result.
--   defaults:            list of form objects ({form = ..., footnotes = ...}); defaults[1] must exist.
--   overrides:           table of override forms indexed from 1, with a `maxindex` field.
--   override_qualifiers: table of override qualifiers indexed in parallel with the forms.
--   canonicalize:        function applied to each override form (index 2 and up); a false/nil
--                        result means "no override at this index".
-- Returns a table with `label`, `accel` and a sequence of {term = ..., q = ...} entries, or just
-- {label = "無" .. label} when the first default form is "-".
local function collect_forms(label, accel_form, defaults, overrides, override_qualifiers, canonicalize)
	if defaults[1].form == "-" then
		return {label = "無" .. label}
	end

	-- Entry for a form taken from `defaults`: an override qualifier, when present, takes precedence
	-- over the form's own (bracketed) footnotes.
	-- FIXME: Maybe we should throw an error here if both exist.
	local function default_entry(index)
		local override_qual = override_qualifiers[index]
		local quals
		if override_qual then
			quals = {override_qual}
		else
			quals = strip_brackets(defaults[index].footnotes)
		end
		return {term = defaults[index].form, q = quals}
	end

	local infls = {label = label, accel = {form = accel_form}}
	local last_index = math.max(#defaults, overrides.maxindex)
	-- The first entry always comes from the defaults; overrides are only consulted from index 2 on.
	infls[1] = default_entry(1)
	for index = 2, last_index do
		local override_form = canonicalize(overrides[index])
		if override_form then
			-- If there is an override such as past_ptc2=..., only use the qualifier specified
			-- using an override (past_ptc2_qual=...), if any; it doesn't make sense to combine
			-- an override form with a qualifier specified inside of angle brackets.
			infls[#infls + 1] = {term = override_form, q = {override_qualifiers[index]}}
		elseif defaults[index] then
			infls[#infls + 1] = default_entry(index)
		end
	end
	return infls
end
local pres_3sg_infls = collect_forms("第三人稱單數簡單現在時", "3|s|pres",
pres_3sgs, args[1], args.pres_3sg_qual, canonicalize_s_form)
local pres_ptc_infls = collect_forms("現在分詞", "pres|ptcp",
pres_ptcs, args[2], args.pres_ptc_qual, canonicalize_ing_form)
local past_infls = collect_forms("過去式", "past",
pasts, args[3], args.past_qual, canonicalize_ed_form)
local past_ptc_infls = collect_forms("過去分詞", "past|ptcp",
past_ptcs, args[4], args.past_ptc_qual, canonicalize_ed_form)
-- Are the past forms identical to the past participle forms? If so, we use a single
-- combined "simple past and past participle" label on the past tense forms.
-- We check for two conditions: Either no past participle forms were given at all, or
-- they were given but are identical in every way (all forms and qualifiers) to the past
-- tense forms. The former "no explicit past participle forms" check is important in the
-- "separate-parameter" format; if past tense overrides are given and no past participle
-- forms given, the past tense overrides should apply to the past participle as well.
-- In the angle-bracket format, it's expected that all forms and qualifiers are specified
-- using that format, and we explicitly copy past tense forms and qualifiers to past
-- participle ones if the latter are omitted, so we disable the "no explicit past participle
-- forms" check.
if args[4].maxindex > 0 or args.past_ptc_qual.maxindex > 0 then
past_ptcs_given = true
end
-- Decide whether the past and past participle inflection lists are identical: they must have
-- the same number of entries, and each pair of entries must agree in term and in qualifiers.
local identical = #past_infls == #past_ptc_infls
if identical then
	for index, past_infl in ipairs(past_infls) do
		local ptc_infl = past_ptc_infls[index]
		local ptc_quals, past_quals = ptc_infl.q, past_infl.q
		if ptc_infl.term ~= past_infl.term then
			identical = false
		elseif (not ptc_quals) ~= (not past_quals) then
			-- one is nil, the other is not
			identical = false
		elseif ptc_quals then
			-- qualifiers present in both; each qualifier must match
			if #ptc_quals ~= #past_quals then
				identical = false
			else
				for k, ptc_qual in ipairs(ptc_quals) do
					if ptc_qual ~= past_quals[k] then
						identical = false
						break
					end
				end
			end
		end
		-- Stop at the first mismatching entry.
		if not identical then
			break
		end
	end
end
-- Append the inflections in display order: third-person singular, present participle, past,
-- and (only when shown separately) past participle.
table.insert(data.inflections, pres_3sg_infls)
table.insert(data.inflections, pres_ptc_infls)
-- The past participle gets its own entry only if it was explicitly given and differs from the past.
local separate_past_ptc = past_ptcs_given and not identical
if not separate_past_ptc then
	-- Combined entry: relabel the past tense forms to cover the past participle as well.
	if past_ptcs[1].form == "-" then
		past_infls.label = "無一般過去時或過去分詞"
	else
		past_infls.label = "一般過去時及過去分詞"
		past_infls.accel = {form = "ed-form"}
	end
end
table.insert(data.inflections, past_infls)
if separate_past_ptc then
	table.insert(data.inflections, past_ptc_infls)
end
-- Categorization that applies only to multiword page names (those containing a space).
if pagename:find(" ") then
	-- Check for placeholder "it": one category is added if any space-separated word matches.
	local placeholder_it_words = {["it"] = true, ["its"] = true, ["it's"] = true}
	for _, word in ipairs(rsplit(pagename, " ")) do
		if placeholder_it_words[word] then
			table.insert(data.categories, "帶位標“it”的" .. langname .. '詞')
			break
		end
	end

	-- Check for phrasal verbs
	local phrasal_particles = require("Module:table/listToSet") {
		-- NOTE: This should only contain common adverbial particles, not random words like [[low]],
		-- [[adrift]], etc.
		"aback", "about", "above", "across", "after", "against", "ahead", "along", "apart", "around",
		"as", "aside", "at", "away", "back", "before", "behind", "below", "between", "beyond",
		"by", "down", "for", "forth", "from", "in", "into", "of", "off", "on",
		"onto", "out", "over", "past", "round", "through", "to", "together", "towards", "under",
		"up", "upon", "with", "without",
	}
	local allowed_non_particle_words = require("Module:table/listToSet") {
		"it", "one", "oneself", "someone",
	}

	-- Only consider a verb to be phrasal if it consists of a single base verb followed exclusively by either
	-- particles from `phrasal_particles` or placeholder words from `allowed_non_particle_words`, where at
	-- least one following word is from `phrasal_particles` (hence [[can it]] is not a phrasal verb).
	-- Trailing words are peeled off one at a time, so `seen_particles` is collected right-to-left.
	local base = pagename
	local seen_particles = {}
	local prev, last_word = base:match("^(.+) (.-)$")
	while prev and (phrasal_particles[last_word] or allowed_non_particle_words[last_word]) do
		if phrasal_particles[last_word] then
			table.insert(seen_particles, last_word)
		end
		base = prev
		prev, last_word = base:match("^(.+) (.-)$")
	end

	-- `base` still containing a space means some non-final word was neither a particle nor a placeholder.
	if not base:find(" ") and #seen_particles > 0 then
		table.insert(data.categories, langname .. "動詞短語")
		-- Walk backwards so the per-particle categories follow the order of the words in the page name.
		for pos = #seen_particles, 1, -1 do
			table.insert(data.categories, "帶助詞(" .. seen_particles[pos] .. ")的" .. langname .. "動詞短語")
		end
	end
end
end
}
pos_functions["動詞"] = pos_functions["verbs"]
return export