模組:Grc-decl/decl/staticdata

--[[
	Abbreviations and codes used in declension names:
	- accent
		- nothing: oxytone
		- pax: paroxytone
		- prx: proparoxytone
		- con: perispomenon
	- contraction
		- con: contracted (perispomenon)
		- open: uncontracted
	- the characteristics of the ending (historically the stem)
		- 1st declension
			- alp: long alpha
			- eta: eta
			- als: long alpha, but short alpha in nominative and accusative singular
			- ets: eta, but short alpha in nominative and accusative singular
		- 3rd declension
			- pure: begins with ι or υ throughout
			- weak: begins with ι or υ alternating with ε or η
]]

-- Table returned by this module. The two adjective inflection tables start
-- out empty and are filled in further down in this file.
local export = {}
export.adjinflections = {}
export.adjinflections_con = {}

local module_path = 'Module:grc-decl/sandbox/decl/staticdata'
local m_paradigms = mw.loadData(module_path .. "/paradigms")

local ustring = mw.ustring
local U = ustring.char
local sub = ustring.sub
local gsub = ustring.gsub
local toNFC = ustring.toNFC
local toNFD = ustring.toNFD

--[[
	Returns two variants of an ending:
	1. the ending with every combining macron (U+0304) and combining breve
	   (U+0306) removed;
	2. the ending with every macron turned into a breve and every breve turned
	   into a macron.
	Only the combining characters are matched and no normalization is done here,
	so precomposed letters pass through unchanged.
	The swapped variant is pointless for endings containing more than one macron
	or breve: for instance, ᾱσᾰ becomes ᾰσᾱ.
]]
local function mess_with_length(text)
	local macron, breve = U(0x304), U(0x306)
	local length_mark = "[" .. macron .. breve .. "]"
	local swapped = { [macron] = breve, [breve] = macron }
	
	-- The parentheses discard gsub's second result (the replacement count).
	local without_marks = (gsub(text, length_mark, ""))
	local with_swapped_marks = (gsub(text, length_mark, swapped))
	
	return without_marks, with_swapped_marks
end

--[[
	Stores value under key in list, and also under the key with its macrons and
	breves removed, without overwriting entries that are already present.
	The unmarked key is only added when the key actually carries a length mark
	and the list has no entry for the key with the opposite length mark.
	Returns list; a list that is not a table is returned untouched.
]]
local function add_unmarked_form(list, key, value)
	if type(list) == "table" then
		local bare_key, flipped_key = mess_with_length(key)
		local has_length_mark = bare_key ~= key
		
		if has_length_mark and not list[flipped_key] and not list[bare_key] then
			list[bare_key] = value
		end
		
		if not list[key] then
			list[key] = value
		end
	end
	
	return list
end

--[=[
	Noun declension categories that [[Module:grc-decl/sandbox/decl]]
	uses to determine the declension category of the nominative and genitive forms
	that are supplied to the template.
	
	Each key is a declension category name and each value is the matching
	paradigm from the paradigms data module. The loop that builds infl_info reads
	the NS and GS fields of every paradigm and strips one trailing digit from the
	key, so a digit suffix only serves to keep two keys distinct.
]=]
local infl_categories = {
	['1st-alp'] = m_paradigms.alp_pax,
	['1st-eta'] = m_paradigms.eta_pax,
	['1st-als'] = m_paradigms.als_prx,
	['1st-ets'] = m_paradigms.ets_prx,
	['1st-M-alp'] = m_paradigms.M_alp_pax,
	['1st-M-alp2'] = m_paradigms.M_alp_con, -- add 2 for unique key
	['1st-M-eta'] = m_paradigms.M_eta_pax,
	['2nd'] = m_paradigms.second,
	['2nd-con'] = m_paradigms.second_con,
	['2nd-N'] = m_paradigms.second_N,
	['2nd-N-con'] = m_paradigms.second_N_con,
	['2nd-att'] = m_paradigms.second_att,
	['2nd-N-att'] = m_paradigms.second_N_att_prx,
	['3rd-εσ'] = m_paradigms.es_adj,
	['3rd-εσ-open'] = m_paradigms.es_adj_prx_open,
	['3rd-N-εσ'] = m_paradigms.N_es_adj,
	['3rd-N-ος'] = m_paradigms.N_es_prx,
	['3rd-N-ος-open'] = m_paradigms.N_es_prx_open,
	['3rd-N-ᾰσ'] = m_paradigms.N_as_prx,
	['3rd-N-ᾰσ-open'] = m_paradigms.N_as_prx_open,
	['3rd-κλῆς'] = m_paradigms.kles,
	['3rd-κλῆς-open'] = m_paradigms.kles_open,
	['3rd-weak-ι'] = m_paradigms.weak_i_prx,
	['3rd-weak-υ'] = m_paradigms.weak_u,
	['3rd-N-weak-ι'] = m_paradigms.N_weak_i_prx,
	['3rd-N-weak-υ'] = m_paradigms.N_weak_u,
	['3rd-pure-ι'] = m_paradigms.pure_i_prx,
	['3rd-N-pure-ι'] = m_paradigms.N_pure_i_prx,
	['3rd-pure-υ-long'] = m_paradigms.pure_u_long_prx,
	['3rd-pure-υ'] = m_paradigms.pure_u,
	['3rd-N-pure-υ'] = m_paradigms.N_pure_u_prx,
	['3rd-ευς'] = m_paradigms.eus,
	['3rd-ευς-con'] = m_paradigms.eus_con,
	['3rd-οι'] = m_paradigms.oi,
}

--[[
	Used by the function that generates the list of declension categories on the
	documentation page.
	Maps each declension category name used in infl_categories to the name of the
	field of the paradigms data module that holds its paradigm. Every category in
	infl_categories needs an entry here, including '3rd-N-ᾰσ-open'.
]]
export.conversion = {
	['1st-alp'] = 'alp_pax',
	['1st-eta'] = 'eta_pax',
	['1st-als'] = 'als_prx',
	['1st-ets'] = 'ets_prx',
	['1st-M-alp'] = 'M_alp_pax',
	['1st-M-alp2'] = 'M_alp_con',
	['1st-M-eta'] = 'M_eta_pax',
	['2nd'] = 'second',
	['2nd-con'] = 'second_con',
	['2nd-N'] = 'second_N',
	['2nd-N-con'] = 'second_N_con',
	['2nd-att'] = 'second_att',
	['2nd-N-att'] = 'second_N_att_prx',
	['3rd-εσ'] = 'es_adj',
	['3rd-εσ-open'] = 'es_adj_prx_open',
	['3rd-N-εσ'] = 'N_es_adj',
	['3rd-N-ος'] = 'N_es_prx',
	['3rd-N-ος-open'] = 'N_es_prx_open',
	['3rd-N-ᾰσ'] = 'N_as_prx',
	['3rd-N-ᾰσ-open'] = 'N_as_prx_open',
	['3rd-κλῆς'] = 'kles',
	['3rd-κλῆς-open'] = 'kles_open',
	['3rd-weak-ι'] = 'weak_i_prx',
	['3rd-weak-υ'] = 'weak_u',
	['3rd-N-weak-ι'] = 'N_weak_i_prx',
	['3rd-N-weak-υ'] = 'N_weak_u',
	['3rd-pure-ι'] = 'pure_i_prx',
	['3rd-N-pure-ι'] = 'N_pure_i_prx',
	['3rd-pure-υ-long'] = 'pure_u_long_prx',
	['3rd-pure-υ'] = 'pure_u',
	['3rd-N-pure-υ'] = 'N_pure_u_prx',
	['3rd-ευς'] = 'eus',
	['3rd-ευς-con'] = 'eus_con',
	['3rd-οι'] = 'oi',
}

--[[
	infl_info maps each nominative singular ending to a table that maps genitive
	singular endings to declension category names. It is declared local so that
	loading this module does not create a global variable; callers reach it
	through export.infl_info.noun.
]]
local infl_info = {}
export.ambig_forms = {}

-- Constructs a table for nouns with the same structure as the one for adjectives.
local strip_tone = require("Module:grc-accent").strip_tone
local longest_nominative_ending = 0
for name, decl in pairs(infl_categories) do
	-- sub(..., 2) discards the first character of each paradigm form.
	local nom = strip_tone(sub(decl.NS, 2))
	local gen = strip_tone(sub(strip_tone(decl.GS), 2))
	
	-- The length is measured on the composed (NFC) form of the ending.
	longest_nominative_ending = math.max(longest_nominative_ending, ustring.len(toNFC(nom)))
	
	if not infl_info[nom] then
		infl_info[nom] = {}
	end
	
	if type(infl_info[nom]) == "table" then
		if infl_info[nom][gen] then
			error('Conflict in noun declensions; two declensions with nominative ' .. nom ..
					' and genitive ' .. gen .. '.')
		end
		
		-- A trailing digit in the key only keeps keys unique; it is not part
		-- of the category name. The parentheses drop gsub's match count.
		local category = (gsub(name, "%d$", ""))
		
		infl_info[nom][gen] = category
	end
end
-- Stored in the same table as the endings, under a non-ending key.
infl_info.longest_nominative_ending = longest_nominative_ending

--[[
	Makes the noun table usable with endings that carry no macron or breve.
	For every nominative ending with a length mark:
	- if no ending with the opposite mark exists, the unmarked ending is stored
	  as a string pointing to the marked ending;
	- otherwise the genitive tables of both endings are merged under the unmarked
	  ending, unless the two share a genitive ending, in which case the unmarked
	  nominative–genitive pairs are recorded in export.ambig_forms instead.
	Genitive endings get unmarked duplicates in the same way.
	
	The keys are copied into a list before processing because this code adds new
	keys to infl_info, and adding keys to a table while traversing it with pairs
	is undefined behaviour.
]]
local nominative_endings = {}
for nominative in pairs(infl_info) do
	nominative_endings[#nominative_endings + 1] = nominative
end

for _, nominative in ipairs(nominative_endings) do
	local list = infl_info[nominative]
	local unmarked_length, opposite_length = mess_with_length(nominative)
	local data_for_opposite
	
	-- The two variants differ only if the ending carries a macron or breve.
	if unmarked_length ~= opposite_length then
		data_for_opposite = infl_info[opposite_length] or infl_info[toNFC(opposite_length)]
		if not data_for_opposite then
			infl_info[unmarked_length] = nominative
		end
	end
	
	-- Non-table values (redirect strings, longest_nominative_ending) are left alone.
	if type(list) == "table" then
		local new_list = {}
		
		for gen, name in pairs(list) do
			local gen_unmarked, gen_opposite = mess_with_length(gen)
			if gen_unmarked ~= gen and not list[gen_opposite] then
				new_list[gen_unmarked] = name
			end
			
			new_list[gen] = name
		end
		list = new_list
		
		-- Skipped when the opposite ending has already produced the unmarked entry.
		if data_for_opposite and not infl_info[unmarked_length] then
			local combined_gens = {}
			local is_ambig = false
			for gen, name in pairs(list) do
				combined_gens[gen] = name
			end
			
			for gen, name in pairs(data_for_opposite) do
				local _, gen_opposite = mess_with_length(gen)
				if list[gen] or list[gen_opposite] then
					--[[
						If there are two declension types with the same nominative
						and genitive endings aside from the length of the nominative,
						then strip the macron or breve from both forms and record them.
					]]
					is_ambig = true
				end
				
				combined_gens = add_unmarked_form(combined_gens, gen, name)
			end
			
			if is_ambig then
				--[[
					Remove length marks and record all the nominative–genitive pairs
					that were found.
				]]
				local ambig_gens = export.ambig_forms[unmarked_length] or {}
				export.ambig_forms[unmarked_length] = ambig_gens
				
				for gen in pairs(combined_gens) do
					-- Parentheses keep only the unmarked variant.
					ambig_gens[(mess_with_length(gen))] = true
				end
			else
				infl_info[unmarked_length] = combined_gens
			end
		end
		
		infl_info[nominative] = list
	end
end

--[[
	Adjective counterpart of the noun table: maps a masculine ending to a table
	that maps the feminine (or neuter) ending to an adjectival declension code.
	A string value instead of a table would redirect to another masculine ending
	(see the commented-out 'υς' entry); the loop that follows checks that such a
	redirect target exists.
]]
local infl_info_adj = {
	['ος'] = {
		['ᾱ'] = '1&2-alp',
		['η'] = '1&2-eta',
		['ον'] = '2nd',
	},
	['ους'] = {
		['ᾱ'] = '1&2-alp-con',
		['η'] = '1&2-eta-con',
		['ουσᾰ'] = '1&3-ουντ',
	},
	['ῠς'] = { ['ειᾰ'] = '1&3-ups' },
--	['υς'] = 'ῠς',
	['ως'] = {
		['υιᾰ'] = '1&3-οτ',
		['ων'] = '2nd-att',
	},
	['ᾱς'] = {
		['αινᾰ'] = '1&3-ᾰν',
		['ᾱσᾰ'] = '1&3-ᾰντ',
	},
	['ην'] = { ['εινᾰ'] = '1&3-εν' },
	['εις'] = {
		['εισᾰ'] = '1&3-εντ',
		['εσσᾰ'] = '1&3-εσσ',
	},
	['ων'] = {
		['ουσᾰ'] = '1&3-οντ',
		['ωσᾰ'] = '1&3-ωντ',
	},
	['ῡς'] = { ['ῡσᾰ'] = '1&3-ῠντ' },
	['ης'] = { ['ες'] = '3rd-εσ' },
}

export.ambig_forms_adj = {}
--[[
	[''] = '',
]]

--[[
	Converts every masculine and feminine (or neuter) ending in infl_info_adj to
	decomposed form (NFD), so that macrons and breves are separate combining
	characters, and checks that every string redirect points to an existing key.
	
	The keys are copied into a list before processing because entries are
	re-keyed here, and adding keys to a table while traversing it with pairs is
	undefined behaviour.
]]
local adj_endings_to_decompose = {}
for masculine in pairs(infl_info_adj) do
	adj_endings_to_decompose[#adj_endings_to_decompose + 1] = masculine
end

for _, masculine in ipairs(adj_endings_to_decompose) do
	local list = infl_info_adj[masculine]
	if type(list) == "table" then
		-- Decompose masculine and feminine (or neuter) endings.
		local new_fems = {}
		local fems_changed = false
		for feminine, name in pairs(list) do
			local new_feminine = toNFD(feminine)
			if new_feminine ~= feminine then
				fems_changed = true
			end
			new_fems[new_feminine] = name
		end
		list = new_fems
		
		local new_masculine = toNFD(masculine)
		
		if fems_changed or new_masculine ~= masculine then
			-- Delete existing entry
			infl_info_adj[masculine] = nil
			-- Create decomposed one.
			infl_info_adj[new_masculine] = list
		end
	elseif type(list) == "string" then
		if not infl_info_adj[list] then
			error(masculine .. ' has been redirected to the form ' .. list .. ', which does not exist.')
		end
	end
end

--[[
	Makes the adjective table usable with endings that carry no macron or breve,
	and records the length of the longest masculine ending.
	For every masculine ending with a length mark:
	- if no ending with the opposite mark exists, the unmarked ending is stored
	  as a string pointing to the marked ending;
	- otherwise the feminine tables of both endings are merged under the unmarked
	  ending, unless both contain the same feminine ending, in which case the
	  unmarked pair is recorded in export.ambig_forms_adj instead.
	Feminine endings get unmarked duplicates in the same way.
	
	The keys are copied into a list before processing because this code adds new
	keys (some with string values) to infl_info_adj, and adding keys to a table
	while traversing it with pairs is undefined behaviour.
]]
local longest_masculine_ending = 0

local masculine_endings = {}
for masculine in pairs(infl_info_adj) do
	masculine_endings[#masculine_endings + 1] = masculine
end

for _, masculine in ipairs(masculine_endings) do
	local list = infl_info_adj[masculine]
	local unmarked_length, opposite_length = mess_with_length(masculine)
	local data_for_opposite
	
	-- The length is measured on the composed (NFC) form of the ending.
	longest_masculine_ending =
		math.max(longest_masculine_ending, ustring.len(toNFC(masculine)))
	
	-- The two variants differ only if the ending carries a macron or breve.
	if unmarked_length ~= opposite_length then
		data_for_opposite = infl_info_adj[opposite_length] or infl_info_adj[toNFC(opposite_length)]
		if not data_for_opposite then
			infl_info_adj[unmarked_length] = masculine
		end
	end
	
	-- String redirects have no feminine endings to process.
	if type(list) == "table" then
		local new_list = {}
		for fem, name in pairs(list) do
			local fem_unmarked, fem_opposite = mess_with_length(fem)
			if fem_unmarked ~= fem and not list[fem_opposite] then
				new_list[fem_unmarked] = name
			end
			
			new_list[fem] = name
		end
		list = new_list
		
		-- Skipped when the opposite ending has already produced the unmarked entry.
		if data_for_opposite and not infl_info_adj[unmarked_length] then
			local combined_fems = {}
			for fem, name in pairs(list) do
				combined_fems[fem] = name
			end
			
			for fem, name in pairs(data_for_opposite) do
				if list[fem] then
					--[[
						If there are declension types with the same masculine and
						feminine endings, aside from the length (not currently true),
						then strip the macron or breve and log them.
					]]
					-- Parentheses keep only the unmarked variant.
					export.ambig_forms_adj[unmarked_length] = (mess_with_length(fem))
					combined_fems = nil
					break
				end
				
				combined_fems = add_unmarked_form(combined_fems, fem, name)
			end
			
			if combined_fems then
				infl_info_adj[unmarked_length] = combined_fems
			end
		end
		
		infl_info_adj[masculine] = list
	end
end
-- Stored in the same table as the endings, under a non-ending key.
infl_info_adj.longest_masculine_ending = longest_masculine_ending

--[[
	Format of the entries in export.adjinflections (defined further down):
	each entry maps an adjectival declension code to the declension type used for
	the masculine (1), feminine (2), and neuter (3), along with any suffixes added
	to the stem in the masculine and neuter (a1) or feminine (a2).
]]

-- Header wikitext for each declension group. The keys are the values used in
-- the adeclheader field of the export.adjinflections entries.
export.headers = {
	['1&2'] = '[[Appendix:Ancient Greek first declension|First]] and [[Appendix:Ancient Greek second declension|second]] declension',
	['1&3'] = '[[Appendix:Ancient Greek first declension|First]] and [[Appendix:Ancient Greek third declension|third]] declension',
	['2nd'] = '[[Appendix:Ancient Greek second declension|Second declension]]',
	['3rd'] = '[[Appendix:Ancient Greek third declension|Third declension]]',
	['Attic'] = '[[Appendix:Ancient Greek Attic declension|Attic second declension]]',
	['irreg'] = 'Irregular declension',
}

-- Copies the headers defined in the paradigms data module into export.headers.
-- A key present in both tables must carry the same text; otherwise an error is
-- raised that names both conflicting headers.
for k, header in pairs(m_paradigms.headers) do
	local existing = export.headers[k]
	if not existing then
		export.headers[k] = header
	elseif existing ~= header then
		error('Two headers with same name: ' .. existing .. ', ' .. header .. '.')
	end
end

--[[
	Adjectival declension codes. Each entry lists the declension type used for
	the masculine (1), feminine (2), and neuter (3); the feminine is nil where
	masculine and feminine are identical. a1 is the suffix added to the stem in
	the masculine and neuter, a2 the suffix added in the feminine, and
	adeclheader is a key of export.headers.
	Each code is assigned exactly once.
]]

--First-and-second-declension adjectives
export.adjinflections['1&2-alp'] = { '2nd', '1st-alp', '2nd-N', adeclheader = '1&2' }
export.adjinflections['1&2-eta'] = { '2nd', '1st-eta', '2nd-N', adeclheader = '1&2' }

-- First-and-third-declension adjectives
export.adjinflections['1&3-ups'] = { '3rd-weak-υ', '1st-als', '3rd-N-weak-υ', a2 = 'ει', adeclheader = '1&3' }
export.adjinflections['1&3-ᾰν'] = { '3rd-cons', '1st-ets', '3rd-N-cons', a1 = 'ᾰν', a2 = 'αιν', adeclheader = '1&3' }
export.adjinflections['1&3-εν'] = { '3rd-cons', '1st-ets', '3rd-N-cons', a1 = 'εν', a2 = 'ειν', adeclheader = '1&3' }
export.adjinflections['1&3-εσσ'] = { '3rd-cons', '1st-ets', '3rd-N-cons', a1 = 'εντ', a2 = 'εσσ', adeclheader = '1&3' }

--[[	Participles (subtype of first-and-third)		]]
export.adjinflections['1&3-ουντ'] = { '3rd-cons', '1st-ets', '3rd-N-cons', a1 = 'οντ', a2 = 'ουσ', adeclheader = '1&3' }
-- perfect active:
export.adjinflections['1&3-οτ'] = { '3rd-cons', '1st-als', '3rd-N-cons', a1 = 'οτ', a2 = 'υι', adeclheader = '1&3' }
-- From roots with reflex of *h₁, and in the aorist passive:
export.adjinflections['1&3-εντ'] = { '3rd-cons', '1st-ets', '3rd-N-cons', a1 = 'εντ', a2 = 'εισ', adeclheader = '1&3' }
-- From roots with reflex of *h₂, and 1st aorist active:
export.adjinflections['1&3-ᾰντ'] = { '3rd-cons', '1st-ets', '3rd-N-cons', a1 = 'ᾰντ', a2 = 'ᾱσ', adeclheader = '1&3' }
-- From roots with reflex of *h₃, and thematic active:
export.adjinflections['1&3-οντ'] = { '3rd-cons', '1st-ets', '3rd-N-cons', a1 = 'οντ', a2 = 'ουσ', adeclheader = '1&3' }
-- έω/όω contract; this category does not appear in infl_info_adj, because it
-- has to be assigned specifically in [[Module:grc-decl/sandbox/decl]], if
-- masculine singular has circumflex on ultima (is perispomenon):
export.adjinflections['1&3-οντ-con'] = { '3rd-cons', '1st-ets', '3rd-N-cons', a1 = 'ουντ', a2 = 'ουσ', adeclheader = '1&3' }
-- νῡμι active:
export.adjinflections['1&3-ῠντ'] = { '3rd-cons', '1st-ets', '3rd-N-cons', a1 = 'ῠντ', a2 = 'ῡσ', adeclheader = '1&3' }
-- άω contract:
export.adjinflections['1&3-ωντ'] = { '3rd-cons', '1st-ets', '3rd-N-cons', a1 = 'ωντ', a2 = 'ωσ', adeclheader = '1&3' }

-- Masculine and feminine identical
export.adjinflections['2nd'] = { '2nd', nil, '2nd-N', adeclheader = '2nd' }
export.adjinflections['2nd-att'] = { '2nd-att', nil, '2nd-N-att', adeclheader = 'Attic' }
export.adjinflections['3rd-cons'] = { '3rd-cons', nil, '3rd-N-cons', adeclheader = '3rd' }
export.adjinflections['3rd-εσ-open'] = { '3rd-εσ-open', nil, '3rd-N-εσ-open', adeclheader = '3rd' }

-- Contracted types; these are also listed in export.adjinflections_con.
export.adjinflections['1&2-alp-con'] = { '2nd-con', '1st-alp', '2nd-N-con', adeclheader = '1&2' }
export.adjinflections['1&2-eta-con'] = { '2nd-con', '1st-eta', '2nd-N-con', adeclheader = '1&2' }
export.adjinflections['3rd-εσ'] = { '3rd-εσ', nil, '3rd-N-εσ', adeclheader = '3rd' }

-- The same table objects are shared between the two tables, not copied.
export.adjinflections_con['1&2-alp-con'] = export.adjinflections['1&2-alp-con']
export.adjinflections_con['1&2-eta-con'] = export.adjinflections['1&2-eta-con']
export.adjinflections_con['3rd-εσ'] = export.adjinflections['3rd-εσ']

-- Exposes the noun and adjective ending tables built above.
export.infl_info = {
	noun = infl_info,
	adj = infl_info_adj,
}


--[[
	Lists of form codes for irregular paradigms. For masculine and feminine nouns
	there is one list per combination of numbers (S, D, P, SP, DP, or full).
	NOTE(review): the codes look like case letter + number letter (e.g. NS for
	nominative singular) — confirm against the caller. The meaning of the leading
	false in each list is not visible in this file.
]]
export.irregular = {
	noun = {
		masculine_feminine = {
			full = {
				false,
				'NS', 'GS', 'DS', 'AS', 'VS',
				'ND', 'GD',
				'NP', 'GP', 'DP', 'AP',
			},
			S = {
				false,
				'NS', 'GS', 'DS', 'AS', 'VS',
			},
			D = {
				false,
				'ND', 'GD',
			},
			P = {
				false,
				'NP', 'GP', 'DP', 'AP',
			},
			SP = {
				false,
				'NS', 'GS', 'DS', 'AS', 'VS',
				'NP', 'GP', 'DP', 'AP',
			},
			DP = {
				false,
				'ND', 'GD',
				'NP', 'GP', 'DP', 'AP',
			},
		},
	},
}

--[[
	Form-code lists for irregular neuter nouns, one per combination of numbers.
	Only the forms listed in the array part are stored separately; redirects maps
	each remaining form code to the listed form it is taken from (the accusative
	and vocative point to the nominative of the same number).
	The dual list holds ND and GD, matching the dual codes used in full and DP.
]]
export.irregular.noun.neuter = {
	full = { false, 'NS', 'GS', 'DS', 'ND', 'GD', 'NP', 'GP', 'DP',
		redirects = {
			AS = 'NS', VS = 'NS', AP = 'NP'
		},
	},
	DP = { false, 'ND', 'GD', 'NP', 'GP', 'DP',
		redirects = {
			AP = 'NP',
		},
	},
	SP = { false, 'NS', 'GS', 'DS', 'NP', 'GP', 'DP',
		redirects = {
			AS = 'NS', VS = 'NS', AP = 'NP'
		}
	},
	S = { false, 'NS', 'GS', 'DS',
		redirects = {
			AS = 'NS', VS = 'NS',
		},
	},
	D = { false, 'ND', 'GD' },
	P = { false, 'NP', 'GP', 'DP',
		redirects = {
			AP = 'NP'
		},
	},
}

--[[
	Form-code lists for irregular adjectives, one per combination of numbers.
	Codes are prefixed with M, F, or N. All masculine and feminine forms are
	listed, but of the neuter only NNS and NNP; redirects maps every other neuter
	form code to the listed form it is taken from: the neuter nominative for the
	accusative and vocative, and the masculine form of the same case and number
	otherwise.
]]
export.irregular.adjective = {
	full = {
		false, 'MNS', 'MGS', 'MDS', 'MAS', 'MVS', 'MND', 'MGD', 'MNP', 'MGP', 'MDP', 'MAP',
		'FNS', 'FGS', 'FDS', 'FAS', 'FVS', 'FND', 'FGD', 'FNP', 'FGP', 'FDP', 'FAP',
		'NNS', 'NNP',
		redirects = {
			NGS = 'MGS', NDS = 'MDS', NAS = 'NNS', NVS = 'NNS',
			NND = 'MND', NGD = 'MGD',
			NGP = 'MGP', NDP = 'MDP', NAP = 'NNP'
		},
	},
	SP = {
		false, 'MNS', 'MGS', 'MDS', 'MAS', 'MVS', 'MNP', 'MGP', 'MDP', 'MAP',
		'FNS', 'FGS', 'FDS', 'FAS', 'FVS', 'FNP', 'FGP', 'FDP', 'FAP',
		'NNS', 'NNP',
		redirects = {
			NGS = 'MGS', NDS = 'MDS', NAS = 'NNS', NVS = 'NNS',
			NGP = 'MGP', NDP = 'MDP', NAP = 'NNP'
		},
	},
	S = {
		false, 'MNS', 'MGS', 'MDS', 'MAS', 'MVS',
		'FNS', 'FGS', 'FDS', 'FAS', 'FVS',
		'NNS',
		redirects = { NGS = 'MGS', NDS = 'MDS', NAS = 'NNS', NVS = 'NNS', },
	},
	-- Unlikely ever to be used.
	D = {
		false, 'MND', 'MGD',
		'FND', 'FGD',
		-- Same dual redirects as in full: nominative to nominative, genitive to genitive.
		redirects = { NND = 'MND', NGD = 'MGD', }
	},
	P = {
		false, 'MNP', 'MGP', 'MDP', 'MAP',
		'FNP', 'FGP', 'FDP', 'FAP',
		'NNP',
		redirects = { NGP = 'MGP', NDP = 'MDP', NAP = 'NNP' },
	},
}

return export