-- 模組:Family tree/etymology languages

local language_codes = require "Module:languages/code to canonical name"

-- Weighting bonus used to prefer codes such as nrf-grn and nrf-jer over
-- roa-grn and roa-jer (Guernsey and Jersey).
-- Takes the leading run of lowercase letters of `code` and looks it up in
-- "Module:languages/code to canonical name".
-- Returns 1 when that prefix is found there (it is a language code),
-- otherwise 0; callers add the result to the base weighting.
local function isLangCode(code)
	local prefix = code:match("^%l+")
	-- 1 is truthy, so the and/or idiom is safe here.
	return language_codes[prefix] and 1 or 0
end

-- Code formats in order of preference (most preferred first):
-- xx, xxx, xx-xxx, xxx-xxx, xx-xxx-xxx, xxx-xxx-xxx, xx-XX, xxx-XX, xx-XX-xxx, xxx-XX-xxx
-- Each rule holds the Lua pattern for one format and its base weighting.
-- Rules flagged with `language_bonus` get isLangCode(code) added, so that
-- language codes are preferred over family codes of the same format.
local etymology_code_formats = {
	{ pattern = "^%l%l$",                     base = 14 },
	{ pattern = "^%l%l%l$",                   base = 13 },
	{ pattern = "^%l%l%-%l%l%l$",             base = 12 },
	{ pattern = "^%l%l%l%-%l%l%l$",           base = 10, language_bonus = true },
	{ pattern = "^%l%l%-%l%l%l%-%l%l%l$",     base = 9 },
	{ pattern = "^%l%l%l%-%l%l%l%-%l%l%l$",   base = 7, language_bonus = true },
	{ pattern = "^%l%l%-%u%u$",               base = 6 },
	{ pattern = "^%l%l%l%-%u%u$",             base = 4, language_bonus = true },
	{ pattern = "^%l%l%-%u%u%-%l%l%l$",       base = 3 },
	{ pattern = "^%l%l%l%-%u%u%-%l%l%l$",     base = 1, language_bonus = true },
}

-- Picks which of two codes should be the preferred one.
-- Decision order:
--   1. the code whose format has the higher weighting (unrecognized formats weigh 0);
--   2. on equal weighting, the shorter code;
--   3. on equal length, the code that sorts first with the `<` string comparison.
-- Returns one of the two arguments unchanged (code1 when both are equal).
local function determine_preferred_etymology_language_code(code1, code2)
	local function weighting(code)
		for _, format in ipairs(etymology_code_formats) do
			if code:find(format.pattern) then
				if format.language_bonus then
					return format.base + isLangCode(code)
				end
				return format.base
			end
		end
		-- No known format matched.
		return 0
	end

	local weighting1, weighting2 = weighting(code1), weighting(code2)
	if weighting1 ~= weighting2 then
		-- Codes are strings (always truthy), so and/or is safe.
		return weighting1 > weighting2 and code1 or code2
	end

	local length1, length2 = #code1, #code2
	if length1 ~= length2 then
		return length1 < length2 and code1 or code2
	end

	-- If all else fails, use alphabetical order.
	if code1 > code2 then
		return code2
	end
	return code1
end

-- Reduces table `t` to a single value.
-- `func(key, value, accumulator)` is called once for every pair yielded by
-- pairs(t) (iteration order is unspecified) and must return the new
-- accumulator. `accum` is the starting value and is returned unchanged when
-- `t` has no entries.
local function fold(t, accum, func)
	local result = accum
	for key, value in pairs(t) do
		result = func(key, value, result)
	end
	return result
end

-- Returns a new table in which every value of `t` becomes a key mapped to the
-- key it had in `t`. `t` itself is not modified. If several keys share the
-- same value, whichever pairs() yields last wins.
local function invert(t)
	local value_to_key = {}
	for key, value in pairs(t) do
		value_to_key[value] = key
	end
	return value_to_key
end

-- The data module's keys are treated as codes and its values as data tables;
-- several codes may point at the same data table.
local etymology_language_data = require "Module:etymology languages/data"

-- fold() callback: records `code` for `data`.
-- `data_to_code` maps each data table to the preferred code seen so far.
-- Side effect: `data.codes` is created or extended so that it lists every
-- code that refers to this data table (in pairs() order, which is unspecified).
local function record_code(code, data, data_to_code)
	local current_code = data_to_code[data]
	if current_code then
		data_to_code[data] = determine_preferred_etymology_language_code(current_code, code)
		table.insert(data.codes, code)
	else
		data_to_code[data] = code
		data.codes = { code }
	end
	return data_to_code
end

-- Module result: preferred code -> data table, one entry per distinct data table.
return invert(fold(etymology_language_data, {}, record_code))