local export = {}

-- Shorthand for turning Unicode code points into a string.
local U = mw.ustring.char

-- Combining diacritics. They only appear as separate code points once the
-- text has been decomposed (NFD), which is_multiword does before matching.
local acute = U(0x301)    -- U+0301 COMBINING ACUTE ACCENT
local grave = U(0x300)    -- U+0300 COMBINING GRAVE ACCENT
local macron = U(0x304)   -- U+0304 COMBINING MACRON
local underdot = U(0x323) -- U+0323 COMBINING DOT BELOW

-- Character class matching exactly one of the three tone marks above.
local tone = string.format("[%s%s%s]", acute, grave, macron)

-- Pattern for a vowel letter followed by "n":
--   capture 1: the base vowel letter (a, e, i, o or u);
--   capture 2: the underdot if present, otherwise the empty string.
-- Any number of tone marks may sit on either side of the underdot.
local Vn_pattern = table.concat({
	"([aeiou])", tone, "*",
	"(", underdot, "?)",
	tone, "*n",
})
--- Decide whether a vowel (as captured by Vn_pattern) passes the check that
-- is_multiword applies to the vowels on each side of a hyphen.
-- The function name suggests this means "may be nasalised before n".
-- @param vowel            base vowel letter, or nil when nothing matched
-- @param possible_underdot the captured underdot, "" when absent, or nil
-- @return true for "a", "i", "u", and for "e"/"o" only when accompanied by
--         the underdot; false for everything else (including nil).
local function can_be_nasal(vowel, possible_underdot)
	-- These three vowels qualify regardless of the underdot.
	if vowel == "a" or vowel == "i" or vowel == "u" then
		return true
	end
	-- "e" and "o" qualify only in their underdotted form.
	if vowel == "e" or vowel == "o" then
		return possible_underdot == underdot
	end
	return false
end
--- Report whether `term` counts as more than one word.
--
-- Every whitespace or punctuation character makes the term multiword, with
-- one exception: a hyphen ("-") is tolerated when the text before it ends in
-- a vowel + "n" sequence, the text after it begins with one, both vowels pass
-- can_be_nasal, and the two base vowel letters are identical.
--
-- Note: a spelling such as Adégbé-n-ró (for Adégbén̄ró) is not supported and
-- is reported as multiword.
--
-- @param term string to examine
-- @return true if a separator other than a tolerated hyphen is found,
--         false otherwise
function export.is_multiword(term)
	-- Lowercase, then decompose (NFD) so that each diacritic becomes its own
	-- code point and can be matched separately from the base letter.
	term = mw.ustring.toNFD(mw.ustring.lower(term))

	-- Anchored variants of the vowel + "n" pattern, built once.
	local ends_in_Vn = Vn_pattern .. "$"
	local starts_with_Vn = "^" .. Vn_pattern

	-- `before` is the position of the separator, `after` the position just
	-- past it (both come from the empty position captures).
	for before, separator, after in mw.ustring.gmatch(term, "()([%s%p])()") do
		-- Anything that is not a plain hyphen always splits the term.
		if separator ~= "-" then
			return true
		end

		local left_part = mw.ustring.sub(term, 1, before - 1)
		local left_vowel, left_dot = mw.ustring.match(left_part, ends_in_Vn)
		local right_vowel, right_dot = mw.ustring.match(term, starts_with_Vn, after)

		-- When either side fails to match, its captures are nil and
		-- can_be_nasal rejects them, so the hyphen counts as a word break.
		local hyphen_is_tolerated = can_be_nasal(left_vowel, left_dot)
			and can_be_nasal(right_vowel, right_dot)
			and left_vowel == right_vowel
		if not hyphen_is_tolerated then
			return true
		end
	end

	return false
end
-- Hand the module table (holding is_multiword) back to whoever loads this chunk.
return export