local export = {}

local u = mw.ustring.char

-- Marker character U+F000 (inside Unicode's Private Use Area). The tables
-- below append it to a single letter when rewriting a multi-letter sequence.
-- NOTE(review): presumably this makes e.g. "ch" collate as its own letter
-- after every ordinary "c..." string -- confirm against the intended alphabet.
local a = u(0xF000)

-- Body of a character class; makeSortKey wraps it in "[...]".
-- The "-" forms the range U+0300..U+0302 (combining grave, acute, circumflex);
-- U+0308 is the combining diaeresis; the last member is the ASCII apostrophe.
local remove_diacritics =
	u(0x0300) .. "-" .. u(0x0302)
	.. u(0x0308)
	.. "'"

-- Single-character substitutions.
local oneChar = {
	k = "c",
}

-- Two-letter sequences, each rewritten to its first letter plus the marker.
local twoChars = {
	ch = "c" .. a,
	dd = "d" .. a,
	ff = "f" .. a,
	ll = "l" .. a,
	ph = "p" .. a,
	rh = "r" .. a,
	th = "t" .. a,
}

-- Three-letter sequences.
local threeChars = {
	ngh = "g" .. a,
}
--- Build a sort key for `text`.
-- Steps, in order: lowercase the input; rewrite the sequences listed in
-- `threeChars`, then those in `twoChars`; map individual characters through
-- `oneChar`; decompose to NFD, delete every character matched by the
-- `remove_diacritics` class, recompose to NFC; finally uppercase the result.
-- @param text  string to convert
-- @param lang  not read by this function
-- @param sc    not read by this function
-- @return the uppercased sort key
function export.makeSortKey(text, lang, sc)
	local key = mw.ustring.lower(text)

	-- Three-letter sequences are rewritten before two-letter ones.
	for _, replacements in ipairs({ threeChars, twoChars }) do
		for from, to in pairs(replacements) do
			key = mw.ustring.gsub(key, from, to)
		end
	end

	-- Characters without an entry in oneChar are left as they are.
	local mapped = mw.ustring.gsub(key, ".", oneChar)

	-- Decompose so combining marks become separate code points, drop the
	-- unwanted ones, then put the remaining marks back together.
	local decomposed = mw.ustring.toNFD(mapped)
	local stripped = mw.ustring.gsub(decomposed, "[" .. remove_diacritics .. "]", "")
	local recomposed = mw.ustring.toNFC(stripped)

	return mw.ustring.upper(recomposed)
end
-- Hand back the module table, which carries makeSortKey.
return export