local export = {}

-- internal encoding using [a-zA-Z]
-- Each key is a letter (or digraph) of the written form that takes more than
-- one byte; the value is the single ASCII capital that stands in for it while
-- a word is being processed with byte-oriented Lua patterns.
export.encode = {
	-- vowels
	["ã"] = "A",
	["ɛ"] = "E",
	["ĩ"] = "I",
	["ɔ"] = "O",
	-- consonants written with a single non-ASCII letter
	["ɓ"] = "B",
	["ɗ"] = "D",
	["ɠ"] = "G",
	["ɲ"] = "N",
	["ʔ"] = "Q",
	-- consonants written with two characters
	["cʼ"] = "C",
	["sh"] = "S",
	["tʼ"] = "T",
}
-- Internal single-letter code -> written form; export.combine passes this
-- table to gsub to turn an encoded word back into its spelling.
export.decode = {
	-- vowels
	["A"] = "ã",
	["E"] = "ɛ",
	["I"] = "ĩ",
	["O"] = "ɔ",
	-- consonants written with a single non-ASCII letter
	["B"] = "ɓ",
	["D"] = "ɗ",
	["G"] = "ɠ",
	["N"] = "ɲ",
	["Q"] = "ʔ",
	-- consonants written with two characters
	["C"] = "cʼ",
	["S"] = "sh",
	["T"] = "tʼ",
}
-- Internal single-letter code -> text used in sort keys; export.makeSortKey
-- passes this table to gsub after syllabifying a word.
export.sortkey = {
	-- codes replaced by a plain ASCII letter
	["A"] = "a",
	["B"] = "b",
	["C"] = "c",
	["D"] = "d",
	["E"] = "e",
	["G"] = "g",
	["I"] = "i",
	["N"] = "n",
	["O"] = "o",
	["T"] = "t",
	-- codes that expand back to their written form
	["S"] = "sh",
	["Q"] = "ʔ",
}
-- parse a word into syllables using the internal encoding
-- returns a table with extra info, e.g. "Wucʼê" becomes:
-- { "wu", "Ce", accent=2, cap=true, falling=true }
--
-- Fields of the returned table:
--   [1..n]   the syllables: lowercased, encoded, tone mark removed
--   accent   index of the syllable that carried the tone mark, 0 if none
--   cap      true when lowercasing changed the word (it had capital letters)
--   falling  true when the tone mark was U+0302 rather than U+0301
-- Raises an error if the word has more than one tone mark, or a ʼ that is
-- not part of "cʼ" / "tʼ".
function export.syllabify(word)
	-- decompose, so that a tone mark is a separate combining character
	word = mw.ustring.toNFD(word)
	local lowered = word:ulower()
	local cap = lowered ~= word
	word = lowered

	-- "\204\129" is U+0301 (acute), "\204\130" is U+0302 (circumflex)
	local accent = word:match('\204[\129\130]')
	local _, count = word:gsub('\204[\129\130]','')
	if count > 1 then error("More than one diacritic found.") end

	-- digraphs first, then every remaining two-byte letter; characters that
	-- have no entry in export.encode are left as they are by gsub
	word = word:gsub("[tc]ʼ",export.encode)
	if word:match("ʼ") then error("Uncoupled ʼ found.") end
	word = word:gsub("sh","S")
	word = word:gsub("[\194-\223][\128-\191]",export.encode)

	-- put a "." in front of every consonant+vowel pair; the class lists all
	-- plain and encoded consonants, including T (tʼ)
	word = word:gsub("[bBcCdDgGhjklmnNpqrsStTwxyzQ][aeiouEO]",".%0")
		:gsub("([aeiouEO])([aeiouEO]\204[\129\130])","%1.%2") -- e.g. tiá -> ti.á
		:gsub("^%.","")
		:gsub("%.%.+",".")
	local syllables = mw.text.split(word,".",true)

	-- strip the tone mark and remember which syllable it was on; at most one
	-- mark exists (checked above), so the search can stop at the first hit
	local accented = 0
	for i,syl in ipairs(syllables) do
		local stripped, hits = syl:gsub("\204[\129\130]","")
		syllables[i] = stripped
		if hits == 1 then
			accented = i
			break
		end
	end

	syllables.accent = accented
	syllables.cap = cap
	syllables.falling = accent == "\204\130"
	return syllables
end
-- inverse of export.syllabify
-- Takes a syllable table (array part plus the accent / cap / falling fields)
-- and returns the spelled-out word in NFC. The input table is not modified.
function export.combine(syllables)
	-- U+0302 for a falling tone, U+0301 otherwise
	local tone = "\204\129"
	if syllables.falling then
		tone = "\204\130"
	end
	local accented = syllables.accent
	local capitalize = syllables.cap

	-- gather the pieces in a fresh list so the caller's table stays untouched
	local parts = {}
	for index, syllable in ipairs(syllables) do
		if index == accented then
			-- the tone mark goes right after the first vowel of the syllable
			syllable = syllable:gsub("[aeiouEO]", "%0" .. tone, 1)
		end
		parts[index] = syllable
	end

	-- back from the internal capitals to the written letters
	local word = table.concat(parts):gsub("[BCDEGNOSTQ]", export.decode)
	if capitalize then
		-- uppercase the first UTF-8 character only
		word = word:gsub("^[\1-\127\194-\255][\128-\191]*", string.uupper, 1)
	end
	return mw.ustring.toNFC(word)
end
-- generates the sort key for categorization
-- wucʼê --> wuce2'
-- (2: accent on second syllable)
-- (apostrophe at the end: falling tone)
--
-- text: the term to sort; it is split on single spaces and each word gets
--       its own key
-- lang, sc: language and script codes; anything other than "amf" / "Latn"
--       is tracked and the text is returned unchanged
-- A word that export.syllabify rejects is tracked and left as it is.
function export.makeSortKey(text, lang, sc)
	if lang ~= "amf" or sc ~= "Latn" then
		require("Module:debug").track("amf-utilities/sort")
		return text
	end
	-- keep this local: a bare assignment here would create a global
	local words = mw.text.split(text, " ", true)
	for i,word in ipairs(words) do
		local success, syllables = pcall(export.syllabify,word)
		if success then
			-- encoded letters -> sort letters, then accent position, then
			-- an apostrophe when the tone is falling
			words[i] = table.concat(syllables):gsub("[BCDEGNOSTQ]",export.sortkey)
				.. syllables.accent
				.. (syllables.falling and "'" or "")
		else
			require("Module:debug").track("amf-utilities/sort")
		end
	end
	return table.concat(words, " ")
end
return export