local export = {}
local m_IPA = require("Module:IPA")
local lang = require("Module:languages").getByCode("szl")
-- Vowel symbols as they appear in the working string of phonemic(). The string
-- is spliced into character classes there (palatalisation and syllabification).
local V = "aãõɛeɔoɨui";
-- Replacement table for the digraphs matched by '[crsd]z' in phonemic().
-- "_" is an internal placeholder that phonemic() later turns into the tie bar;
-- "ꙮ" is an internal placeholder for <rz>, which phonemic() later resolves to
-- ʂ after any of ptsʂɕkx and to ʐ everywhere else.
local di = {
["cz"]="t_ʂ", ["rz"]="ꙮ", ["sz"]="ʂ", ["dz"]="d_z",
}
-- Per-character map from orthography to working symbols, applied by phonemic()
-- to every character of the word; characters without an entry stay unchanged.
-- "W" is an internal placeholder: it is lower-cased to "w" at the very end of
-- phonemic(), but until then it is kept distinct from the "w" produced by <ł>.
local phon = {
["a"]="a", ["ã"]="ã", ["b"]="b",
["c"]="t_s", ["ć"]="t_ɕ", ["d"]="d", ["e"]="ɛ", ["é"]="e",
["f"]="f", ["g"]="ɡ", ["h"]="x",
["i"]="i", ["j"]="j", ["k"]="k", ["l"]="l",
["ł"]="w", ["m"]="m", ["n"]="n", ["ń"]="ɲ",
["o"]="ɔ", ["ŏ"]="ɔW", ["ō"]="o", ["ô"]="wɔ", -- õ is dealt with later on
["p"]="p", ["r"]="r", ["s"]="s", ["ś"]="ɕ", ["t"]="t",
["u"]="u", ["w"]="v", ["y"]="ɨ",
["z"]="z", ["ż"]="ʐ", ["ź"]="ʑ",
}
--- Convert one orthographic word into a phonemic IPA transcription.
-- A leading "*" marks the word as unstressed (no stress mark is emitted).
-- A leading "^" puts the stress mark before the third syllable from the end
-- instead of the second from the end. If the word has too few syllables for
-- the stress pattern to match, the stress mark is placed at the very start.
--
-- Internal placeholders used while the string is being rewritten:
--   "_"  affricate tie, turned into the combining tie bar at the end
--   "W"  glide from <ŏ>/<au>, lower-cased to "w" at the end
--   "I"  palatalising <i> before a vowel, turned into "j" at the end
--   "ꙮ"  the digraph <rz>, resolved during voicing assimilation
--   "!"  a "." that was already present in the input
--
-- @param text the word in orthography, optionally prefixed with "*" or "^"
-- @return the transcription as a string, without surrounding slashes
local function phonemic(text)
	local ante = false
	local unstressed = false
	if text:find('^%*') then
		unstressed = true
		text = text:sub(2)
	elseif text:find('^%^') then
		ante = true
		text = text:sub(2)
	end

	-- In-place substitution on `text`. Declared local so that calling
	-- phonemic() does not define a global named `rsub`.
	local function rsub(s, r)
		text = mw.ustring.gsub(text, s, r)
	end

	text = mw.ustring.lower(text)

	-- handle digraphs
	rsub('ch', 'x')
	rsub('[crsd]z', di)
	rsub('dż', 'd_ʐ')

	rsub(".", phon) -- basic orthographical rules

	rsub('au', 'aW')
	rsub("n([kɡx])", "ŋ%1")

	-- palatalisation
	local C_palat = 'bdfɡxklmnprstvzʐʂ'

	-- palatalisation by <-i-> followed by a vowel: n/s/z merge with the <i>
	-- into ɲ/ɕ/ʑ, any other consonant keeps the <i> as the placeholder "I"
	rsub("(["..C_palat.."])i(["..V.."])", function (c, v)
		return (({
			['n'] = 'ɲ',
			['s'] = 'ɕ', ['z'] = 'ʑ',
		})[c] or c .. 'I') .. v
	end)

	-- palatalisation by a remaining <i>
	-- NOTE: 'ʃ' and 'ʒ' are not part of C_palat, so those two entries can
	-- never be looked up; they are kept only to leave the table untouched.
	rsub('(['..C_palat..'])i', function (c)
		return (({
			['n'] = 'ɲ',
			['s'] = 'ɕ', ['ʃ'] = 'ɕ',
			['z'] = 'ʑ', ['ʒ'] = 'ʑ',
		})[c] or c) .. 'i'
	end)

	-- voicing and devoicing
	local T = 'ptsʂɕkx'
	local D = 'bdzʐʑɡ'
	rsub('(['..T..'])v', '%1f')
	rsub('(['..T..'])ꙮ', '%1ʂ')
	rsub('ꙮ', 'ʐ')

	-- Concatenate the keys of a table into one string for use inside a
	-- character class (key order is irrelevant there).
	local function arr_list(x)
		local r = ''
		for i in pairs(x) do
			r = r..i
		end
		return r
	end

	local devoice = {
		['b'] = 'p', ['d'] = 't', ['ɡ'] = 'k',
		['z'] = 's', ['v'] = 'f', ['ʒ'] = 'ʃ',
		['ʑ'] = 'ɕ', ['ʐ'] = 'ʂ',
	}

	-- word-final devoicing
	rsub('['..arr_list(devoice)..']$', devoice)

	-- inverse mapping: voiceless -> voiced
	local voice = {}
	for i, v in pairs(devoice) do
		voice[v] = i
	end

	local arr_list_devoice = arr_list(devoice)
	local arr_list_voice = arr_list(voice)

	-- Regressive assimilation. Each pass only fixes the consonant directly
	-- before an already-settled one, so the pair of substitutions is repeated
	-- (six passes) to let the change spread through longer clusters.
	for _ = 0, 5 do
		rsub('(['..arr_list_devoice..'])(['..T..'])', function (a, b) return devoice[a] .. b end)
		rsub('(['..arr_list_voice..'])(['..D..'])', function (a, b) return voice[a] .. b end)
	end

	-- affricates
	rsub("t([sɕ])", "t_%1")
	rsub("d([zʑ])", "d_%1")

	-- hyphenation
	-- Dots already present in the word are hidden as "!" so that the
	-- automatic syllabifier below never matches across them.
	rsub('%.', '!')
	-- Two passes: gsub matches do not overlap, so the vowel that closes one
	-- match cannot open the next one within the same pass.
	for _ = 0, 1 do
		rsub('(['..V..'W])([^'..V..'W!.]*)(['..V..'])', function (a, b, c)
			local function find(x) return mw.ustring.find(b, x) end
			if (mw.ustring.len(b) < 2) or find('^([td]_.)$') then
				-- nothing, one consonant or one affricate: all goes to the onset
				b = '.'..b
			else
				-- index of the second segment; an initial affricate
				-- ("t_s" etc.) occupies three characters
				local i = 2
				if find('^([td]_.)') then i = 4 end
				if mw.ustring.sub(b, i, i):find('^[rlwI]$') then
					-- second segment is one of r, l, w, I: keep the cluster together
					b = '.'..b
				else
					-- otherwise split after the first segment
					b = mw.ustring.sub(b, 0, i - 1)..'.'..mw.ustring.sub(b, i)
				end
			end
			return a..b..c
		end)
	end
	rsub('!', '.')

	-- stress
	if not unstressed then
		if ante then
			rsub('%.([^.]+%.[^.]+%.[^.]+)$', 'ˈ%1')
		else
			rsub('%.([^.]+%.[^.]+)$', 'ˈ%1')
		end
		-- too few syllables for the pattern above: stress the start
		if not mw.ustring.find(text, 'ˈ') then
			text = 'ˈ' .. text
		end
	end

	-- this should best happen at the end because <ɔ̃> is two characters long and
	-- would mess up with bracket catching. in practice it would work as well
	-- without this but it looks bodge-y.
	rsub('õ', 'ɔ̃')

	-- resolve the remaining placeholders
	rsub('_', '͡')
	rsub('I', 'j')

	-- turns the placeholder "W" into "w"
	text = mw.ustring.lower(text)

	return text
end
--- Transcribe a term that may consist of several space-separated words.
-- Every run of non-space characters is passed to phonemic() on its own and
-- the results are joined with single spaces. A term containing no space at
-- all is handed to phonemic() unchanged.
-- @param term the term in orthography
-- @return the transcription as a string, without surrounding slashes
local function multiword(term)
	if not term:find(' ') then
		return phonemic(term)
	end

	local pieces = {}
	for word in term:gmatch('[^ ]+') do
		pieces[#pieces + 1] = phonemic(word)
	end
	return table.concat(pieces, ' ')
end
-- for testcases
-- Takes a plain string instead of a frame and returns whatever multiword()
-- returns for it, i.e. the transcription without the surrounding slashes
-- that export.IPA adds.
function export.testcase(text)
return multiword(text);
end
--- Template entry point: format the pronunciation of one or more terms.
-- The terms are the positional arguments of the parent frame; when there are
-- none, the text of the current page title is used instead. Each term is
-- transcribed with multiword(), wrapped in slashes and passed on to
-- m_IPA.format_IPA_full together with the language object.
-- @param frame the frame object of the #invoke call
-- @return the value returned by m_IPA.format_IPA_full
function export.IPA(frame)
	local terms = {}
	local args = frame:getParent().args

	-- collect the positional arguments
	for _, term in ipairs(args) do
		terms[#terms + 1] = term
	end

	-- no explicit term given: fall back to the page title
	if #terms == 0 then
		terms = { mw.title.getCurrentTitle().text }
	end

	local IPA_results = {}
	for idx = 1, #terms do
		local transcription = "/" .. multiword(terms[idx]) .. "/"
		IPA_results[#IPA_results + 1] = { pron = transcription }
	end

	return m_IPA.format_IPA_full(lang, IPA_results)
end
return export