-- Pronunciation module: converts Cyrillic spelling into IPA strings
-- (see export.convert and export.main further down in this file).
local export = {}
-- Language code handed to Module:languages in export.main.
local langcode = "urj-kya"
-- Character class of every Cyrillic vowel letter recognised by split_phonemes.
local vowel = "[аеёиіоөӧуӱэюя]"
-- Vowel letters that make a directly preceding consonant soft (used in export.convert).
local vowel_soft = "[еёиюя]"
-- Vowel letters that get a leading "j" in export.convert when they are not
-- directly preceded by a consonant.
local vowel_prej = "[еёюя]"
-- Combining mark that may follow a vowel letter in the input to mark stress.
local vowel_accent = "́"
-- Cyrillic vowel letter -> IPA symbol used in the phonemic transcription.
local vowel_phonemic = {
["а"] = "a", ["е"] = "e", ["ё"] = "o", ["и"] = "i",
["і"] = "i", ["о"] = "o", ["ӧ"] = "ɵ", ["у"] = "u",
["э"] = "e", ["ю"] = "u", ["я"] = "a", ["ө"] = "ʌ",
["ӱ"] = "ʉ",
}
-- Overrides consulted first when a phonetic transcription is requested;
-- vowels not listed here keep their vowel_phonemic value (see convert_vowel).
local vowel_phonetic = {
["а"] = "ä", ["я"] = "ä"
}
-- Character class of every single Cyrillic consonant letter recognised by
-- split_phonemes (digraphs are assembled separately via the digraphs table).
local consonant = "[бвгджзйклмнпрстфхцчшщ]"
-- Consonants that, when they follow within the same cluster, cause the voiced
-- variant to be chosen in the phonetic transcription (see export.convert).
local consonant_voiced = "[бгджз]"
-- Consonants that trigger devoicing of an immediately following-from
-- consonant listed in consonant_devoice (see export.convert).
local consonant_unvoiced = "[птцч]"
-- Voiced letter -> voiceless letter substituted before an unvoiced consonant
-- in the phonetic transcription.
local consonant_devoice = {
["б"] = "п",
["д"] = "т",
}
-- Phonemic IPA values for each consonant letter or digraph.
-- Each entry is an array indexed by convert_consonant:
--   [1] = normal (plain) value, [2] = soft (palatalised) value.
-- normal, soft
local consonant_phonemic = {
["б"] = { "b", "b" },
["в"] = { "ʋ", "ʋ" },
["г"] = { "ɡ", "ɡ" },
["д"] = { "d", "ɟ" },
["дз"] = { "d͡ʑ", "d͡ʑ" },
["дж"] = { "d͡ʒ", "d͡ʒ" },
["ж"] = { "ʒ", "ʒ" },
["з"] = { "z", "ʑ" },
["й"] = { "j", "j" },
["к"] = { "k", "k" },
["л"] = { "l", "ʎ" },
["м"] = { "m", "m" },
["н"] = { "n", "ɲ" },
["п"] = { "p", "p" },
["р"] = { "r", "rʲ" },
["с"] = { "s", "ɕ" },
["т"] = { "t", "c" },
["тш"] = { "t͡ʃ", "t͡ʃ" },
["ф"] = { "f", "f" },
["х"] = { "x", "x" },
["ц"] = { "t͡s", "t͡s" },
["ч"] = { "t͡ɕ", "t͡ɕ" },
["ш"] = { "ʃ", "ʃ" },
["щ"] = { "ʃː", "ʃː" },
}
-- Phonetic IPA values, consulted only when a phonetic transcription is
-- requested. Each entry is an array indexed by convert_consonant:
--   [1] = normal, [2] = soft, [3] = voiced, [4] = voiced and soft.
-- Consonants missing from this table fall back to consonant_phonemic, in which
-- case the voicing flag has no effect.
-- normal, soft, voiced, voiced+soft
local consonant_phonetic = {
["дж"] = { "d͡ʒ̺", "d͡ʒ̺", "d͡ʒ̺", "d͡ʒ̺" },
["ж"] = { "ʒ̺", "ʒ̺", "ʒ̺", "ʒ̺" },
["к"] = { "k", "k", "ɡ", "ɡ" },
["л"] = { "ɫ", "ʎ", "ɫ", "ʎ" },
["п"] = { "p", "p", "b", "b" },
["р"] = { "r̺", "r̺", "r̺", "r̺", },
["с"] = { "s", "ɕ", "z", "ʑ" },
["т"] = { "t", "c", "d", "ɟ" },
["тш"] = { "t͡ʃ̺", "t͡ʃ̺", "d͡ʒ̺", "d͡ʒ̺" },
["ц"] = { "t͡s", "t͡s", "d͡z", "d͡z" },
["ч"] = { "t͡ɕ", "t͡ɕ", "d͡ʑ", "d͡ʑ" },
["ш"] = { "ʃ̺", "ʃ̺", "ʃ̺", "ʃ̺" },
["щ"] = { "ʃ̺ː", "ʃ̺ː", "ʃ̺ː", "ʃ̺ː" },
}
-- Character classes over the IPA *output* (not the Cyrillic input), used by
-- the post-processing substitutions at the end of export.convert.
-- NOTE(review): vowl and vowld are not referenced anywhere in the visible
-- part of this file - confirm whether they are still needed.
local vowl = "[aäeiɨouɯyə]"
local vowld = "[̞̈]"
-- IPA consonant symbols eligible for the "doubled consonant -> long" rewrite.
-- NOTE(review): this class lists ASCII "g", while the transcription tables
-- appear to emit "ɡ" (U+0261), and it has no "ʋ" or "ɫ" - confirm whether
-- doubled г/в/л are meant to be lengthened.
local cons = "[bcdfgjklmnprstvxzɕɟɲʃʎʑʒ]"
-- Combining diacritic(s) that may follow a consonant symbol in phonetic output.
local consd = "[̺]"
-- First (plosive) element of an affricate.
local affricate1 = "[td]"
-- Second (fricative) element of an affricate, without the tie bar.
local affricate2b = "[sɕʃzʑʒ]"
-- Tie bar followed by the fricative element.
local affricate2 = "͡" .. affricate2b
-- Result of assimilating /s/ or /z/ to a following ʃ or ʒ; the key is the
-- sibilant followed by the first symbol of what comes next.
local postalveolar = {
["sʃ"] = "ʃ̺",
["zʃ"] = "ʒ̺",
["sʒ"] = "ʒ̺",
["zʒ"] = "ʒ̺",
}
-- Transcribe a single Cyrillic vowel letter.
-- c: the vowel letter (without any accent mark).
-- is_phonetic: when truthy, a vowel_phonetic override is preferred if one exists.
-- Returns the IPA symbol, or nil when the letter is in neither table.
local function convert_vowel(c, is_phonetic)
	return (is_phonetic and vowel_phonetic[c]) or vowel_phonemic[c]
end
-- Transcribe a single Cyrillic consonant letter or digraph.
-- c: key into the consonant tables.
-- is_phonetic: when truthy and c has a phonetic entry, that entry is used.
-- is_soft: selects the soft column.
-- is_voiced: selects the voiced columns; only honoured for phonetic entries.
-- Returns the IPA string for the chosen variant.
local function convert_consonant(c, is_phonetic, is_soft, is_voiced)
	local column = is_soft and 2 or 1
	local variants = is_phonetic and consonant_phonetic[c]
	if not variants then
		-- phonemic tables only have the normal/soft columns
		return consonant_phonemic[c][column]
	end
	if is_voiced then
		-- voiced variants live two columns further right
		column = column + 2
	end
	return variants[column]
end
-- Digraph table: maps the first letter of a digraph to the set of letters
-- that may follow it to complete the digraph (д + з/ж, т + ш). The value is
-- spliced into a character class by split_phonemes and handle_long_digraphs.
local digraphs = {
["д"] = "зж",
["т"] = "ш"
}
-- Tokenise a Cyrillic string into a list of { kind, text } pairs.
-- kind is one of:
--   "v" vowel (text may carry a trailing accent mark)
--   "c" consonant letter or digraph
--   "s" soft sign (text is nil)
--   "h" hard sign (text is nil)
--   "w" whitespace
--   nil anything emitted verbatim ("-", ".", "ˈ", unknown characters)
-- For every whitespace-delimited word with no accented vowel, a stress mark
-- entry is inserted at the position where that word starts. A string that
-- begins with "-" gets no default stress mark for its first word.
local function split_phonemes(s)
	local ustring = mw.ustring
	local phonemes = { }
	local total = ustring.len(s)
	local pos = 1
	-- index in `phonemes` where the default stress mark of the current word
	-- belongs; nil once the word is known not to need one
	local stress_at = 1

	local function push(kind, text)
		phonemes[#phonemes + 1] = { kind, text }
	end

	if ustring.match(s, "^-") then
		push(nil, "-")
		stress_at = nil
		pos = 2
	end

	while pos <= total do
		local v = ustring.match(s, "^(" .. vowel .. vowel_accent .. "?)", pos)
		if v then
			push("v", v)
			if ustring.find(v, vowel_accent) then
				-- explicit stress given, no default needed
				stress_at = nil
			end
			pos = pos + ustring.len(v)
		else
			local c = ustring.match(s, "^" .. consonant, pos)
			if c then
				-- extend to a digraph when the next letter completes one
				local followers = digraphs[c]
				if followers then
					c = ustring.match(s, "^" .. c .. "[" .. followers .. "]", pos) or c
				end
				push("c", c)
				pos = pos + ustring.len(c)
			else
				local ch = ustring.sub(s, pos, pos)
				if ch == "ь" then
					push("s", nil)
				elseif ch == "ъ" then
					push("h", nil)
				elseif ch == "-" then
					-- inner hyphen becomes a syllable dot, a final one stays a hyphen
					push(nil, pos < total and "." or "-")
				elseif ustring.match(ch, "^%s") then
					if stress_at then
						table.insert(phonemes, stress_at, { nil, "ˈ" })
					end
					push("w", ch)
					stress_at = #phonemes + 1
				else
					-- something else: pass through unchanged
					push(nil, ch)
				end
				pos = pos + 1
			end
		end
	end

	if stress_at then
		table.insert(phonemes, stress_at, { nil, "ˈ" })
	end
	return phonemes
end
-- Look at the first consonant entry after position i, provided no vowel comes
-- before it, and test it against the pattern `consonant`.
-- Returns the result of mw.ustring.match for that consonant (nil when it does
-- not match), or false when a vowel or the end of the list is reached first.
local function is_next_consonant(phonemes, i, consonant)
	for j = i + 1, #phonemes do
		local entry = phonemes[j]
		local kind = entry[1]
		if kind == "v" then
			return false
		elseif kind == "c" then
			return mw.ustring.match(entry[2], consonant)
		end
	end
	return false
end
-- Report whether any consonant entry between position i and the next vowel
-- (or the end of the list) matches the pattern `consonant`.
-- Returns true or false.
local function has_next_consonant(phonemes, i, consonant)
	for j = i + 1, #phonemes do
		local entry = phonemes[j]
		local kind = entry[1]
		if kind == "v" then
			-- the cluster ends at the next vowel
			break
		end
		if kind == "c" and mw.ustring.match(entry[2], consonant) then
			return true
		end
	end
	return false
end
-- Expand abbreviated spellings of doubled sounds so that both halves
-- tokenise identically:
--   * a doubled д/з/л/н/с/т before ь or a softening vowel gets "ь" inserted
--     after its first letter;
--   * a doubled digraph-initial letter followed by the digraph's second
--     letter (e.g. "ддж") is spelled out as the digraph twice ("дждж").
-- Returns the rewritten string.
local function handle_long_digraphs(s)
	local gsub = mw.ustring.gsub
	s = gsub(s, "([дзлнст])%1([ьяеиёю])", "%1ь%1%2")
	for first, followers in pairs(digraphs) do
		local pattern = first .. first .. "([" .. followers .. "])"
		local replacement = first .. "%1" .. first .. "%1"
		s = gsub(s, pattern, replacement)
	end
	return s
end
-- Convert a Cyrillic string to an IPA string (without enclosing slashes or
-- brackets).
-- s: the spelling; may contain accent marks after vowels, hyphens and
--    whitespace.
-- is_phonetic: when truthy, produce the phonetic transcription (voicing
--    assimilation, devoicing, sibilant assimilation, phonetic symbol tables);
--    otherwise produce the phonemic one.
-- Returns the transcription as a single string.
function export.convert(s, is_phonetic)
	local ustring = mw.ustring
	local phonemes = split_phonemes(handle_long_digraphs(s))
	-- Collect output pieces and join once instead of repeated concatenation.
	local pieces = {}
	local function emit(text)
		pieces[#pieces + 1] = text
	end
	-- Set when the stress mark has already been written before the consonant
	-- that directly precedes an accented vowel.
	local stress_preinserted = false

	for i, phoneme in ipairs(phonemes) do
		local kind, x = phoneme[1], phoneme[2]
		if kind == "v" then
			-- vowel, optionally followed by the accent mark
			local v, accent = ustring.match(x, "(" .. vowel .. ")(" .. vowel_accent .. "?)")
			if #accent > 0 and not stress_preinserted then
				emit("ˈ")
			end
			stress_preinserted = false
			-- iotated vowel letters get "j" unless a consonant comes right before
			if (i < 2 or phonemes[i - 1][1] ~= "c") and ustring.match(v, vowel_prej) then
				emit("j")
			end
			emit(convert_vowel(v, is_phonetic))
		elseif kind == "c" then
			-- consonant
			local c = x
			local soft = false
			local voiced = false
			if i < #phonemes then
				local following = phonemes[i + 1]
				-- soft before a softening vowel or a soft sign
				soft = (following[1] == "v" and ustring.match(following[2], "^" .. vowel_soft))
					or following[1] == "s"
				if is_phonetic then
					if has_next_consonant(phonemes, i, "^" .. consonant_voiced) then
						voiced = true
					elseif consonant_devoice[c] and is_next_consonant(phonemes, i, "^" .. consonant_unvoiced) then
						c = consonant_devoice[c]
					end
				end
				-- place the stress mark before the onset consonant
				if following[1] == "v" and ustring.find(following[2], vowel_accent) then
					emit("ˈ")
					stress_preinserted = true
				end
			end
			emit(convert_consonant(c, is_phonetic, soft, voiced))
		elseif kind == "s" then
			-- soft sign, ignore here
		elseif kind == "h" then
			-- hard sign, ignore here
		elseif kind == "w" then
			-- whitespace
			emit(x)
		elseif not kind then
			-- anything else
			emit(x)
		end
	end

	local result = table.concat(pieces)

	if is_phonetic then
		-- Look up the assimilated sibilant for a sibilant + following sound.
		-- Returns nil when the combination has no entry in `postalveolar`.
		local function assimilate(sibilant, following)
			return postalveolar[ustring.sub(sibilant, 1, 1) .. ustring.sub(following, 1, 1)]
		end

		-- assimilation of /s/ and /z/ to following /ʃ/, /ʒ/ or affricate with either
		result = ustring.gsub(result, "([sz]ʲ?)([.ˈ%s]*)([ʃʒ]" .. consd .. "*ʲ?)",
			function (orig, space, palatal)
				local merged = assimilate(orig, palatal)
				if not merged then
					return nil -- keep the matched text unchanged
				end
				return merged .. space .. palatal
			end)
		result = ustring.gsub(result, "([sz]ʲ?)([.ˈ%s]*)(" .. affricate1 .. "͡)(" .. affricate2b .. consd .. "*ʲ?)",
			function (orig, space, plosive, palatal)
				-- The pattern also matches affricates ending in s/z/ɕ/ʑ, for
				-- which `postalveolar` has no entry; leave those untouched
				-- rather than concatenating nil (which raised an error).
				local merged = assimilate(orig, palatal)
				if not merged then
					return nil
				end
				return merged .. space .. plosive .. palatal
			end)
	end

	-- long consonants
	result = ustring.gsub(result, "(" .. affricate1 .. affricate2 .. consd .. "*ʲ?)%1", "%1ː")
	if is_phonetic then
		result = ustring.gsub(result, "(" .. affricate1 .. ")%1(" .. affricate2 .. consd .. "*ʲ?)", "%1%2ː")
	end
	result = ustring.gsub(result, "(" .. cons .. ")%1([^͡ʲ])", "%1ː%2")
	result = ustring.gsub(result, "(" .. cons .. ")%1$", "%1ː")
	return result
end
-- Template entry point: builds the phonemic and phonetic transcriptions of
-- the first template argument (or of the current page title when it is
-- absent, or when `frame` is not a table) and formats them via Module:IPA.
function export.main(frame)
	local title = mw.title.getCurrentTitle().text
	if type(frame) == "table" then
		title = frame:getParent().args[1] or title
	end

	-- spell out shorthand doubled digraphs before conversion
	local expansions = {
		{ "дзз", "дздз" },
		{ "джж", "дждж" },
		{ "тшш", "тштш" },
	}
	for _, pair in ipairs(expansions) do
		title = mw.ustring.gsub(title, pair[1], pair[2])
	end

	local phonemic = export.convert(title, false)
	local phonetic = export.convert(title, true)

	local format_IPA_full = require("Module:IPA").format_IPA_full
	local lang = require("Module:languages").getByCode(langcode)
	local items = {
		{pron = "/" .. phonemic .. "/"},
		{pron = "[" .. phonetic .. "]"}
	}
	return format_IPA_full({ lang = lang, items = items })
end
return export