-- Table of functions exposed by this module; it is returned at the end of the file.
local export = {}
-- NOTE(review): `translit` is not referenced anywhere in the visible code —
-- confirm whether this require is still needed.
local translit = require("Module:niv-translit")
-- Short alias for the Scribunto ustring (UTF-8 aware) version of gsub.
local gsub = mw.ustring.gsub
-- Byte-level Lua pattern for one UTF-8 encoded character: a lead byte
-- (\1-\127 or \194-\244) followed by any number of continuation bytes
-- (\128-\191). It is written in terms of raw bytes, so it is meant for the
-- byte-oriented string library (e.g. string.gmatch).
local UTF8_char = "[\1-\127\194-\244][\128-\191]*"
-- Wrapper around gsub() that hands back only the substituted string; the
-- substitution count that gsub() also returns is dropped.
local function rsub(term, foo, bar)
	-- The extra parentheses truncate gsub's results to the first value.
	return (gsub(term, foo, bar))
end
-- Wrapper around gsub() whose second return value is a boolean instead of a
-- count: true when at least one substitution happened, false otherwise.
local function rsubb(term, foo, bar)
	local result, count = gsub(term, foo, bar)
	local changed = count > 0
	return result, changed
end
-- Combining háček (caron): the bytes \204\140 are the UTF-8 encoding of
-- U+030C, the combining mark that follows р in the letter р̌.
local R = "\204\140"
-- Lookup table that swaps certain Cyrillic letters for single ASCII letters.
-- export.syllabify and export.encipher pass it to gsub() as the replacement
-- table, so that the onset classes used there can be written with one
-- character per sound.
export.encode = {
	["ғ"] = "G",
	["ӻ"] = "R",
	["ӷ"] = "Q",
	["ӈ"] = "N",
	["ӽ"] = "X",
	["ч"] = "c",
	["р̌"] = "r",
	["ӿ"] = "h",
	["ӄ"] = "L",
	["ў"] = "w",
}
-- Character-by-character transliteration table used as the final step of
-- export.tr: each key is a Cyrillic letter (or apostrophe) and each value is
-- its Latin replacement. Capital letters map to capitalised output and
-- lower-case letters to lower-case output.
local tab = {
	["А"]='A', ["а"]='a', ["В"]='V', ["в"]='v', ["Е"]='E', ["е"]='e',
	["Ё"]='Jo', ["ё"]='jo', ["Г"]='G', ["г"]='g', ["Ӷ"]='Ğ', ["ӷ"]='ğ',
	["Ғ"]='Γ', ["ғ"]='γ', ["Ӻ"]='Ġ', ["ӻ"]='ġ', ["Д"]='D', ["д"]='d',
	["И"]='I', ["и"]='i', ["Й"]='J', ["й"]='j',
	["К"]='K', ["к"]='k', ["Ӄ"]='Q', ["ӄ"]='q', ["Л"]='L', ["л"]='l',
	["М"]='M', ["м"]='m', ["Н"]='N', ["н"]='n', ["Ӈ"]='Ŋ', ["ӈ"]='ŋ',
	["О"]='O', ["о"]='o', ["П"]='P', ["п"]='p', ["Р"]='R', ["р"]='r',
	["Р̌"]='Ř', ["р̌"]='ř', ["С"]='S', ["с"]='s', ["Т"]='T', ["т"]='t',
	["У"]='U', ["у"]='u', ["Ў"]='W', ["ў"]='w', ["Ф"]='F', ["ф"]='f', ["Х"]='X', ["х"]='x',
	["Ӽ"]='Ẋ', ["ӽ"]='ẋ', ["Ӿ"]='H', ["ӿ"]='h', ["Ч"]='Ț’', ["ч"]='ț’',
	["Ы"]='Ə', ["ы"]='ə', ["Э"]='E', ["э"]='e', ["Ю"]='Ju', ["ю"]='ju',
	["Я"]='Ja', ["я"]='ja', ["’"]='ʼ', ["ʼ"]='ʼ',
	-- non-native letters
	-- (capital Ж previously produced lower-case 'ž'; it now yields 'Ž' like
	-- every other capital in this table)
	["Б"]='B', ["б"]='b', ["Ж"]='Ž', ["ж"]='ž', ["З"]='Z', ["з"]='z',
	["Ц"]='C', ["ц"]='c', ["Ш"]='Š', ["ш"]='š', ["Щ"]='Šč', ["щ"]='šč',
	['Ъ']='ʺ', ['ъ']='ʺ', ["Ь"]="’", ["ь"]="’"
}
-- Ordered list of { pattern, replacement } pairs. export.tr applies each pair
-- with gsub() after iotation has been rewritten with "j", turning Д/Н/Т (and
-- their lower-case forms) followed by j into a single palatal consonant.
local palatal = {
	-- д + j
	{ "Дj", "D̦" }, { "дj", "d̦" },
	-- н + j
	{ "Нj", "Ņ" }, { "нj", "ņ" },
	-- т + j
	{ "Тj", "Ț" }, { "тj", "ț" },
}
-- Makes iotation explicit: Я/я, Ё/ё and Ю/ю are each replaced by J/j plus a
-- vowel letter, and the soft sign ь is replaced by j. Every other character
-- is left as it is. Returns the rewritten string only.
local function palatalise(text)
	local iotated = {
		['Я'] = 'Jа',
		['я'] = 'jа',
		['Ё'] = 'Jо',
		['ё'] = 'jо',
		['Ю'] = 'Jу',
		['ю'] = 'jу',
		['ь'] = 'j',
	}
	-- Parentheses discard the substitution count returned by gsub().
	return (gsub(text, "[ЯяЁёЮюь]", iotated))
end
-- Characters after which Е/е is written "Je"/"je" by export.tr. Built once
-- when the module loads; string.gmatch with the byte-level UTF8_char pattern
-- walks the literal one UTF-8 character at a time.
local je_triggers = {}
for char in string.gmatch("АОУЫЕИЪЬаӣиоуыэеъьaeiou", UTF8_char) do
	je_triggers[char] = true
end

-- Transliterates Cyrillic text into Latin script.
--   text: the string to transliterate.
--   lang, sc: accepted for interface compatibility; not used in the body.
-- Returns the transliterated string.
function export.tr(text, lang, sc)
	--[=[
	Unfortunately the Cyrillic alphabet doesn't distinguish between ţi and ti
	or ţe and te and so on.
	Represent iotation with j to allow the palatal consonant replacements.
	]=]
	-- UTF8_char is a byte-level pattern (it contains bytes that are not valid
	-- UTF-8 on their own), so it has to go through string.gsub rather than the
	-- ustring version, which rejects non-UTF-8 arguments.
	text = string.gsub(text, UTF8_char,
		{
			['Я'] = 'Ja', ['я'] = 'ja',
			['Ё'] = 'Jo', ['ё'] = 'jo',
			['Ю'] = 'Ju', ['ю'] = 'ju',
			['ь'] = 'j'
		}
	)

	-- Consonant + j becomes a single palatal consonant.
	for _, item in ipairs(palatal) do
		text = gsub(text, unpack(item))
	end

	-- Decide between "e" and "je" for Е/е based on the character before it.
	text = gsub(text,
		"(.?)([Ее])",
		function (preceding, e)
			local capital = e == "Е"
			-- modifier letter apostrophe or right single quotation mark
			if preceding == "ʼ" or preceding == "’" then
				e = capital and "E" or "e"
			elseif preceding == "" or je_triggers[preceding] or mw.ustring.find(preceding, "[^Ѐ-ӿ]") then
				-- start of text, after a listed character, or after anything
				-- outside the Cyrillic range
				e = capital and "Je" or "je"
			else
				-- The letter is left untouched here; the final table lookup
				-- below then turns it into plain E/e.
				mw.log("Module:niv-translit could not decide how to transliterate " .. e ..
					" after " .. preceding .. ".")
			end
			return preceding .. e
		end)

	-- н/д/т directly before и are written as palatal consonants.
	local before_i = {
		{ "ни", "ņi" }, { "Ни", "Ņi" }, { "НИ", "ŅI" },
		{ "ди", "d̦i" }, { "Ди", "D̦i" }, { "ДИ", "D̦I" },
		{ "ти", "ți" }, { "Ти", "Ți" }, { "ТИ", "ȚI" },
	}
	for _, pair in ipairs(before_i) do
		text = gsub(text, pair[1], pair[2])
	end

	-- Final pass: map every remaining character through `tab`; characters
	-- without an entry are kept. Parentheses drop gsub's substitution count.
	return (string.gsub(text, UTF8_char, tab))
end
-- Splits a word into syllables.
--   text:   the word to split.
--   accent: stored unchanged in the `accent` field of the result.
-- Returns the table produced by mw.text.split (the encoded, lower-cased text
-- cut at the inserted "." boundaries) with `accent` attached.
function export.syllabify(text, accent)
	text = palatalise(text)
	-- NOTE(review): NFD splits precomposed letters such as й and ў into a base
	-- letter plus a combining mark — confirm that the keys of export.encode and
	-- the onset classes below are meant to match decomposed text.
	text = mw.ustring.toNFD(text)
	-- `ulower` was never defined in this file; use the ustring lower() directly.
	text = mw.ustring.lower(text)
	-- Match one character plus an optional following háček so that the
	-- two-code-point key "р̌" in export.encode can actually be found; a plain
	-- "." only ever sees the bare "р". Matches without an entry are kept.
	text = gsub(text, "." .. R .. "?", export.encode)

	local onset_1 = "ckqhrwPTCKDJGRQNXмнфсхвзлйр"
	local onset_2 = "hrwJRNXмнфсхвзлйр"
	local vowel = "аеиоуэ"

	-- Put a boundary before consonant + consonant + vowel ...
	text = gsub(text, "[" .. onset_1 .. "][" .. onset_2 .. "][" .. vowel .. "]", ".%0")
	-- ... between two adjacent vowels ...
	text = gsub(text, "([" .. vowel .. "])([" .. vowel .. "])", "%1.%2")
	-- ... and before consonant + vowel.
	text = gsub(text, "[" .. onset_1 .. "][" .. vowel .. "]", ".%0")
	-- Drop a leading boundary and collapse runs of boundaries into one.
	text = gsub(text, "^%.","")
	text = gsub(text, "%.%.+",".")

	-- Split the processed text (the original referenced an undefined `word`).
	-- The third argument makes "." a plain separator rather than a pattern.
	local syllables = mw.text.split(text, ".", true)
	syllables.accent = accent
	return syllables
end
-- Rewrites text into the one-character-per-sound ASCII encoding.
--   text: the string to encode.
-- Returns the encoded string.
-- Raises an error if a modifier letter apostrophe (ʼ) is still present after
-- all apostrophe combinations have been replaced.
function export.encipher(text)
	text = palatalise(text)
	-- Match one character plus an optional following háček so that the
	-- two-code-point key "р̌" in export.encode can actually be found; a plain
	-- "." only ever sees the bare "р". Matches without an entry are kept.
	text = gsub(text, "." .. R .. "?", export.encode)
	-- Consonant + apostrophe (either apostrophe character) becomes one capital.
	text = gsub(text, "п[ʼ’]", "P")
	text = gsub(text, "т[ʼ’]", "T")
	text = gsub(text, "к[ʼ’]", "K")
	-- "L" is the code export.encode assigns to ӄ.
	text = gsub(text, "L[ʼ’]", "q")
	-- NOTE(review): only ʼ is checked here, although the replacements above
	-- also accept ’ — confirm whether a leftover ’ should be reported too.
	if text:find("ʼ", 1, true) then error("Uncoupled ’ found.") end
	return text
end
return export