模組:Mr-Modi-translit
這個模組會將以莫迪文書寫的馬拉地語文字拉丁化。
最好不要直接從模板或其他模組調用此模組。要從模板中使用它，請以 {{xlit}} 做為替代；若要在模組中使用，則以 Module:languages#Language:transliterate 替代。
關於測試用例,請參閱Module:Mr-Modi-translit/testcases。
函數
tr(text, lang, sc)
- Transliterates a given piece of `text` written in the script specified by the code `sc`, and language specified by the code `lang`. When the transliteration fails, returns `nil`.
-- Table of functions this module exposes; `tr` is attached to it further down
-- and the table itself is returned at the end of the file.
local export = {}
-- Short local aliases for the mw.ustring (Scribunto Unicode string) functions
-- used throughout the module.
local U = mw.ustring.char
local gsub = mw.ustring.gsub
local find = mw.ustring.find
-- U+200D ZERO WIDTH JOINER; used below as part of a multi-character key in `conv`.
local ZWJ = U(0x200D)
-- Lookup table from Modi characters (plus a few multi-character sequences and
-- the ASCII '+') to their Latin transliteration.
-- export.tr uses this table as the replacement argument of a gsub whose
-- pattern is '.', i.e. one character at a time. A lookup of that kind can only
-- ever hit keys that are exactly one character long; the multi-character keys
-- below need a separate substitution to take effect.
local conv = {
-- consonants
['𑘎']='k', ['𑘏']='kh', ['𑘐']='g', ['𑘑']='gh', ['𑘒']='ṅ',
['𑘓']='c', ['𑘔']='ch', ['𑘕']='j', ['𑘖']='jh', ['𑘗']='ñ',
['𑘘']='ṭ', ['𑘙']='ṭh', ['𑘚']='ḍ', ['𑘛']='ḍh', ['𑘜']='ṇ',
['𑘝']='t', ['𑘞']='th', ['𑘟']='d', ['𑘠']='dh', ['𑘡']='n',
['𑘢']='p', ['𑘣']='ph', ['𑘤']='b', ['𑘥']='bh', ['𑘦']='m',
['𑘧']='y', ['𑘨']='r', ['𑘩']='l', ['𑘪']='v', ['𑘯']='ḷ',
['𑘫']='ś', ['𑘬']='ṣ', ['𑘭']='s', ['𑘮']='h',
-- multi-character key: the letter for 'r' + virama + ZWJ
['𑘨𑘿'..ZWJ] = 'r',
-- disabled: j + virama + ñ cluster (export.tr rewrites 'jñ' to 'dny' after conversion instead)
-- ['𑘕𑘿𑘗'] = 'dny',
-- vowel diacritics (dependent vowel signs; the characters listed in `vowel`)
---- only in script charts: ['𑘱'] = 'i', ['𑘴'] ='ū',
['𑘳'] = 'u', ['𑘹'] = 'e', ['𑘻'] = 'o',
['𑘰'] = 'ā', ['𑘲'] = 'ī',
['𑘵'] = 'ru',
['𑘺'] = 'ai', ['𑘼'] = 'au',
-- ['𑘰𑙀'] = 'ŏ',
['𑙀'] = 'ĕ',
-- independent vowel letters (the characters listed in `vowel_sign`; the
-- original heading here was "vowel signs")
---- only in script charts: ['𑘂'] = 'i', ['𑘅'] ='ū',
['𑘀'] = 'a', ['𑘄'] = 'u', ['𑘊'] = 'e', ['𑘌'] = 'o',
['𑘁'] = 'ā', ['𑘃'] = 'ī',
['𑘆'] = 'ŕ',
['𑘋'] = 'ai', ['𑘍'] = 'au',
-- multi-character keys: vowel letter + 𑙀, and o + m + virama
['𑘁𑙀'] = 'ŏ',
['𑘀𑙀'] = 'ĕ', ['𑘊𑙀'] = 'ĕ',
['𑘌𑘦𑘿'] = 'om',
-- chandrabindu (disabled)
--- ['𑙀𑘽'] = '̃',
-- anusvara (default value; export.tr rewrites most anusvaras inside words
-- before this table is consulted)
['𑘽'] = 'ṁ',
-- visarga
['𑘾'] = 'ḥ',
-- virama: produces no output of its own
['𑘿'] = '',
-- numerals
['𑙐'] = '0', ['𑙑'] = '1', ['𑙒'] = '2', ['𑙓'] = '3', ['𑙔'] = '4',
['𑙕'] = '5', ['𑙖'] = '6', ['𑙗'] = '7', ['𑙘'] = '8', ['𑙙'] = '9',
-- punctuation
['𑙁'] = '.', -- danda
['𑙂'] = '.', -- double danda
['+'] = '', -- compound separator
-- abbreviation sign
['𑙃'] = '.',
}
-- Maps the character that follows an anusvara (𑘽) in a word to the character
-- export.tr writes in place of that anusvara.
-- Most values are Modi letters, which the later per-character `conv` lookup
-- turns into Latin; the value 'i' is already Latin and passes through that
-- lookup unchanged.
-- A following character with no entry here (e.g. the t/th/d/dh/n row) makes
-- export.tr fall back to the literal "n".
local nasal_assim = {
['𑘎'] = '𑘒', ['𑘏'] = '𑘒', ['𑘐'] = '𑘒', ['𑘑'] = '𑘒',
['𑘓'] = '𑘗', ['𑘔'] = '𑘗', ['𑘕'] = '𑘗', ['𑘖'] = '𑘗',
['𑘘'] = '𑘜', ['𑘙'] = '𑘜', ['𑘚'] = '𑘜', ['𑘛'] = '𑘜',
['𑘢'] = '𑘦', ['𑘣'] = '𑘦', ['𑘤'] = '𑘦', ['𑘥'] = '𑘦', ['𑘦'] = '𑘦',
['𑘧'] = 'i', ['𑘨'] = '𑘄', ['𑘩'] = '𑘩', ['𑘪'] = '𑘄',
['𑘫'] = '𑘄', ['𑘬'] = '𑘄', ['𑘭'] = '𑘄', ['𑘮'] = '𑘄',
}
-- Set (keys mapped to true) of three consonant + virama + 𑘩 sequences.
-- This table is not referenced anywhere else in the visible module.
-- NOTE(review): presumably a list of "permitted clusters" intended for the
-- vowel-deletion logic — confirm before relying on it or removing it.
local perm_cl = {
['𑘦𑘿𑘩'] = true, ['𑘪𑘿𑘩'] = true, ['𑘡𑘿𑘩'] = true,
}
-- Character inventories. Each string is spliced into a [...] set when a
-- pattern is built, so order and duplicates inside a string do not matter.
--   all_cons     : the consonant letters.
--   special_cons : a subset of consonants; not referenced in the visible code.
local all_cons, special_cons = '𑘎𑘏𑘐𑘑𑘒𑘓𑘔𑘕𑘖𑘗𑘘𑘙𑘚𑘛𑘝𑘞𑘟𑘠𑘢𑘣𑘤𑘥𑘫𑘬𑘭𑘧𑘨𑘩𑘪𑘮𑘜𑘡𑘦𑘯', '𑘟𑘝𑘧𑘨𑘩𑘪𑘮𑘡𑘦'
--   vowel      : '*', the Latin 'a' that export.tr inserts for the inherent
--                vowel, and the dependent vowel diacritics ('%*' is an
--                escaped literal asterisk).
--   vowel_sign : the independent vowel letters, plus 𑙀.
local vowel, vowel_sign = '%*a𑘱𑘳𑘵𑘹𑘻𑘰𑘲𑘴𑘺𑘼𑙀', '𑘀𑘂𑘄𑘊𑘌𑘁𑘃𑘅𑘆𑘋𑘍𑘀𑙀'
-- Matched against a REVERSED word: vowel, consonant, inherent 'a', consonant,
-- then a vowel that may carry an anusvara. In the reversed text the anusvara
-- sits in front of its vowel, hence the optional 𑘽 at the start of the last
-- capture. It must be the Modi anusvara (U+1163D): the words this pattern is
-- applied to never contain Devanagari characters, so a Devanagari anusvara
-- here could never match.
local syncope_pattern = '([' .. vowel .. vowel_sign .. '])([' .. all_cons .. '])a([' .. all_cons .. '])(𑘽?[' .. vowel .. vowel_sign .. '])'
--- Reverses a UTF-8 string character by character (not byte by byte).
-- Self-contained: the characters are collected once and swapped in place, so
-- no helper module has to be loaded and no second array is allocated.
-- @param text string  UTF-8 encoded text.
-- @return string  The same characters in reverse order.
local function rev_string(text)
	local chars, count = {}, 0
	-- One lead byte followed by any continuation bytes = one UTF-8 character.
	for char in string.gmatch(text, "[%z\1-\127\194-\244][\128-\191]*") do
		count = count + 1
		chars[count] = char
	end
	-- Swap from both ends towards the middle.
	local left, right = 1, count
	while left < right do
		chars[left], chars[right] = chars[right], chars[left]
		left, right = left + 1, right - 1
	end
	return table.concat(chars)
end
-- Escapes every pattern-magic character in `s` so it can be used as a literal pattern.
local function escape_pattern(s)
	return (gsub(s, '([%^%$%(%)%%%.%[%]%*%+%-%?])', '%%%1'))
end

-- Literal patterns for the keys of `conv` that are longer than one character,
-- longest first. The per-character lookup in export.tr cannot reach those
-- keys, so they get a substitution pass of their own.
local multi_char_patterns = {}
do
	local keys = {}
	for key in pairs(conv) do
		if mw.ustring.len(key) > 1 then
			keys[#keys + 1] = key
		end
	end
	-- Deterministic order: longer keys first, ties broken by byte order.
	table.sort(keys, function(a, b)
		if #a ~= #b then
			return #a > #b
		end
		return a < b
	end)
	for i, key in ipairs(keys) do
		multi_char_patterns[i] = escape_pattern(key)
	end
end

--- Transliterates text written in the Modi script into Latin characters.
-- @param text string  Text to transliterate.
-- @param lang string  Language code; not used by this implementation.
-- @param sc string    Script code; not used by this implementation.
-- @return string  The transliteration, normalised to NFC. This implementation
--                 always returns a string.
function export.tr(text, lang, sc)
	-- NOTE(review): disabled leftovers written with Devanagari characters,
	-- presumably from the module this one was adapted from.
	-- text = gsub(text, 'ाँ', 'ॉ' .. 'ं')
	-- text = gsub(text, 'ँ', 'ॅ' .. 'ं')
	local any_vowel = vowel .. vowel_sign
	local vowel_class = '[' .. vowel .. ']'

	-- An anusvara that follows anything other than a vowel character and
	-- stands before a space or at the end of the text becomes the letter 𑘀 ('a').
	text = gsub(text, '([^' .. any_vowel .. '])𑘽 ', '%1𑘀 ')
	text = gsub(text, '([^' .. any_vowel .. '])𑘽$', '%1𑘀')

	-- Make the inherent vowel explicit: a consonant that is not followed by a
	-- vowel diacritic or a virama gets a Latin 'a' after it.
	text = gsub(text, '([' .. all_cons .. '])([' .. vowel .. '𑘿]?)', function(c, d)
		return c .. (d == "" and 'a' or d)
	end)

	-- Process each word exactly once, in place. Substituting through a
	-- callback (rather than searching the text again for the word's spelling)
	-- ensures that a word which also occurs inside a longer or already
	-- rewritten word cannot clobber it.
	text = gsub(text, "[𑘀-𑙙a]+", function(word)
		-- Work on the reversed word so word-final context is at the start.
		word = rev_string(word)
		-- Drop a word-final inherent 'a' when a consonant and a vowel precede it.
		word = gsub(word, '^a([' .. all_cons .. '][' .. any_vowel .. '])', '%1')
		-- Delete word-internal inherent 'a' wherever syncope_pattern matches;
		-- repeated because matches can overlap.
		while find(word, syncope_pattern) do
			word = gsub(word, syncope_pattern, '%1%2%3%4')
		end
		-- Resolve anusvaras. In the reversed word `succ` is the character
		-- that follows the anusvara in reading order and `prev` the one
		-- that precedes it.
		word = gsub(word, '(.?)𑘽(.)', function(succ, prev)
			local nasal
			if succ .. prev == "a" then
				-- Nothing follows and an inherent 'a' precedes: m + virama
				-- (written reversed here).
				nasal = "𑘿𑘦"
			elseif succ == "" and find(prev, vowel_class) then
				-- Nothing follows and a vowel diacritic precedes: combining tilde.
				nasal = "̃"
			else
				nasal = nasal_assim[succ] or "n"
			end
			return succ .. nasal .. prev
		end)
		return rev_string(word)
	end)

	-- Multi-character sequences first; with a table as replacement the whole
	-- match is used as the lookup key.
	for _, pattern in ipairs(multi_char_patterns) do
		text = gsub(text, pattern, conv)
	end
	-- Then every remaining character; characters without an entry are kept.
	text = gsub(text, '.', conv)

	-- Replace a tilde following 'ai'/'au' with a double tilde placed after the 'a'.
	text = gsub(text, 'a([iu])̃', 'a͠%1')
	-- Collapse the doubled 'a' left when an inserted 'a' meets the letter 'a'.
	text = gsub(text, 'aa', 'a')
	-- Respell the jñ cluster as 'dny' (and 'ñjñ' as 'ndny').
	text = gsub(text, 'ñjñ', 'ndny')
	text = gsub(text, 'jñ', 'dny')
	return mw.ustring.toNFC(text)
end
-- Hand the module table (carrying `tr`) back to whoever loads this module.
return export