-- Table of replacement data; it is filled in below and returned at the end of the file.
local data = {}
-- Shorthand for building a string from code points.
local U = mw.ustring.char
-- Anusvara, visarga and virama signs (U+1163D to U+1163F).
local anusvAra, visarga, virAma = U(0x1163D), U(0x1163E), U(0x1163F)
local zwj = U(0x200D) -- zero-width joiner
local acute = U(0x301) -- combining acute
local avagraha = "ऽ"
-- All consonant letters, and a character class that matches any single one of them.
local consonants = "𑘎𑘏𑘐𑘑𑘒𑘓𑘔𑘕𑘖𑘗𑘘𑘙𑘚𑘛𑘜𑘝𑘞𑘟𑘠𑘡𑘢𑘣𑘤𑘥𑘦𑘧𑘨𑘩𑘪𑘯𑘫𑘬𑘭𑘮"
local consonant = "[" .. consonants .. "]"
-- Pattern and replacement that put a virama between two adjacent consonants.
local cluster_pattern = "(" .. consonant .. ")(" .. consonant .. ")"
local cluster_replacement = "%1" .. virAma .. "%2"

-- Ordered list of {pattern, replacement} pairs. The order is significant:
-- longer or more specific sequences are listed before the shorter ones they contain.
data["omr"] = {
	-- Vowels and modifiers. Diphthongs and diaeresis forms come first.
	-- The short, long and diaeresis spellings of i (and of u) all produce the same letter.
	{"ai", "𑘋"},
	{"au", "𑘍"},
	{"ï", "𑘃"},
	{"i", "𑘃"},
	{"ī", "𑘃"},
	{"ü", "𑘄"},
	{"u", "𑘄"},
	{"ū", "𑘄"},
	{"a", "𑘀"},
	{"ā", "𑘁"},
	{"e", "𑘊"},
	{"o", "𑘌"},
	{"ṝ", "𑘇"},
	{"ṛ", "𑘆"},
	{"r̥", "𑘆"},
	{"ṟ", "𑘨" .. virAma .. zwj}, -- eyelash र
	{"ḹ", "𑘉"},
	{"l̥", "𑘈"},
	-- "a-i" and "a-u" (or with "/") give 𑘀𑘃 and 𑘀𑘄: the separator is dropped.
	-- This has to come after the "ai" and "au" rules.
	{"(𑘀)[%-/]([𑘃𑘄])", "%1%2"},

	-- Aspirated (two-letter) consonants; these have to precede the rule for plain h.
	{"kh", "𑘏"},
	{"gh", "𑘑"},
	{"ch", "𑘔"},
	{"jh", "𑘖"},
	{"ṭh", "𑘙"},
	{"ḍh", "𑘛"},
	{"th", "𑘞"},
	{"dh", "𑘠"},
	{"ph", "𑘣"},
	{"bh", "𑘥"},
	{"h", "𑘮"},

	-- Unaspirated stops.
	{"k", "𑘎"},
	{"g", "𑘐"},
	{"c", "𑘓"},
	{"j", "𑘕"},
	{"ṭ", "𑘘"},
	{"ḍ", "𑘚"},
	{"t", "𑘝"},
	{"d", "𑘟"},
	{"p", "𑘢"},
	{"b", "𑘤"},

	-- Nasals.
	{"ṅ", "𑘒"},
	{"ñ", "𑘗"},
	{"ṇ", "𑘜"},
	{"n", "𑘡"},
	{"m", "𑘦"},

	-- Everything else: semivowels, sibilants, ḷ, and the modifier signs.
	{"y", "𑘧"},
	{"r", "𑘨"},
	{"l", "𑘩"},
	{"v", "𑘪"},
	{"ś", "𑘫"},
	{"ṣ", "𑘬"},
	{"s", "𑘭"},
	{"ḷ", "𑘯"},
	{"ṃ", anusvAra},
	{"ḥ", visarga},
	{"'", avagraha},

	-- Virama insertion inside consonant clusters. It has to follow every consonant
	-- conversion above. The rule is listed twice on purpose: one substitution pass
	-- cannot reuse a consonant in two matches, so in "CCC" only the first pair is
	-- handled by the first pass and the second pass picks up the remaining pair.
	{cluster_pattern, cluster_replacement},
	{cluster_pattern, cluster_replacement},
	-- A consonant at the very end of the text also gets a virama.
	{"(" .. consonant .. ")$", "%1" .. virAma},
	-- Strip any combining acute accents.
	{acute, ""},
}
-- Maps each independent vowel letter to the combining vowel sign (U+11630 to U+1163C)
-- that replaces it after a consonant.
local vowels = {
	["𑘁"] = U(0x11630),
	["𑘂"] = U(0x11631),
	["𑘃"] = U(0x11632),
	["𑘄"] = U(0x11633),
	["𑘅"] = U(0x11634),
	["𑘆"] = U(0x11635),
	["𑘇"] = U(0x11636),
	["𑘈"] = U(0x11637),
	["𑘉"] = U(0x11638),
	["𑘊"] = U(0x11639),
	["𑘋"] = U(0x1163A),
	["𑘌"] = U(0x1163B),
	["𑘍"] = U(0x1163C),
}

local rules = data["omr"]

-- Append one rule per vowel: an independent vowel directly after a consonant
-- becomes the matching vowel sign. Being appended, these rules come after all
-- consonant conversions. Each rule targets a different letter, so the
-- unspecified iteration order of pairs() does not affect the outcome.
for letter, sign in pairs(vowels) do
	rules[#rules + 1] = {"(" .. consonant .. ")" .. letter, "%1" .. sign}
end

-- Final rule: drop 𑘀 when it directly follows a consonant. It has to be appended
-- after the vowel-sign rules above, otherwise "aï" and "aü" would not work.
rules[#rules + 1] = {"(" .. consonant .. ")𑘀", "%1"}
-- ASCII shortcuts mapped to the transliteration characters used as input by the
-- data["omr"] rules. The table is split into numbered stages so that longer
-- key sequences ("LRR") sit in an earlier stage than the shorter sequences
-- they contain ("LR", then "L"; likewise "RR" before "R").
-- NOTE(review): presumably the consuming module applies stage 1, then 2, then 3;
-- confirm against the caller.
data["omr-tr"] = {
	[1] = {
		["A"] = "ā",
		["I"] = "ī",
		-- Was "u", which made the shortcut a no-op and disagreed with "A" and "I".
		-- Both "u" and "ū" map to the same letter in data["omr"], so the final
		-- script output is unchanged by this correction.
		["U"] = "ū",
		["J"] = "ñ",
		["T"] = "ṭ",
		["D"] = "ḍ",
		["N"] = "ṇ",
		["G"] = "ṅ",
		["z"] = "ś",
		["S"] = "ṣ",
		["M"] = "ṃ",
		["H"] = "ḥ",
		["LRR"] = "ḹ",
		["/"] = acute,
	},
	[2] = {
		["LR"] = "l̥",
		["RR"] = "ṝ",
		["r_"] = "ṟ",
	},
	[3] = {
		["R"] = "ṛ",
		["L"] = "ḷ",
	},
}

return data