-- Table returned by this module; replacement rule sets are attached to it below.
local data = {}
-- Shorthand for building a string from Unicode code points.
local U = mw.ustring.char

-- Brahmi combining signs referenced by the rules below
-- (U+11001 anusvara, U+11002 visarga, U+11046 virama).
local anusvAra, visarga, virAma = U(0x11001), U(0x11002), U(0x11046)
local acute = U(0x301) -- combining acute

-- Every Brahmi consonant letter, and a character-class pattern matching any one of them.
local consonants = "𑀓𑀔𑀕𑀖𑀗𑀘𑀙𑀚𑀛𑀜𑀝𑀞𑀟𑀠𑀡𑀢𑀣𑀤𑀥𑀦𑀧𑀨𑀩𑀪𑀫𑀬𑀭𑀮𑀯𑀰𑀱𑀲𑀳𑀴𑀵𑀶𑀷"
local consonant = ("[%s]"):format(consonants)
-- Staged replacement rules turning Latin transliteration into Brahmi script.
-- Each numbered sub-table maps a pattern to its replacement. The stages are
-- presumably applied in ascending order by the consuming module (not visible
-- in this file); within one stage the rules are hash-table entries, so their
-- relative order is unspecified and they must not depend on one another.
data["psu"] = {
	-- Stage 1: diphthongs, before "a", "i" and "u" are handled individually.
	[1] = {
		["ai"] = "𑀐",
		["au"] = "𑀒",
	},
	-- Stage 2: signs, aspirated digraphs (before their plain first letters in
	-- stage 3) and consonants that are not the start of any digraph.
	[2] = {
		["ḹ"] = "𑀎",
		["ṃ"] = anusvAra,
		["ḥ"] = visarga,
		["kh"] = "𑀔",
		["gh"] = "𑀖",
		["ṅ"] = "𑀗",
		["ch"] = "𑀙",
		["jh"] = "𑀛",
		["ñ"] = "𑀜",
		["ṭh"] = "𑀞",
		["ḍh"] = "𑀠",
		["ṇ"] = "𑀡",
		["th"] = "𑀣",
		["dh"] = "𑀥",
		["n"] = "𑀦",
		["ph"] = "𑀨",
		["bh"] = "𑀪",
		["m"] = "𑀫",
		["y"] = "𑀬",
		["r"] = "𑀭",
		["l"] = "𑀮",
		["v"] = "𑀯",
		["ś"] = "𑀰",
		["ṣ"] = "𑀱",
		["s"] = "𑀲",
	},
	-- Stage 3: simple vowels and the unaspirated consonants left over once the
	-- digraphs are gone. ("n" is already converted in stage 2, so it is not
	-- repeated here.)
	[3] = {
		["a"] = "𑀅",
		["ā"] = "𑀆",
		["i"] = "𑀇",
		["ī"] = "𑀈",
		["u"] = "𑀉",
		["ū"] = "𑀊",
		["e"] = "𑀏",
		["o"] = "𑀑",
		["ṝ"] = "𑀌",
		["ḷ"] = "𑀍",
		["k"] = "𑀓",
		["g"] = "𑀕",
		["c"] = "𑀘",
		["j"] = "𑀚",
		["ṭ"] = "𑀝",
		["ḍ"] = "𑀟",
		["t"] = "𑀢",
		["d"] = "𑀤",
		["p"] = "𑀧",
		["b"] = "𑀩",
		["h"] = "𑀳",
	},
	-- Stage 4: clean-up of helper characters and first pass of virama insertion.
	[4] = {
		['̈'] = "", -- delete a combining diaeresis
		["ṛ"] = "𑀋",
		["[%-/]"] = "", -- delete literal "-" and "/": a-i, a-u for अइ, अउ
		-- Two adjacent consonants form a conjunct: insert a virama between them.
		["(" .. consonant .. ")" .. "(" .. consonant .. ")"] = "%1" .. virAma .. "%2",
		-- A consonant at the very end of the text carries no vowel: add a virama.
		["(" .. consonant .. ")$"] = "%1" .. virAma,
		[acute] = "", -- delete the combining acute accent
	},
	[5] = { -- this rule must be applied twice because a consonant may only be in one capture per operation, so "CCC" will only recognize the first two consonants
		["(" .. consonant .. ")" .. "(" .. consonant .. ")"] = "%1" .. virAma .. "%2",
		["i"] = "𑀇",
		["u"] = "𑀉",
	},
	[6] = { -- This table is filled below
	},
}
-- Independent vowel letter -> dependent vowel sign written after a consonant.
-- 𑀅 maps to the empty string because a bare consonant letter already carries
-- that vowel, so the independent letter is simply dropped.
local vowels = {
	["𑀅"] = "",
	["𑀇"] = U(0x1103A),
	["𑀉"] = U(0x1103C),
	["𑀋"] = U(0x1103E),
	["𑀍"] = U(0x11040),
	["𑀏"] = U(0x11042),
	["𑀑"] = U(0x11044),
	["𑀆"] = U(0x11038),
	["𑀈"] = U(0x1103B),
	["𑀊"] = U(0x1103D),
	["𑀌"] = U(0x1103F),
	["𑀎"] = U(0x11041),
	["𑀐"] = U(0x11043),
	["𑀒"] = U(0x11045),
}
-- Generate the "(consonant)(independent vowel)" rules.
-- The rule that drops 𑀅 must run after every vowel-sign rule: in a hiatus
-- such as "ka-i" (𑀓𑀅𑀇), dropping 𑀅 first would leave 𑀓𑀇, which the 𑀇 rule
-- would then wrongly turn into "ki". Rules inside one stage are visited in an
-- unspecified order, so the vowel-sign rules go into stage 5 and only the
-- 𑀅-dropping rule stays in stage 6. The vowel-sign rules do not interact with
-- the consonant-cluster rule already present in stage 5.
-- NOTE(review): assumes the consumer applies the stages in ascending order.
for independentForm, diacriticalForm in pairs(vowels) do
	local pattern = "(" .. consonant .. ")" .. independentForm
	if diacriticalForm == "" then
		data["psu"][6][pattern] = "%1"
	else
		data["psu"][5][pattern] = "%1" .. diacriticalForm
	end
end
-- ASCII input in [[w:Harvard-Kyoto]] notation is rewritten to
-- [[w:International Alphabet of Sanskrit Transliteration]] by these rules.
-- The three positional sub-tables are stages 1, 2 and 3; longer sequences
-- ("lRR", then "lR"/"RR") sit in earlier stages than the bare "R".
data["psu-tr"] = {
	{
		A = "ā",
		I = "ī",
		U = "ū",
		J = "ñ",
		T = "ṭ",
		D = "ḍ",
		N = "ṇ",
		G = "ṅ",
		z = "ś",
		S = "ṣ",
		M = "ṃ",
		H = "ḥ",
		lRR = "ḹ",
		["/"] = acute, -- slash becomes a combining acute
	},
	{
		lR = "ḷ",
		RR = "ṝ",
	},
	{
		R = "ṛ",
	},
}

return data