-- Typing-aid data for Sanskrit written in the Sharada script. Everything is
-- collected in this table, which the module returns at the end of the file.
local data = {}

-- Module:string/char is used below as U(codepoint) to build a character from
-- its Unicode code point.
local U = require("Module:string/char")

-- Signs built from their code points.
local anusvAra = U(0x11181) -- U+11181 SHARADA SIGN ANUSVARA
local visarga = U(0x11182) -- U+11182 SHARADA SIGN VISARGA
local virAma = U(0x111C0) -- U+111C0 SHARADA SIGN VIRAMA
local acute = U(0x301) -- combining acute

-- Avagraha, written literally.
local avagraha = "𑇁"

-- Every Sharada consonant letter, and the same set wrapped as a character
-- class for use inside patterns.
-- NOTE(review): a class of multi-byte characters only works with a
-- Unicode-aware pattern matcher (presumably mw.ustring) — confirm in the
-- module that consumes this data.
local consonants = "𑆑𑆒𑆓𑆔𑆕𑆖𑆗𑆘𑆙𑆚𑆛𑆜𑆝𑆞𑆟𑆠𑆡𑆢𑆣𑆤𑆥𑆦𑆧𑆨𑆩𑆪𑆫𑆬𑆮𑆭𑆯𑆰𑆱𑆲"
local consonant = string.format("[%s]", consonants)
-- Romanization -> Sharada substitution rules. Each entry is a
-- {pattern, replacement} pair. The list is order-sensitive (see the notes on
-- the individual groups), so entries must not be sorted or regrouped.
-- NOTE(review): presumably the consuming module applies the entries from top
-- to bottom with a Unicode-aware gsub — confirm against the caller.

-- Shared by the two cluster passes near the end of the list: two adjacent
-- consonant letters get a virāma inserted between them.
local clusterPattern = "(" .. consonant .. ")(" .. consonant .. ")"
local clusterReplacement = "%1" .. virAma .. "%2"

data["sa-Shrd"] = {
	-- Vowels. Diphthongs and diaeresis forms come first so that their
	-- component letters are not consumed by the single-vowel rules.
	{"ai", "𑆎"},
	{"au", "𑆐"},
	{"ä", "𑆃"},
	{"ö", "𑆏"},
	{"ï", "𑆅"},
	{"ü", "𑆇"},
	{"a", "𑆃"},
	{"ā", "𑆄"},
	{"i", "𑆅"},
	{"ī", "𑆆"},
	{"u", "𑆇"},
	{"ū", "𑆈"},
	{"e", "𑆍"},
	{"o", "𑆏"},
	{"ṝ", "𑆊"},
	{"ṛ", "𑆉"},
	{"r̥", "𑆉"},
	{"ḹ", "𑆌"},
	{"ḷ", "𑆋"},
	-- "a-i" / "a/i" and "a-u" / "a/u" give the two separate vowel letters
	-- (𑆃𑆅, 𑆃𑆇) instead of a diphthong: the separator is dropped here.
	-- Has to come after the "ai" and "au" rules above.
	{"(𑆃)[%-/]([𑆅𑆇])", "%1%2"},

	-- Aspirated stops: the two-letter spellings have to be handled before
	-- the bare "h" rule and before the unaspirated stops.
	{"kh", "𑆒"},
	{"gh", "𑆔"},
	{"ch", "𑆗"},
	{"jh", "𑆙"},
	{"ṭh", "𑆜"},
	{"ḍh", "𑆞"},
	{"th", "𑆡"},
	{"dh", "𑆣"},
	{"ph", "𑆦"},
	{"bh", "𑆨"},
	{"h", "𑆲"},

	-- Unaspirated stops.
	{"k", "𑆑"},
	{"g", "𑆓"},
	{"c", "𑆖"},
	{"j", "𑆘"},
	{"ṭ", "𑆛"},
	{"ḍ", "𑆝"},
	{"t", "𑆠"},
	{"d", "𑆢"},
	{"p", "𑆥"},
	{"b", "𑆧"},

	-- Nasals.
	{"ṅ", "𑆕"},
	{"ñ", "𑆚"},
	{"ṇ", "𑆟"},
	{"n", "𑆤"},
	{"m", "𑆩"},

	-- Semivowels, sibilants and the remaining signs.
	{"y", "𑆪"},
	{"r", "𑆫"},
	{"l", "𑆬"},
	{"v", "𑆮"},
	{"ś", "𑆯"},
	{"ṣ", "𑆰"},
	{"s", "𑆱"},
	{"ṃ", anusvAra},
	{"ḥ", visarga},
	{"'", avagraha},

	-- Consonant clusters. One substitution pass can use a given consonant in
	-- only one match, so in "CCC" the first pass joins just the first pair;
	-- running the identical rule a second time joins the pairs that were
	-- skipped. Both passes must come after every consonant rule above.
	{clusterPattern, clusterReplacement},
	{clusterPattern, clusterReplacement},
	-- A consonant at the very end of the input carries no vowel, so it gets
	-- a virāma as well.
	{"(" .. consonant .. ")$", "%1" .. virAma},

	-- Accent marks are not written in the output: strip the combining acute.
	{acute, ""},
}
-- Dependent vowel signs (U+111B3..U+111BF), keyed by the independent vowel
-- letter that each one replaces after a consonant. 𑆃 (short a) is absent on
-- purpose: it has no sign and is handled by the final rule below.
local vowels = {
	["𑆄"] = U(0x111B3), -- ā
	["𑆅"] = U(0x111B4), -- i
	["𑆆"] = U(0x111B5), -- ī
	["𑆇"] = U(0x111B6), -- u
	["𑆈"] = U(0x111B7), -- ū
	["𑆉"] = U(0x111B8), -- ṛ
	["𑆊"] = U(0x111B9), -- ṝ
	["𑆋"] = U(0x111BA), -- ḷ
	["𑆌"] = U(0x111BB), -- ḹ
	["𑆍"] = U(0x111BC), -- e
	["𑆎"] = U(0x111BD), -- ai
	["𑆏"] = U(0x111BE), -- o
	["𑆐"] = U(0x111BF), -- au
}

do
	local rules = data["sa-Shrd"]
	local afterConsonant = "(" .. consonant .. ")"

	-- An independent vowel letter directly after a consonant becomes the
	-- matching vowel sign. These rules have to be appended after all the
	-- consonant conversions. They do not feed into one another, so the
	-- unspecified iteration order of pairs() does not affect the result.
	for letter, sign in pairs(vowels) do
		rules[#rules + 1] = {afterConsonant .. letter, "%1" .. sign}
	end

	-- Very last rule: drop 𑆃 after a consonant, since the consonant letter
	-- already implies a short "a". It has to run after the vowel-sign rules,
	-- or "aï", "aü" won't work: the 𑆃 is what keeps the following vowel
	-- letter from being turned into a sign.
	rules[#rules + 1] = {afterConsonant .. "𑆃", "%1"}
end
-- ASCII shortcuts: [[w:Harvard-Kyoto]] to
-- [[w:International Alphabet of Sanskrit Transliteration]].
-- The replacements are split into three numbered groups so that a longer key
-- sits in an earlier group than any key contained in it ("lRR" in group 1,
-- "lR" and "RR" in group 2, "R" in group 3).
-- NOTE(review): presumably the consumer applies group 1, then 2, then 3 —
-- confirm against the caller.
data["sa-Shrd-tr"] = {
	-- Group 1: single capitals, plus the longest vocalic sequence.
	{
		A = "ā",
		I = "ī",
		U = "ū",
		J = "ñ",
		T = "ṭ",
		D = "ḍ",
		N = "ṇ",
		G = "ṅ",
		z = "ś",
		S = "ṣ",
		M = "ṃ",
		H = "ḥ",
		lRR = "ḹ",
		["/"] = acute,
	},
	-- Group 2: two-character vocalic sequences.
	{
		lR = "ḷ",
		RR = "ṝ",
	},
	-- Group 3: whatever bare "R" is left.
	{
		R = "ṛ",
	},
}

-- Hand both rule sets back to whoever loads this module.
return data