-- Based on [[Module:tl-bay sc]] by [[User:Ysrael214]], in turn based on [[Module:tl-pron]]. See respective modules for attribution.
local export = {}
local lang = require("Module:languages").getByCode("ceb")
local sc_Tglg = require("Module:scripts").getByCode("Tglg")
local u = mw.ustring.char
local rfind = mw.ustring.find
local rsubn = mw.ustring.gsub
local rsplit = mw.text.split
local ulower = mw.ustring.lower
-- Combining diacritics, each built as a single code point via mw.ustring.char.
local AC = u(0x0301) -- combining acute accent (U+0301)
local GR = u(0x0300) -- combining grave accent (U+0300)
local CFLEX = u(0x0302) -- combining circumflex (U+0302)
local TILDE = u(0x0303) -- combining tilde (U+0303)
local DIA = u(0x0308) -- combining diaeresis (U+0308)
local MACRON = u(0x0304) -- combining macron (U+0304)
-- Building blocks for the mw.ustring patterns used throughout export.transcribe.
local vowel = "aeəiouàèìòù" -- characters treated as vowels (contents of a character class)
local V = "[" .. vowel .. "]" -- character class matching one vowel
-- Accent marks recognised on vowels; TILDE and DIA are deliberately not part of this set.
local accent = AC .. GR .. CFLEX .. MACRON
local accent_c = "[" .. accent .. "]" -- character class matching one accent mark
local stress_c = "[" .. AC .. GR .. "]" -- character class matching acute or grave only
-- Characters that are neither vowel nor consonant: the accents plus "#", space, "." and "/".
local separator = accent .. "# ./"
local separator_c = "[" .. separator .. "]"
local C = "[^" .. vowel .. separator .. "]" -- consonant: anything that is not a vowel or separator
-- Base glyph for each sound, keyed by the Latin/IPA letter used internally by
-- export.transcribe. The three vowel entries are the standalone vowel letters;
-- the consonant entries are the bare consonant letters, to which a mark from
-- baybayin_marks is appended. Note that "d" and "r" map to the same glyph.
local baybayin_chars = {
["a"] = "ᜀ",
["i"] = "ᜁ",
["u"] = "ᜂ",
["b"] = "ᜊ",
["k"] = "ᜃ",
["d"] = "ᜇ",
["g"] = "ᜄ",
["h"] = "ᜑ",
["l"] = "ᜎ",
["m"] = "ᜋ",
["n"] = "ᜈ",
["ŋ"] = "ᜅ",
["p"] = "ᜉ",
["r"] = "ᜇ",
["s"] = "ᜐ",
["t"] = "ᜆ",
["w"] = "ᜏ",
["y"] = "ᜌ"
}
-- Mark appended after a consonant glyph. The vowel keys select the vowel sign
-- ("a" needs none, hence the empty string). "+" is the mark export.transcribe
-- appends to a consonant that carries no vowel. The "/" entry is not referenced
-- anywhere else in this file.
local baybayin_marks = {
["a"] = "",
["i"] = "ᜒ",
["u"] = "ᜓ",
["+"] = "᜔",
["/"] = "᜕"
}
-- Whole-word respellings applied by export.transcribe before conversion; the
-- lookup key is the lowercased word with its accent mark stripped.
local baybayin_replace_word = {
["mga"] = "manga"
}
-- Thin wrapper over rsubn() that yields only the substituted string and drops
-- the substitution count.
local function rsub(term, foo, bar)
	-- The extra parentheses truncate the call to its first return value.
	return (rsubn(term, foo, bar))
end
-- Wrapper over rsubn() whose second return value is a boolean telling whether
-- at least one substitution took place.
local function rsubb(term, foo, bar)
	local result, count = rsubn(term, foo, bar)
	local changed = count > 0
	return result, changed
end
-- Keep applying rsub() with the same pattern and replacement until the string
-- stops changing, then return that fixed point.
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end
-- Transcribe Latin-script text into Baybayin (Badlit) characters.
--
-- Parameters:
--   text  string to convert; when nil the current page title is used.
--   trad  truthy selects the "traditional" handling: "/" becomes " ᜶ " instead
--         of " ᜵ ", syllable-final consonants are not written, consonant
--         clusters are spelled with vowel signs instead of the "+" mark, and
--         r is not folded into d before the glyph lookup.
--   diph  when falsy, i/u directly after another vowel are turned into the
--         glides j/w before conversion.
-- Returns the converted text, NFC-normalised.
--
-- ĵ and ɟ are used internally to represent [d͡ʒ] and [j]
function export.transcribe(text, trad, diph)
	-- Write-only log of intermediate stages, kept as a debugging aid.
	-- (Named `trace` so that it does not shadow the standard `debug` library.)
	local trace = {}
	text = ulower(text or mw.title.getCurrentTitle().text)
	-- decompose everything but ñ and ü
	text = mw.ustring.toNFD(text)
	text = rsub(text, "." .. "[" .. TILDE .. DIA .. "]", {
		["n" .. TILDE] = "ñ",
		-- ü is u + diaeresis; keying this on TILDE left a decomposed ü
		-- untouched, so the gü rules below could never fire.
		["u" .. DIA] = "ü",
		["e" .. DIA] = "ə",
	})
	-- convert commas and en/em dashes to IPA foot boundaries
	text = rsub(text, "%s*[,–—]%s*", " | ")
	-- question mark or exclamation point in the middle of a sentence -> IPA foot boundary
	text = rsub(text, "([^%s])%s*[!?]%s*([^%s])", "%1 | %2")
	-- canonicalize multiple spaces and remove leading and trailing spaces
	local function canon_spaces(s)
		s = rsub(s, "%s+", " ")
		s = rsub(s, "^ ", "")
		s = rsub(s, " $", "")
		return s
	end
	text = canon_spaces(text)
	local words = rsplit(text, " ")
	for i, word in ipairs(words) do
		-- Look the word up with every accent mark stripped
		local replacement = baybayin_replace_word[rsub(word, accent_c, "")]
		if replacement then
			words[i] = replacement
		end
	end
	text = table.concat(words, " ")
	-- Convert slashes to bantasan, kulit divider
	text = rsub(text, "//", " ᜶ ")
	text = rsub(text, "/", trad and ' ᜶ ' or " ᜵ ")
	-- Convert hyphens to dot
	text = rsub(text, "%-", ".")
	-- canonicalize multiple spaces again, which may have been introduced by hyphens
	text = canon_spaces(text)
	-- now eliminate punctuation
	text = rsub(text, "[!?']", "")
	-- put # at word beginning and end and double ## at text/foot boundary beginning/end
	text = rsub(text, " | ", "# | #")
	text = "##" .. rsub(text, " ", "# #") .. "##"
	-- Move this early for now
	--c, gü/gu+e or i, q
	text = rsub(text, "c([ie])", "s%1")
	text = rsub(text, "([aeëiou])gü([ie])", "%1ɡw%2")
	text = rsub(text, "gü([ie])", "ɡuw%1")
	text = rsub(text, "gu([ie])", "ɡ%1")
	text = rsub(text, "qu([ie])", "k%1")
	text = rsub(text, "ü", "u")
	--ll
	text = rsub(text, "ll([i]?)([aeëiou])", "ly%2")
	-- Correction for vowels with in-between glottal stop, now default
	text = rsub_repeatedly(text, "(" .. V .. ")(" .. V .. ")", "%1.%2")
	table.insert(trace, text)
	-- Reenable "j" sound be equivalent to "dy"
	-- Ex. gaja = ga(r)ya not gariya
	text = rsub(text, "dj(".. V .. ")" , "dy%1")
	text = rsub(text, "j(" .. V .. ")", "dy%1")
	-- handle certain combinations; ch ng and sh handling needs to go first
	text = rsub(text, "([t]?)ch", "ts")
	text = rsub(text, "([n]?)g̃", "ŋ") -- Spanish spelling support
	text = rsub(text, "ng", "ŋ")
	text = rsub(text, "sh", "sy")
	--ck
	text = rsub(text, "ck", "k") -- foreign sound in case
	--x
	text = rsub(text, "([#])x([aeëiou])", "%1s%2")
	text = rsub(text, "x", "ks")
	table.insert(trace, text)
	--alphabet-to-phoneme
	-- Letters matched by the class but absent from the table ("r", "7") are
	-- left unchanged, because a missing table entry keeps the original match.
	text = rsub(text, "[cgjñqrvz7]",
	--["g"]="ɡ": U+0067 LATIN SMALL LETTER G → U+0261 LATIN SMALL LETTER SCRIPT G
	{ ["c"] = "k", ["g"] = "ɡ", ["j"] = "ĵ", ["ñ"] = "nj", ["q"] = "k", ["v"] = "b", ["z"] = "s"})
	--r
	text = rsub(text, "rr", "r")
	--determining whether "y" is a consonant or a vowel
	--Badlitan treats as consonant regardless
	text = rsub(text, "y(" .. V .. ")", "ɟ%1") -- not the real sound
	text = rsub(text,"y([ˈˌ.]?)([bćĉdɡhjĵklmnɲŋpɾrsʃtwɟʔ" .. vowel .. "])","i%1%2")
	text = rsub(text, "y#", "i")
	text = rsub(text, "w(" .. V .. ")","w%1")
	text = rsub(text,"w([ˈˌ]?)([bćĉdɡhjĵklmnɲŋpɾrsʃtwɟʔ])","u%1%2")
	text = rsub(text, "w#","u")
	--text = rsub(text, "sɟ", "ʃ")
	table.insert(trace, text)
	-- Insert "." syllable boundaries: before CV, between CC before a vowel, etc.
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*)(" .. C .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*" .. C .. ")(" .. C .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*" .. C .. "+)(" .. C .. C .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. C .. ")%.s(" .. C .. ")", "%1s.%2")
	-- Any aeo, or stressed iu, should be syllabically divided from a following aeo or stressed iu.
	text = rsub_repeatedly(text, "([aeo]" .. accent_c .. "*)([aeo])", "%1.%2")
	text = rsub_repeatedly(text, "([aeo]" .. accent_c .. "*)(" .. V .. stress_c .. ")", "%1.%2")
	text = rsub(text, "([əiu]" .. stress_c .. ")([aeo])", "%1.%2")
	text = rsub_repeatedly(text, "([əiu]" .. stress_c .. ")(" .. V .. stress_c .. ")", "%1.%2")
	text = rsub_repeatedly(text, "i(" .. accent_c .. "*)i", "i%1.i")
	text = rsub_repeatedly(text, "u(" .. accent_c .. "*)[ou]", "u%1.u")
	table.insert(trace, text)
	-- Remove every accent mark. Accents belong to the separator class, so a
	-- leftover accent would keep its syllable from matching the conversion
	-- pattern below and leave that syllable in Latin script.
	text = rsub(text, accent_c, "")
	table.insert(trace, text)
	if (not diph) then
		--Corrections for diphthongs
		text = rsub(text,"([aeəou])i","%1j") --y
		text = rsub(text,"([aeəio])u","%1w") --w
	end
	table.insert(trace, text)
	-- After processing pronunciation, Badlit Start Translate
	-- The script only distinguishes a / i / u, so fold the other vowels.
	text = rsub(text, "[əei]", "i")
	text = rsub(text, "[ou]", "u")
	-- Remove /kt/ like "abstrakt"
	text = rsub(text, "kt([#.])", "k%1")
	-- Check if there are errors with vowels again
	text = rsub(text,"([aiu])([^.]?)([aəiu])","%1.%2%3")

	-- Convert one syllable. `syll` is onset + vowel, `post` is the coda, and
	-- `last_vowel` is the vowel of the previous syllable when that syllable
	-- ended in m/n (used for cluster spelling in trad mode), else nil.
	local function baybay_syllable(syll, post, last_vowel)
		-- Glyphs for the coda consonants; stays empty in trad mode.
		local syll2 = ""
		-- ASCII stand-ins for multibyte letters, needed because several loops
		-- below walk the string one byte at a time.
		local bay_double = {
			["ĵ"] = "d",
			["ɡ"] = "g", ["ŋ"] = "N",
		}
		-- Map a single internal letter to its base glyph.
		local function baybay(character)
			local bay_soundpre = ''
			character = rsub(character, "[ɡ]", "g")
			if character == 'ĵ'
			then
				bay_soundpre = bay_double[character]
				bay_soundpre = baybayin_chars[bay_soundpre] .. baybayin_marks[trad and 'i' or '+']
			end
			if not trad then
				character = rsub(character, "[r]", "d")
			end
			character = rsub(character, "[f]", "p")
			character = rsub(character, "[ɟj]", "y")
			character = rsub(character, "[N]", "ŋ")
			return bay_soundpre .. baybayin_chars[character]
		end
		if not trad then
			-- Remove /h/ as it is not pronounced in between
			syll = rsub(syll, "([^h]+)(h+)", "%1")
			post = rsub(post, "(h+)", "")
			post = rsub(post, "ŋ", bay_double["ŋ"])
			post = rsub(post, "ɡ", bay_double["ɡ"])
			post = rsub(post, "ĵ", bay_double["ĵ"] .. 's')
			-- Byte-wise walk; the substitutions above turned the multibyte
			-- letters handled here into ASCII stand-ins.
			for c in post:gmatch('.') do
				syll2 = syll2 .. baybay(c) .. baybayin_marks['+']
			end
		end
		syll = rsub(syll, "(" .. C .. "*)(" .. V .. "+)",
			function(consonant, nucleus)
				local bay_char = ''
				-- string.len counts bytes, so a lone ĵ / ŋ / ɡ has length 2.
				if string.len(consonant) == 0 then
					bay_char = baybay(nucleus)
				elseif string.len(consonant) == 1 or string.match(consonant, "[ĵŋɡ]") and string.len(consonant) == 2 then
					bay_char = baybay(consonant) .. baybayin_marks[nucleus]
				elseif string.match(consonant, "^(.*)ll$") then
					-- NOTE(review): under Lua 5.1 (presumably the version this
					-- runs on — confirm) a leading '^' in a gmatch pattern is
					-- not an anchor, so this loop may never execute.
					for c in consonant:gmatch('^(.)ll$') do
						bay_char = bay_char .. baybay(c) .. baybayin_marks[trad and nucleus or '+']
					end
					bay_char = bay_char .. baybay("l") .. baybayin_marks[trad and "i" or '+']
					bay_char = bay_char .. baybay("y") .. baybayin_marks[nucleus]
				else
					-- Two character unicode problems
					consonant = rsub(consonant, "ŋ", bay_double["ŋ"])
					consonant = rsub(consonant, "ɡ", bay_double["ɡ"])
					consonant = rsub(consonant, "ɟ", "y")
					for c in consonant:gmatch('.') do
						bay_char = bay_char .. baybay(c) .. baybayin_marks[trad and (last_vowel or nucleus) or '+']
						last_vowel = nil
					end
					-- The final consonant of the cluster carries the real vowel sign.
					bay_char = rsub(bay_char, baybayin_marks['+'] .. "$", baybayin_marks[nucleus])
				end
				return bay_char
			end
		)
		return syll .. syll2
	end

	words = rsplit(text, " ")
	for i, word in ipairs(words) do
		-- (C)/y/ and --(C)w fixes
		-- /h/ being pronounced like fahm, paham
		if trad then
			word = rsub(word, "([^w" .. vowel .. separator .. "])(w)(" .. V .. ")(" .. C .. "*)([.#]+)", "%1u.%2%3%4%5")
			word = rsub(word, "([^ɟ" .. vowel .. separator .. "])(ɟ)(" .. V .. ")(" .. C .. "*)([.#]+)", "%1i.%2%3%4%5")
			word = rsub(word, "(" .. C .. "*)(" .. V .. ")(h)(" .. C .. "+)([.#]+)", "%1%2.%3%2%4%5")
		end
		local syllables = rsplit(word, "[.]")
		local last_vowel = nil
		for j = 1, #syllables do
			-- Unicode-aware test: V holds multibyte vowels, and a byte-based
			-- string.match would also fire on consonants such as ɡ that share
			-- a lead byte with ə, sending vowelless syllables down this branch.
			if rfind(syllables[j], V) then
				syllables[j] = rsub(syllables[j], "^([#]*)(" .. C .. "*)(" .. V .. "+)(" .. C .. "*)([#]*)$",
					function(temp1, pre, nucleus, post, temp2)
						local retval = temp1 .. baybay_syllable(pre .. nucleus, post, last_vowel) .. temp2
						-- Remember the vowel only when this syllable ends in m/n.
						last_vowel = string.match(post, "[mn]") and nucleus or nil
						return retval
					end
				)
			-- NOTE(review): this class is matched byte-wise, so it also fires on
			-- other characters sharing a byte with ᜵/᜶ (ĵ appears to be one —
			-- confirm). Left as is because baybay() has no glyph for ĵ.
			elseif not string.match(syllables[j], "[᜵᜶]") then
				-- This is only a fallback when no vowel is entered
				syllables[j] = rsub(syllables[j], "^([#]*)(" .. C .. "+)([#]*)$",
					function(temp1, consonant, temp2)
						if trad then
							return temp1 .. baybay_syllable(consonant .. "a", "") .. temp2
						else
							return temp1 .. baybay_syllable("", consonant) .. temp2
						end
					end
				)
			end
		end
		words[i] = table.concat(syllables, "")
	end
	text = table.concat(words, " ")
	-- remove # symbols at word and text boundaries
	text = rsub(text, "#", "")
	text = canon_spaces(text)
	return mw.ustring.toNFC(text)
end
-- Template entry point. Reads the parent frame's arguments, converts the text
-- with export.transcribe and returns it, optionally wrapped in a script-tagged
-- span (disp), preceded by a prefix (pre) and followed by a parenthesised
-- romanisation (tr=1: transliteration of the result; tr=2: the input text with
-- "." removed, "//" shown as "." and "/" shown as ",").
-- The "r" parameter is accepted but not read in this function.
function export.show(frame)
	local param_spec = {
		[1] = {},
		["trad"] = {},
		["diph"] = {},
		["disp"] = {},
		["pre"] = {},
		["tr"] = {},
		["r"] = {},
		-- ["bullets"] = {type = "number", default = 1},
	}
	local parent_args = frame:getParent().args
	local args = require("Module:parameters").process(parent_args, param_spec)

	-- Fall back to the page title when no text is given.
	local text = args[1] or mw.title.getCurrentTitle().text
	local output = export.transcribe(text, args.trad or false, args.diph or false)

	-- Baybayin to Latin
	local suffix = ''
	local mode = args["tr"]
	if mode == '1' then
		local romanized = (lang:transliterate(output, sc_Tglg))
		romanized = rsub(romanized, "%s[,]", ",")
		romanized = rsub(romanized, "%s[.]", ".")
		suffix = ' (' .. romanized .. ')'
	elseif mode == '2' then
		local shown = rsub(text, "[.]", "")
		shown = rsub(shown, "//", ".")
		shown = rsub(shown, "/", ",")
		suffix = ' (' .. shown .. ')'
	end

	local prefix = ""
	if args.pre then
		prefix = args.pre .. " "
	end

	if args.disp then
		output = '<span class="' .. sc_Tglg:getCode() .. '" lang="' .. lang:getCode() .. '">' .. output .. "</span>"
	end
	return prefix .. output .. suffix
end
return export