local export = {}

local m_IPA = require("Module:IPA")
local m_hyph = require("Module:fi-hyphenation")
local m_str_utils = require("Module:string utilities")

-- Local aliases for the string helpers exported by Module:string utilities.
-- NOTE(review): presumably these are Unicode-aware counterparts of the
-- standard string library functions — confirm against that module.
local find = m_str_utils.find
local gmatch = m_str_utils.gmatch
local gsub = m_str_utils.gsub
local len = m_str_utils.len
local lower = m_str_utils.lower
local match = m_str_utils.match
local sub = m_str_utils.sub
-- U(codepoint) builds a character from its code point.
local U = m_str_utils.char

-- Language code handed to Module:languages when the result is formatted.
local langcode = "fi"

-- Combining marks and symbols that are appended to phonemes further below.
local nonsyllabic = U(0x32F) -- inverted breve below
local unreleased = U(0x31A) -- U+031A; appended to plosives marked as unreleased
local nasalized = U(0x303) -- U+0303; only referenced from commented-out code in this file
local long = "ː" -- length mark placed after a long vowel or consonant

-- Maps spelled letters / letter sequences to their IPA output.
-- IPA_wordlet scans the input left to right and, at each position, uses the
-- LONGEST key of this table that matches; characters with no matching key are
-- copied through unchanged. Identity mappings for the remaining plain letters
-- are added by a loop after this table.
local letters_phonemes = {
	-- vowels whose IPA symbol differs from the letter
	["a"] = "ɑ",
	["ä"] = "æ",
	["ö"] = "ø",
	["å"] = "o",
	-- single consonant letters whose IPA symbol differs from the letter
	["g"] = "ɡ",
	["q"] = "k",
	["v"] = "ʋ",
	["š"] = "ʃ",
	["ž"] = "ʒ",
	-- letters and sequences that expand to more than one symbol,
	-- plus the special input markers "*" and "'"
	["x"] = "ks",
	["zz"] = "ts",
	["ng"] = "ŋː",
	["nk"] = "ŋk",
	["nkk"] = "ŋkː",
	["qu"] = "kʋ",
	["*"] = "ˣ", -- later rewritten by IPA_term (narrow) into ʔ / consonant doubling
	["'"] = ".", -- apostrophe becomes a syllable break
}

-- Character sets (and the bracketed pattern classes built from them) that the
-- substitution rules below are assembled from.

-- IPA vowel symbols as produced by letters_phonemes.
local vowels = "ɑeiouyæø"
local vowel = "[" .. vowels .. "]"
-- IPA consonant symbols. Both "ɡ" and plain "g" are listed, and "r" appears
-- twice; duplicates are harmless inside a character class.
local consonants = "kptɡgbdfʔsnmŋlrhʋʃʒrjçɦx"
local consonant = "[" .. consonants .. "]"
-- Combining diacritics that may follow a phoneme in narrow transcription
-- (presumably the marks appended by replacements_narrow — verify the code points).
local diacritics = "̝̞̠̪"
local diacritic = "[" .. diacritics .. "]"

-- Extra orthographic letters, so that the spelled_* classes can be applied to
-- text that has not been converted to IPA yet (see classify_syllable and
-- has_hiatus).
local spelled_consonants = "cšvwxzž"
local spelled_consonant = "[" .. consonants .. spelled_consonants .. "]"
local spelled_vowels = "aäö"
local spelled_vowel = "[" .. vowels .. spelled_vowels .. "]"

local tertiary = "ˌ" -- "tertiary stress", a weaker secondary stress (either rhythmic or in some compound words). is there a better way to represent this?
-- Exported so that other modules can refer to the same mark.
export.tertiary = tertiary

-- Characters that mark the start of a stressed unit: space, stress marks and
-- the "/", "+", "-" separators. Note that `tertiary` is currently the same
-- character as the secondary stress mark, so it occurs twice in the class.
local stress_indicator = "[ ˈˌ" .. tertiary .. "/+-]"
export.stress_indicator = stress_indicator
-- Plosive symbols; used by IPA_term to decide whether to add `unreleased`.
local plosives = "kptbdɡ"

-- Separators that split a term into "wordlets" in IPA_term.
local wordlet_sep = "[/-]"

-- Stress-mark classes used inside patterns:
--   stress_p  = primary, secondary or tertiary stress
--   stress_s  = secondary or tertiary stress
--   stress_pd = stress_p or a syllable break "."
--   stress_sd = stress_s or a syllable break "."
local stress_p = "[ˈˌ" .. tertiary .. "]"
local stress_s = "[ˌ" .. tertiary .. "]"
local stress_pd = "[ˈˌ" .. tertiary .. "%.]"
local stress_sd = "[ˌ" .. tertiary .. "%.]"

-- Phoneme -> narrow-transcription replacement (the same symbol followed by a
-- combining diacritic). Applied by IPA_wordlet in narrow mode only. No
-- replacement produces a string containing another key, so the order in which
-- the entries are applied does not matter.
local replacements_narrow = {
	["ɑ"] = "ɑ̝",
	["e"] = "e̞",
	["ø"] = "ø̞",
	["o"] = "o̞",
	["t"] = "t̪",
	["s"] = "s̠"
}

--	This adds letters_phonemes["e"] = "e", letters_phonemes["i"] = "i", etc.
--	("u" occurs twice in the list; the second assignment is a harmless repeat.)
for letter in gmatch("eiouydhfjklmnprstu", ".") do
	letters_phonemes[letter] = letter
end

--[[	These Lua patterns (not full regular expressions) find the diphthongs
		in the IPA transcription, so that handle_diphthongs can append the
		nonsyllabic diacritic after the second vowel.						]]
-- /_i/ diphthongs can appear in any syllable
local diphthongs_i = {
	"[ɑeouyæø]i"
}
-- /_U/ diphthongs can appear in the initial syllable or later open syllables (no consonantal coda)
local diphthongs_u = {
	"[ɑoei]u",
	"[eiæø]y",
}
-- rising diphthongs can only appear in the initial syllable (of a word, compound word part, etc.)
local diphthongs_rising = {
	"uo",
	"ie",
	"yø",
}

-- Clean-up substitutions applied to every transcription (broad and narrow)
-- after the letters have been converted to IPA.
--   p: the IPA string built so far
-- Returns the string with all substitutions applied, in the order listed.
local function apply_post_fixes(p)
	-- one consonant plus any diacritics attached to it, as a capture
	local cons = "(" .. consonant .. diacritic .. "*)"

	-- each entry is { pattern, replacement }; order is significant
	local rules = {
		-- <gn> directly after the primary stress mark is /ɡn/, not /ŋn/
		{ "ˈŋn", "ˈɡn" },
		-- ŋ loses its length mark when another consonant follows
		{ "ŋ" .. long .. "(" .. consonant .. ")", "ŋ%1" },
		-- identical vowels meeting across a secondary/tertiary stress mark:
		-- insert an optional glottal stop between them
		{
			"(" .. vowel .. diacritic .. "*)([" .. long .. nonsyllabic .. "]?)(" .. stress_s .. ")%1",
			"%1%2%3(ʔ)%1",
		},
		-- C1ːC2 -> C1(ː)C2: length before another consonant becomes optional
		{ cons .. long .. cons, "%1(" .. long .. ")%2" },
		-- C1.C1C2 -> C1.(C1)C2
		{ cons .. "%.%1" .. cons, "%1.(%1)%2" },
		-- C1(.)C1C2 / C1()C1C2 -> C1(.C1)C2
		{ cons .. "%(%.?%)%1" .. cons, "%1(.%1)%2" },
	}

	for _, rule in ipairs(rules) do
		p = gsub(p, rule[1], rule[2])
	end

	return p
end

-- Additional substitutions applied only to the narrow (phonetic)
-- transcription, after apply_post_fixes.
--   p: the narrow IPA string built so far
-- Returns the string after all substitutions. The rules run strictly in the
-- order written and later rules operate on the output of earlier ones, so the
-- order must not be changed casually.
local function apply_post_fixes_narrow(p)
	-- C1:+C2 -> C1+(C1)C2
	p = gsub(p, "("..consonant..diacritic.."*)"..long..tertiary.."("..consonant..diacritic.."*)", "%1"..tertiary.."(%1)%2")
	-- C1+C1C2 -> C1+(C1)C2
	-- but C1C2+C2C3 --> C1(C2)+C2C3
	-- NOTE(review): the first capture below is consonant..diacritic WITHOUT "*",
	-- i.e. C1 must carry exactly one diacritic for this rule to fire, unlike the
	-- neighbouring rules — confirm whether that is intended.
	p = gsub(p, "("..consonant..diacritic..")("..consonant..diacritic.."*)"..tertiary.."(%.?)%2("..consonant..diacritic.."*)", "%1(%2)"..tertiary.."%2%3%4")
	p = gsub(p, "("..consonant..diacritic.."*)"..tertiary.."(%.?)%1("..consonant..diacritic.."*)", "%1"..tertiary.."(%1)%2%3")
	p = gsub(p, "("..consonant..diacritic.."*)"..tertiary.."%(?%.?%)%1("..consonant..diacritic.."*)", "%1"..tertiary.."(.%1)%2")
	
	-- t is alveolar in /ts/ and /st/
	-- (the dental diacritic is dropped from t next to s̠, also across a
	-- stress mark / syllable break and optional whitespace)
	p = gsub(p, "t̪("..stress_pd.."?%s*)s̠", "t%1s̠")
	p = gsub(p, "s̠("..stress_pd.."?%s*)t̪", "s̠%1t")
	-- n is dental in /nt/
	-- (and likewise in /tn/, handled by the second rule)
	p = gsub(p, "n("..stress_pd.."?%s*)t̪", "n̪%1t̪")
	p = gsub(p, "t̪("..stress_pd.."?%s*)n", "t̪%1n̪")
	-- l allophone
	-- (l takes the dental diacritic next to t̪, in either order)
	p = gsub(p, "l("..stress_pd.."?%s*)t̪", "l̪%1t̪")
	p = gsub(p, "t̪("..stress_pd.."?%s*)l", "t̪%1l̪")

	-- long j, v after i, u diphthong
	-- (j between a nonsyllabic i and a vowel gets an optional length mark)
	p = gsub(p, "(i"..nonsyllabic..")j("..vowel..")", "%1j("..long..")%2")
	-- /ʋ/ after /u/ usually realized as /w/ (see Suomi, Toivanen and Ylitalo 2008, p. )
	p = gsub(p, "(u"..nonsyllabic..")ʋ("..vowel..")", "%1w("..long..")%2")
	-- cleanup
	-- (drop a syllable break right after a secondary/tertiary stress mark, and
	-- a secondary/tertiary stress mark right after another stress mark or break)
	p = gsub(p, "("..stress_s..")%.", "%1")
	p = gsub(p, "("..stress_pd..")"..stress_s, "%1")

	-- tautosyllabic nasals nasalize vowels between them (see Suomi, Toivanen and Ylitalo 2008, p. 22)
	-- (this rule is currently disabled; it is kept here commented out)
	--p = gsub(p, "([mnŋ]"..long.."?)("..vowel..")("..diacritic.."*)([mnŋ])(.?)", function (n0, nv, nvd, n1, anchor)
	--	-- this cannot be simplified to "(.?)" => "([^" .. vowels .. "]?)", otherwise a vowel after would match
	--	if not find(anchor, vowel) then
	--		return n0 .. nv .. nasalized .. nvd .. n1 .. anchor
	--	end
	--end)
	
	-- sandhi: nm > mm, np > mp, nb > mb, nk > ŋk, ng > ŋg
	-- (the last rule additionally turns n/m before f into labiodental ɱ)
	p = gsub(p, "nm", "m" .. long)
	p = gsub(p, "n("..stress_pd.."?%s*)([ɡk])", "ŋ%1%2")
	p = gsub(p, "n("..stress_pd.."?%s*)([mpb])", "m%1%2")
	p = gsub(p, "[nm]("..stress_pd.."?%s*)([f])", "ɱ%1%2")

	-- handle potentially long consonants over secondary stresses
	-- C(ː)+ -> +C(ː): move the consonant to the right of the stress mark
	p = gsub(p, "("..consonant..diacritic.."*)%("..long.."%)("..stress_s..")", "%2%1("..long..")")
	-- +C(ː) -> (C)+C: spell the optional length as an optional consonant before the mark
	p = gsub(p, "("..stress_s..")("..consonant..diacritic.."*)%("..long.."%)", "(%2)%1%2")
	-- an optional plosive before the mark, repeated after it, is marked unreleased
	p = gsub(p, "%(([kptbdɡ])%)("..stress_s..")%1", "(%1"..unreleased..")%2%1")
	-- ŋ+ɡ -> ŋ+ŋ
    p = gsub(p, "(ŋ"..diacritic.."*)"..tertiary.."ɡ", "%1"..tertiary.."ŋ")
	
	-- [k] allophone before front vowels (see Suomi, Toivanen and Ylitalo 2008, p. 27)
	p = gsub(p, "k([eiyæø])", "k̟%1")
	
	return p
end

-- Syllable weight classes returned by classify_syllable:
--   -1  degenerate (contains no vowel)
--    0  light (has a vowel, ends in a single vowel)
--    1  heavy with coda (has a vowel, ends in a consonant)
--    2  heavy without coda (ends in two vowels: long vowel or diphthong)
--   99  already stressed (starts with a stress indicator / separator)
local class_stressed_syllable = 99

-- Classifies one (still orthographic) syllable into one of the classes above.
local function classify_syllable(syllable)
	if match(syllable, "^" .. stress_indicator) then
		return class_stressed_syllable
	end

	-- ignore a trailing "*", "(*)" or stray parenthesis when looking at the end
	local core = gsub(syllable, "%(?%*?%)?$", "")

	if match(core, spelled_vowel .. spelled_vowel .. "$") then
		return 2
	end

	-- from here on, a syllable without any vowel is always degenerate
	if not match(core, spelled_vowel) then
		return -1
	end

	if match(core, spelled_consonant .. "$") then
		return 1
	end
	return 0
end

-- Tells whether two adjacent syllables meet vowel-to-vowel: `left` ends in a
-- vowel and `right` begins with one (optionally after "(", "." and/or ")").
-- Returns a truthy match result when they do, nil otherwise.
local function has_hiatus(left, right)
	local left_ends_in_vowel = match(left, spelled_vowel .. "$")
	if not left_ends_in_vowel then
		return left_ends_in_vowel
	end
	-- parenthesised so that exactly one value is returned
	return (match(right, "^%(?%.?%)?" .. spelled_vowel))
end

-- Upper bound on how far ahead (in syllables) the left-to-right algorithm
-- looks for a heavy syllable, and thus on consecutive unstressed syllables.
local trad_lookahead = 2

-- Traditional left-to-right rhythmic stress assignment (e.g. in (V)ISK).
-- Stress is handed out from the left, but a light syllable passes it on to a
-- later syllable when a heavy one follows within the lookahead window.
--   hyph:    list of syllables (unused here; kept for a uniform signature)
--   classes: list of syllable classes as produced by classify_syllable
-- Returns a table whose keys are the indices (> 1) of syllables to stress.
local function add_rhythmic_stress_ltr(hyph, classes)
	local marks = {}
	local count = #classes
	local previous_stressed

	for pos = 1, count do
		local weight = classes[pos]
		local stressed = (weight == class_stressed_syllable)

		-- a non-final syllable right after an unstressed one is a candidate
		if not (stressed or previous_stressed) and pos < count then
			-- is there a heavy syllable (never the last one) within reach?
			local heavy_ahead = false
			for ahead = pos + 1, math.min(pos + trad_lookahead, count - 1) do
				if classes[ahead] > 0 then
					heavy_ahead = true
					break
				end
			end

			-- take the stress here if this is the penultimate syllable, if it
			-- is heavy itself, or if there is nothing heavier to defer to
			stressed = pos == count - 1 or weight > 0 or not heavy_ahead
		end

		-- never stress a syllable directly before an already stressed one
		if stressed and pos < count and classes[pos + 1] == class_stressed_syllable then
			stressed = false
		end

		-- the first syllable never receives a rhythmic mark
		if stressed and pos > 1 then
			marks[pos] = true
		end
		previous_stressed = stressed
	end

	return marks
end

-- Right-to-left rhythmic stress assignment,
-- based on Karvonen (2005) "Word prosody in Finnish".
--   hyph:    list of syllables (orthographic, as split by the hyphenator)
--   classes: list of syllable classes as produced by classify_syllable
-- Returns a table whose keys are the indices of syllables to stress.
--
-- Algorithm:
--	(1) analyze each segment started by an already stressed syllable
--      as a separate prosodic word
--  (2) assign from *right to left*
--  (3) final syllable is never stressed (not by this algorithm)
--  (4) stress may shift back by a single (unstressed) syllable
--      to a better candidate, i.e. one whose class number is higher
--      NOTE(review): the original description said "(C)VC is better than
--      (C)V and (C)VV", but with classes VC = 1 and VV = 2 the comparison
--      below prefers (C)VV over (C)VC — confirm which is intended.
--  (5) if the final syllable in a prosodic word begins with a vowel
--      and the previous syllable ends in a vowel, skip it
local function add_rhythmic_stress_rtl(hyph, classes)
	local add_stress = {}
	-- number of unstressed syllables collected for the current foot
	local foot = 0
	-- true while `index` is the last syllable of a prosodic word, i.e. on the
	-- very first iteration and on the iteration right after a stressed syllable
	local word_final = true
	local index = #classes

	while index > 0 do
		local stressed = classes[index] == class_stressed_syllable

		if stressed then
			-- a stressed syllable starts a prosodic word: reset the foot.
			-- (word_final for the syllable to the left is set at the bottom
			-- of the loop.)
			foot = 0
		elseif word_final and index > 1 and has_hiatus(hyph[index - 1], hyph[index]) then
			-- hiatus break at the end of a prosodic word: do nothing

		elseif foot == 0 then
			foot = foot + 1

		-- try to assign a stress.
		-- is the previous syllable stressed?
		elseif index > 1 and classes[index - 1] == class_stressed_syllable then
			-- no two stresses in a row, so do nothing.
		else
			-- we will assign a stress somewhere.
			-- is this syllable preceded by an unstressed syllable
			-- with more priority?
			if index > 2 and classes[index - 2] ~= class_stressed_syllable and
							classes[index - 1] > classes[index] then
				-- shift back by one syllable
				index = index - 1
			end

			-- assign stress, and continue.
			add_stress[index] = true
			foot = 0
		end

		index = index - 1
		-- The syllable to the left of a stressed syllable ends the preceding
		-- prosodic word. Previously this was an unconditional `false`, which
		-- overwrote the reset done for stressed syllables, so rule (5) only
		-- ever applied to the very last syllable of the whole input.
		word_final = stressed
	end

	return add_stress
end

-- Joins the syllables back into one string, prefixing the tertiary stress
-- mark to every syllable selected in `add_stress`.
--   hyph:       list of syllables
--   classes:    syllable classes; already-stressed syllables are left alone
--   add_stress: table keyed by the indices of syllables to mark
-- Returns the concatenated string.
local function add_rhythmic_stress_collect(hyph, classes, add_stress)
	local pieces = {}

	for i = 1, #hyph do
		local syllable = hyph[i]
		if add_stress[i] and classes[i] < class_stressed_syllable then
			-- the stress mark replaces a leading syllable break, if any
			local without_break = gsub(syllable, "^%.", "")
			syllable = tertiary .. without_break
		end
		pieces[i] = syllable
	end

	return table.concat(pieces)
end

-- Adds rhythmic (tertiary) stress marks to a word. Must be applied *before*
-- IPA conversion, because it works on the orthographic syllables.
--   word:       the term, still in its spelled form
--   assign_rtl: if truthy, use the right-to-left algorithm; otherwise the
--               traditional left-to-right one
-- Returns one result, or possibly two: the second is only non-nil when the
-- word has at least three syllables and ends in a heavy syllable preceded by
-- a light one, in which case it is the variant with the final syllable
-- stressed as well.
function export.add_rhythmic_stress(word, assign_rtl)
	-- second argument: keep_sep_symbols = true
	local hyph = m_hyph.generate_hyphenation(word, true)

	local assign = add_rhythmic_stress_ltr
	if assign_rtl then
		assign = add_rhythmic_stress_rtl
	end

	-- classify all syllables; the first one always counts as stressed
	local classes = {}
	for index, syllable in ipairs(hyph) do
		classes[index] = index == 1 and class_stressed_syllable or classify_syllable(syllable)
	end

	-- final heavy syllable may be stressed if preceded by an unstressed
	-- light syllable (in words with at least 3 syllables)
	local last = #classes
	local final_heavy = last >= 3 and classes[last] > 0 and classes[last - 1] == 0

	local res1 = add_rhythmic_stress_collect(hyph, classes, assign(hyph, classes))
	if not final_heavy then
		return res1, nil
	end

	-- second variant: rerun the assignment pretending the final syllable is
	-- already stressed ...
	local actual_final = classes[last]
	classes[last] = class_stressed_syllable
	local add_stress = assign(hyph, classes)
	-- ... then restore its real class and force the mark, so that
	-- add_rhythmic_stress_collect actually adds it
	classes[last] = actual_final
	add_stress[last] = true
	local res2 = add_rhythmic_stress_collect(hyph, classes, add_stress)

	return res1, res2
end

-- Adds the nonsyllabic diacritic after the last vowel of each diphthong.
--   IPA:            the IPA string to process
--   strict_initial: if truthy, "initial syllable" means the very start of
--                   the string (only non-vowels may precede the diphthong);
--                   otherwise it means directly after any stress indicator
-- Returns the processed string.
local function handle_diphthongs(IPA, strict_initial)
	-- Decides whether a /_U/ diphthong in a non-initial syllable gets the
	-- mark: it does when its syllable is open. `after` is everything that
	-- follows the diphthong in the string.
	local function open_noninitial(diphthong, after)
		if find(after, "^" .. consonant .. diacritic .. "*" .. vowel) then
			-- the following consonant is itself followed by a vowel, so it
			-- belongs to the next syllable and this syllable is open
			return diphthong .. nonsyllabic .. after
		elseif find(after, "^" .. consonant) then
			-- the following consonant must belong to this syllable,
			-- which is therefore closed: leave the diphthong unmarked
			return diphthong .. after
		end
		-- no consonant after diphthong => open
		return diphthong .. nonsyllabic .. after
	end

	-- The pattern lists are sequences and the substitutions are applied one
	-- after another, so iterate with ipairs: pairs does not guarantee order.

	-- /_i/ diphthongs: any syllable
	for _, diphthong_regex in ipairs(diphthongs_i) do
		IPA = gsub(IPA, diphthong_regex, "%0" .. nonsyllabic)
	end

	local only_initial = (strict_initial and ("^[^" .. vowels .. "]*")
				or (stress_indicator .. "[^" .. vowels .. "]*"))

	-- rising diphthongs: initial syllables only
	for _, diphthong_regex in ipairs(diphthongs_rising) do
		IPA = gsub(IPA, only_initial .. diphthong_regex, "%0" .. nonsyllabic)
	end

	-- /_U/ diphthongs: initial syllables, or open non-initial syllables
	for _, diphthong_regex in ipairs(diphthongs_u) do
		-- initial syllables
		IPA = gsub(IPA, only_initial .. diphthong_regex, "%0" .. nonsyllabic)

		-- open non-initial syllables
		-- NOTE(review): ".+" is greedy and swallows the rest of the string, so
		-- per pattern only the first not-yet-marked occurrence is examined
		-- here — confirm that this is sufficient for real input.
		IPA = gsub(IPA, "(" .. diphthong_regex .. ")([^" .. nonsyllabic .. "].+)", open_noninitial)
		IPA = gsub(IPA, "(" .. diphthong_regex .. ")($)", open_noninitial)
	end

	return IPA
end

-- Converts one "wordlet" (a stretch of the term between separators) to IPA.
--   term:        the spelled wordlet
--   is_narrow:   if truthy, also apply the narrow-transcription steps
--                (h allophones, length marks, diphthongs, diacritics)
--   has_initial: in narrow mode, whether diphthongs are handled here
-- Returns the IPA string for the wordlet (without a leading stress mark).
local function IPA_wordlet(term, is_narrow, has_initial)
	local rest = term
	local phonemes = {}

	-- Greedy conversion: at each position take the longest known letter
	-- sequence; copy a single character through if nothing is known.
	while len(rest) > 0 do
		local best_key
		local best_len = 0

		for key in pairs(letters_phonemes) do
			local key_len = len(key)
			if key_len > best_len and sub(rest, 1, key_len) == key then
				best_key, best_len = key, key_len
			end
		end

		if best_key then
			phonemes[#phonemes + 1] = letters_phonemes[best_key]
			rest = sub(rest, best_len + 1)
		else
			phonemes[#phonemes + 1] = sub(rest, 1, 1)
			rest = sub(rest, 2)
		end
	end

	local result = table.concat(phonemes)

	if not is_narrow then
		return result
	end

	-- articulation of h (Suomi, Toivanen & Ylitalo 2008, p. 28)
	-- Returning nothing from the callback keeps the matched text unchanged.
	local function h_allophone(before, after)
		-- h must be followed by something other than another h ...
		if after == "" or after == "h" then
			return
		end
		-- ... and preceded by a vowel
		if before == "" or not vowels:find(before) then
			return
		end

		local realized
		if consonants:find(after) then
			if before == "i" or before == "y" then
				-- vihma, yhtiö
				realized = "ç"
			elseif before == "ɑ" or before == "o" or before == "u" then
				-- mahti, kohme, tuhka
				realized = "x"
			end
		elseif vowels:find(after) then
			-- maha
			realized = "ɦ"
		end

		if realized then
			return before .. realized .. after
		end
	end
	result = gsub(result, "(.?)h(.?)", h_allophone)

	-- double letter replacement and diphthongs must be handled earlier here
	-- (in broad mode IPA_term does both after joining the wordlets)
	result = gsub(result, "(%a)%1", "%1" .. long)
	if has_initial then
		result = handle_diphthongs(result, true)
	end

	-- finally attach the narrow-transcription diacritics
	for plain, detailed in pairs(replacements_narrow) do
		result = gsub(result, plain, detailed)
	end

	return result
end

-- Converts a whole term to an IPA transcription.
--   term:      the spelled term; "/" and "-" separate wordlets, leading or
--              trailing hyphens mark the term as a suffix or prefix, and "*"
--              is the special marker rewritten below in narrow mode
--   is_narrow: if truthy, produce the narrow (phonetic) transcription,
--              otherwise the broad (phonemic) one
-- Returns the transcription as a string, without surrounding slashes or
-- brackets.
function export.IPA_term(term, is_narrow)
	term = lower(term)
	local hyphenstress = "ˌ" -- secondary by default

	if find(term, "%/") then
		hyphenstress = tertiary -- tertiary if we have slashes
	end

	-- strip leading/trailing hyphens; they are re-added around the result
	local found
	term, found = gsub(term, "^%-+", "")
	local is_suffix = found > 0
	term, found = gsub(term, "%-+$", "")
	local is_prefix = found > 0

	if is_narrow then
		-- only the first result of add_rhythmic_stress is used here
		term = export.add_rhythmic_stress(term)
	end

	-- scan for wordlets
	local wordlet_start = 1
	local wordlet_IPAs = {}

	while true do
		-- sep_found is the separator that ENDS the current part (nil for the
		-- last part)
		local wordlet_sep_at, _, sep_found = find(term, "(" .. wordlet_sep .. ")", wordlet_start)
		local wordlet_end = wordlet_sep_at and (wordlet_sep_at - 1)
		local part = sub(term, wordlet_start, wordlet_end)

		-- NOTE(review): the choice below looks at the separator after the
		-- part, not the one before it. As long as `tertiary` is the same
		-- character as the secondary stress mark this makes no visible
		-- difference — confirm the intent before changing `tertiary`.
		local stress
		if wordlet_start == 1 then
			stress = "ˈ" -- primary
		elseif sep_found == "/" then
			stress = "ˌ" -- secondary
		else
			stress = hyphenstress
		end

		table.insert(wordlet_IPAs, stress .. IPA_wordlet(part, is_narrow, true))
		if wordlet_sep_at == nil then
			break
		else
			wordlet_start = wordlet_sep_at + 1
		end
	end

	-- local: this used to be assigned as an accidental global
	local IPA = table.concat(wordlet_IPAs, "")

	if is_narrow then
		-- handle * in narrow transcription
		-- post  = optional ")" + whitespace + stress mark following the marker
		-- after = the next character together with its diacritics
		IPA = gsub(IPA, "ˣ(%)?%s*"..stress_p.."?)((.?)" .. diacritic .. "*)",
			function (post, after, potential_consonant)
				if potential_consonant == "" then
					-- marker at the very end: (optional) glottal stop
					if find(post, "^%)") then
						return "ʔ" .. post .. after
					else
						return post .. "(ʔ)" .. after
					end
				-- plain find: this is a membership test, and the character
				-- may be a pattern magic character such as "." or "("
				elseif consonants:find(potential_consonant, 1, true) then
					if #post > 0 then
						-- across a boundary: repeat the consonant before it,
						-- unreleased if it is a plosive
						local amark = ""
						if plosives:find(sub(after, 1, 1)) then
							amark = unreleased
						end
						return after .. amark .. post .. after
					else
						-- no boundary: simply lengthen the consonant
						return post .. after .. long
					end
				else
					-- anything else (e.g. a vowel): glottal stop
					return post .. "ʔ" .. after
				end
			end)
	else
		--	Replace double letters (vowels or consonants) with single letter plus length sign.
		IPA = gsub(IPA, "(%a)%1", "%1" .. long)
		IPA = handle_diphthongs(IPA, false)
	end

	IPA = apply_post_fixes(IPA)

	if is_narrow then
		IPA = apply_post_fixes_narrow(IPA)
	end

	if is_prefix then
		IPA = IPA .. "-"
	end
	if is_suffix then
		IPA = "-" .. IPA
	end

	return IPA
end

-- Entry point: formats the broad and narrow transcription of a term.
--   term: either the term as a string, or a frame object, in which case the
--         term is read from the first argument of the parent frame. A
--         missing term means "use the page title"; "*" means "page title
--         followed by *".
-- Returns whatever Module:IPA's format_IPA_full returns for the two
-- transcriptions (/broad/ and [narrow]).
function export.IPA(term)
	if type(term) == "table" then
		term = term:getParent().args[1]
	end

	local title = mw.title.getCurrentTitle().text

	if not term then
		term = title
	elseif term == "*" then
		term = title .. "*"
	end

	-- truthy when the term contains a space; passed through as no_count
	local no_count = match(term, " ")

	-- locals: these used to be assigned as accidental globals
	local IPA_narrow = export.IPA_term(term, true)
	local IPA_broad = export.IPA_term(term, false)
	return m_IPA.format_IPA_full {
		lang = require("Module:languages").getByCode(langcode),
		items = {{pron = "/" .. IPA_broad .. "/"}, {pron = "[" .. IPA_narrow .. "]"}},
		no_count = no_count,
	}
end

return export