-- 模組:Category tree/poscatboiler/data/lang-specific/jpx/sandbox

local export = {}

local m_str_utils = require("Module:string utilities")

local concat = table.concat
local full_link = require("Module:links").full_link
local insert = table.insert
local Hani_sort = require("Module:Hani-sortkey").makeSortKey
local match = m_str_utils.match
local sort = table.sort
local tag_text = require("Module:script_utilities").tag_text

local Hira = require("Module:scripts").getByCode("Hira")
local Jpan = require("Module:scripts").getByCode("Jpan")
local kana_to_romaji = require("Module:Hrkt-translit").tr
local m_numeric = require("Module:ConvertNumeric")

local kana_capture = "([-" .. require("Module:ja/data/range").kana .. "・]+)"
local yomi_data = require("Module:kanjitab/data")


-- Registers the fixed-name category labels defined by this module.
--
-- Every entry written into `labels` is a table of label properties
-- (`description`, `parents`, and optionally `additional`, `topright`,
-- `toc_template`, `breadcrumb`, `hidden`, `can_be_empty`).
--
-- @param labels  table the label definitions are written into (mutated in place)
-- @param lang    language object; only `lang:getCode()` is read here, to add a
--                Japanese-only pointer to the hiragana/katakana "additional" text
function export.add_labels(labels, lang)
	labels["adnominals"] = {
		description = "{{{langname}}}{{ja-r|連%体%詞|れん%たい%し}}。用來修飾名詞、沒有活用且不做謂語的詞類。",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["平假名"] = {
		description = "使用平假名({{ja-r|平%仮%名|ひら%が%な}})形式的{{{langname}}}詞彙,按照傳統的平假名[[五十音順]]排列。平假名形式是該詞的[[音標]]表示。" ..
		"維基詞典以三種方式表示{{{langname}}}語段:正常形式(如適用,帶有[[漢字]])、[[平假名]]" ..
		"形式(僅當語段包含漢字,而平假名形式與漢字形式不同時)以及[[羅馬字]]形式。",
		additional = (lang:getCode() == "ja" and "更多資訊請見[[Wiktionary:日語]]。\n\n" or "") .. "參見[[:Category:{{{langname}}}片假名]]",
		toc_template = "categoryTOC-hiragana",
		parents = {
			{name = "{{{langcat}}}", raw = true},
			"Category:平假名字元",
		}
	}

	labels["歷史平假名"] = {
		description = "{{{langname}}}在歷史上曾使用的平假名形式。",
		additional = "參見[[:Category:{{{langname}}}歷史片假名]]。",
		toc_template = "categoryTOC-hiragana",
		parents = {
			"平假名",
			{name = "{{{langcat}}}", raw = true},
			"Category:平假名字元",
		}
	}

	labels["片假名"] = {
		description = "{{{langname}}} terms with katakana {{mdash}} {{ja-r|片%仮%名|かた%か%な}} {{mdash}} forms, sorted by conventional katakana sequence. Katakana is used primarily for transliterations of foreign words, including old Chinese Hanzi not used in [[shinjitai]].",
		additional = (lang:getCode() == "ja" and "For more information, see [[Wiktionary:Japanese language]].\n\n" or "") .. "''See also'' [[:Category:{{{langname}}} hiragana]]",
		toc_template = "categoryTOC-katakana",
		parents = {
			{name = "{{{langcat}}}", raw = true},
			"Category:片假名字元",
		}
	}

	labels["歷史片假名"] = {
		description = "{{{langname}}}在歷史上曾使用的片假名形式。",
		additional = "參見[[:Category:{{{langname}}}歷史平假名]]。",
		toc_template = "categoryTOC-katakana",
		parents = {
			"片假名",
			{name = "{{{langcat}}}", raw = true},
			"Category:片假名字元",
		}
	}

	labels["參雜假名的用語拼寫"] = {
		description = "{{{langname}}}中同時使用[[平假名]]和[[片假名]](有時還包括[[漢字]])的詞彙。",
		parents = {
			{name = "{{{langcat}}}", raw = true},
			"平假名",
			"片假名",
		},
	}

	-- Umbrella label for honorific speech; the two labels below are its children.
	labels["敬語"] = {
		topright = "{{wikipedia|日語敬語}}",
		description = "{{{langname}}}中用於表達敬意的表達方式。",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	-- Humble terms (謙譲語). Must NOT be keyed "敬語": that would overwrite the
	-- umbrella label above and make the label its own parent.
	labels["謙讓語"] = {
		description = "{{{langname}}} humble terms, or {{ja-r|謙%譲%語|けん%じょう%ご}}, which is a type of honorific speech that lowers the speaker in relation to the listener.",
		parents = "敬語",
	}

	-- Respectful terms (尊敬語); the key matches the term named in the description.
	labels["尊敬語"] = {
		description = "{{{langname}}} respectful terms, or {{ja-r|尊%敬%語|そん%けい%ご}}, which is a type of honorific speech that elevates the listener in relation to the speaker.",
		parents = "敬語",
	}

	labels["依讀音分類的漢字"] = {
		description = "依讀音分類的{{{langname}}}漢字。",
		parents = {{name = "漢字", sort = "讀音"}},
	}
	
	labels["枕詞"] = {
		topright = "{{wikipedia|枕詞}}",
		description = "{{{langname}}}中表現某個可見的具體事物現象的語彙,通常用於詩歌。",
		parents = {"idioms"},
	}

	labels["依漢字讀音分類的詞"] = {
		description = "{{{langname}}} categories grouped with regard to the readings of the kanji with which they are spelled.",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["依讀音規則分類的詞"] = {
		description = "{{{langname}}} categories with terms grouped by their reading patterns.",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}
	
	-- Builds a sorted list of wikilinks to the categories of the on'yomi
	-- subtypes, for use in the "additional" text of an on'yomi category.
	--
	-- category       label template containing the "{{{yomi_catname}}}" placeholder
	-- category_type  key in each yomi entry that holds the category name
	--                ("reading_category" or "kanji_category")
	-- cat_yomi_type  `type` of the yomi whose category is being described;
	--                it is left out of the list, as is plain "on'yomi"
	local function handle_onyomi_list(category, category_type, cat_yomi_type)
		local onyomi, seen = {}, {}
		for _, yomi in pairs(yomi_data) do
			-- `seen` is keyed by the entry table itself, so an entry reachable
			-- under several keys of yomi_data is only listed once.
			if not seen[yomi] and yomi.onyomi then
				local yomi_catname = yomi[category_type]
				-- An explicit `false` marks a yomi that has no category of this type.
				if yomi_catname ~= false then
					local yomi_type = yomi.type
					if yomi_type ~= "on'yomi" and yomi_type ~= cat_yomi_type then
						insert(onyomi, "[[:Category:{{{langname}}} " .. category:gsub("{{{yomi_catname}}}", yomi_catname) .. "]]")
					end
				end
			end
			seen[yomi] = true
		end
		sort(onyomi)
		return onyomi
	end
	
	-- Generates one label per yomi entry that has a category name of the
	-- requested type.
	--
	-- category       label template containing the "{{{yomi_catname}}}" placeholder
	-- category_type  key in each yomi entry that holds the category name
	-- parent         label that every generated label is parented to
	-- description    description prefix; the yomi's link (or category name) and
	--                optional description are appended to it
	local function add_yomi_category(category, category_type, parent, description)
		for _, yomi in pairs(yomi_data) do
			local yomi_catname = yomi[category_type]
			if yomi_catname ~= false then
				local yomi_type = yomi.type
				local yomi_desc = yomi.link or yomi_catname
				if yomi.description then
					yomi_desc = yomi_desc .. "; " .. yomi.description
				end
				local label = {
					description = description .. " " .. yomi_desc .. ".",
					breadcrumb = yomi_type,
					parents = {{name = parent, sort = yomi_catname}},
				}
				if yomi.onyomi then
					local onyomi = handle_onyomi_list(category, category_type, yomi_type)
					
					label.additional = "Categories of terms with " ..
						(yomi_type == "on'yomi" and "more" or "other") ..
						" specific types of on'yomi readings can be found in the following categories:\n* " .. concat(onyomi, "\n* ")
					
					-- On'yomi subtypes get the generic on'yomi category as their
					-- first parent.
					if yomi_type ~= "on'yomi" then
						insert(label.parents, 1, {
							-- Parentheses drop gsub's second return value (the count).
							name = (category:gsub("{{{yomi_catname}}}", yomi_data.on[category_type])),
							sort = yomi_catname
						})
					end
				end
				labels[category:gsub("{{{yomi_catname}}}", yomi_catname)] = label
			end
		end
	end
	
	add_yomi_category(
		"使用{{{yomi_catname}}}的詞",
		"reading_category",
		"依讀音規則分類的詞",
		"{{{langname}}} terms exhibiting"
	)
	
	add_yomi_category(
		"有漢字使用{{{yomi_catname}}}音讀的詞",
		"kanji_category",
		"依漢字讀法分類的詞",
		"{{{langname}}} categories with terms that are spelled with one or more kanji exhibiting"
	)
	
	labels["缺少yomi參數的詞"] = {
		description = "{{tl|{{{langcode}}}-kanjitab}} 中至少缺失一個[[Appendix:日語術語表#讀音|讀音]]的{{{langname}}}詞。",
		hidden = true,
		can_be_empty = true,
		parents = {"entry maintenance"},
	}
	
	labels["依漢字讀法分類的詞"] = {
		description = "按漢字讀法分類的{{{langname}}}分類。" ..
		"主要分為源自漢語的借詞(音讀,{{ja-r|音|おん}})和源於{{{langname}}}自身的本土詞(訓讀,{{ja-r|訓|くん}})。",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}
	
	labels["有借字的詞"] = {
		topright = "{{wikipedia|借字 (日語)|借字}}",
		description = "包含一個或多個[[Appendix:日語術語表#借字|借字]]({{ja-r|当て字|あてじ}})的{{{langname}}}詞。借字是用來表示讀音而非實際意義的[[漢字]](雖然意義可能對漢字的選擇有一定影響)。",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["有代用字的詞"] = {
		description = "使用[[Appendix:日語術語表#代用字|代用字]]的{{{langname}}}詞,依據{{temp|ja-daiyouji}}形成的分類。",
		parents = {"terms by etymology"},
	}

	-- Only one `description` field: Lua leaves the assignment order of repeated
	-- keys in a table constructor undefined, so a duplicate must not be present.
	labels["有熟字訓的詞"] = {
		description = "包含一個或多個[[Appendix glossary#熟字訓|熟字訓]]漢字({{ja-r|熟字訓|じゅくじくん}},用來表示意義而非讀音的[[漢字]])的{{{langname}}}詞。",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["有常用漢字的詞"] = {
		topright = "{{wikipedia|常用漢字}}",
		description = "至少包含一個漢字,且所有漢字都在{{ja-r|常%用 漢%字|じょう%よう かん%じ}}表中的{{{langname}}}詞。",
		additional = "參見[[:Category:有非常用漢字的{{{langname}}}詞]]。",
		parents = {{name = "terms by orthographic property", sort = "jōyō"}},
	}

	labels["有非常用漢字的詞"] = {
		topright = "{{wikipedia|常用漢字}}",
		description = "至少包含一個不在{{ja-r|常%用 漢%字|じょう%よう かん%じ}}表中的漢字的{{{langname}}}詞。",
		additional = "參見[[:Category:有常用漢字的{{{langname}}}詞]]。",
		parents = {{name = "terms by orthographic property", sort = "non-jōyō"}},
	}

	-- One label per kind of non-jōyō kanji, each filed under the non-jōyō label.
	for _, non_joyo_type in ipairs {"表外漢字", "人名用漢字"} do
		labels["有" .. non_joyo_type .. "的詞"] = {
			description = "有" .. non_joyo_type .. "的{{{langname}}}詞。",
			parents = {{name = "有非常用漢字的詞", sort = non_joyo_type}},
		}
	end

	-- One label per school grade 1 through 6; the grade word comes from
	-- m_numeric.ones_position and the numeric grade is the sort key.
	for i = 1, 6 do
		local ord = m_numeric.ones_position[i]
		labels["有" .. ord .. "年級漢字的詞"] = {
			description = "詞中有於" .. ord .. "年級教授的漢字的{{{langname}}}詞。",
			parents = {{name = "有常用漢字的詞", sort = i}},
		}
	end

	labels["有中學漢字的詞"] = {
		description = "詞中有於中學教授的漢字的{{{langname}}}詞。",
		parents = {{name = "有常用漢字的詞", sort = "secondary school"}},
	}
		
	labels["有多個讀音的詞"] = {
		description = "有多種讀音(因此有多種[[假名]]寫法)的{{{langname}}}詞。",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["依漢字音拍數分類的詞"] = {
		description = "依據漢字讀音中的音拍數進行分組的{{{langname}}}分類。",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}
	
	labels["單漢字詞"] = {
		description = "由單個漢字組成的{{{langname}}}詞。",
		parents = {"依正字法屬性分類的詞", {name = "character counts", sort = " "}},
	}
	
	labels["未指定是否及物的動詞"] = {
		description = "在標題行模板中缺失<code>tr=</code>參數的{{{langname}}}動詞。",
		hidden = true,
		can_be_empty = true,
		parents = {"詞條維護"},
	}
	
	labels["四字熟語"] = {
		topright = "{{wikipedia|四字熟語}}",
		description = "{{{langname}}}中由四個[[漢字]]組成的習語({{ja-r|四字熟語|よじじゅくご}}),具有慣用意義;通常源自文言文、佛教經典或日本傳統諺語。",
		additional = "對照漢語族的[[成語]]。",
		parents = {"俗語"},
	}
end



-- Registers the pattern-based category handlers defined by this module.
--
-- Each handler appended to `handlers` receives a `data` table and matches
-- `data.label` against its own pattern. It returns nil when the label does not
-- match (or is otherwise rejected), and a table of label properties
-- (`description`, `parents`, `breadcrumb`, ...) when it does. Handlers that
-- parse `data.args` additionally return `true` as a second value
-- (NOTE(review): presumably this tells the caller the arguments were
-- consumed; confirm against the category-tree driver).
--
-- @param handlers  array the handler functions are appended to
-- @param lang      language object used for tagging, linking and transliteration
-- @param m_lang    language-specific helper module; only `m_lang.count_morae`
--                  is used here
function export.add_handlers(handlers, lang, m_lang)
	-- Reverse lookup from number word to number.
	-- FIXME: Only works for 0 through 19.
	local word_to_number = {}
	for k, v in pairs(m_numeric.ones_position) do
		word_to_number[v] = k
	end

	-- Period qualifiers that may precede a reading type in a label.
	local periods = {
		historical = true,
		ancient = true,
	}

	-- Returns `period_text, reading_type_link` for a parsed label.
	-- * An unrecognized period yields nil (callers treat a nil link as "no match").
	-- * A bare period used as the reading type yields nil period text and the
	--   reading type itself, unlinked.
	-- * Otherwise the period text is the period plus a trailing space (or nil)
	--   and the link points at the glossary anchor named after the reading type.
	local function get_period_text_and_reading_type_link(period, reading_type)
		if period and not periods[period] then
			return nil
		end
		local period_text = period and period .. " " or nil

		-- Allow periods (historical or ancient) by themselves; they will parse as reading types.
		if not period and periods[reading_type] then
			return nil, reading_type
		end

		local reading_type_link = "[[Appendix:日語術語表#" .. reading_type .. "|" .. reading_type .. "]]"
		return period_text, reading_type_link
	end
	
	-- Picks the script for `str`: after removing whitespace and punctuation
	-- (as matched by %s and %p), any character outside the Hira character set
	-- selects Jpan; otherwise Hira.
	local function get_sc(str)
		return match(str:gsub("[%s%p]+", ""), "[^" .. Hira:getCharacters() .. "]") and Jpan or Hira
	end
			
	-- Wraps `reading` in script tagging for `lang`, using the script from get_sc.
	local function get_tagged_reading(reading)
		return tag_text(reading, lang, get_sc(reading))
	end

	-- Builds a formatted link for a kana reading.
	--
	-- reading  kana string; "・" separates furigana from okurigana
	-- period   optional; when it is one of `periods`, historical
	--          transliteration options are passed to the romanizer
	-- link     optional link target; defaults to the reading without "・"
	local function get_reading_link(reading, period, link)
		local hist = periods[period]
		-- Drop dots, hyphens and whitespace before linking.
		reading = reading:gsub("[%.%-%s]+", "")
		return full_link({
			lang = lang,
			sc = get_sc(reading),
			term = link or reading:gsub("・", ""),
			-- If we have okurigana, demarcate furigana.
			alt = reading:gsub("^(.-)・", "<span style=\"border-top:1px solid;position:relative;padding:1px;\">%1<span style=\"position:absolute;top:0;bottom:67%%;right:0%%;border-right:1px solid;\"></span></span>"),
			-- Romanize with "-" in place of "・", then underline the part
			-- before the first hyphen.
			tr = kana_to_romaji((reading:gsub("・", "-")), lang:getCode(), nil, hist and {hist = hist} or nil)
				:gsub("^(.-)%-", "<u>%1</u>"),
			-- With okurigana, show "~okurigana" in the part-of-speech slot.
			pos = reading:find("・", 1, true) and get_tagged_reading((reading:gsub("^.-・", "~"))) or nil
		}, "term")
	end

	-- Truthy when the reading type ends in "on" preceded by at least one
	-- character, i.e. it is a subtype of the on reading rather than "on" itself.
	local function is_on_subtype(reading_type)
		return reading_type:find(".on$")
	end

	-- Handler: "有N個漢字的詞" (terms written with N kanji).
	insert(handlers, function(data)
		-- The pattern has a single capture, so only `count` is bound.
		local count = data.label:match("^有(.+)個漢字的詞$")
		if count then
			local num = word_to_number[count]
			if not num then
				-- '兩' is accepted as an alternative word for 2.
				if count == '兩' then
					num = 2
				else
					return nil
				end	
			end
			return {
				description = "有" .. count .. "個漢字的{{{langname}}}詞。",
				breadcrumb = num,
				parents = {{name = "字符數", sort = num}},
			}
		end
	end)


	-- Handler: "讀作N個音拍的漢字" (kanji read with N morae).
	insert(handlers, function(data)
		-- The pattern has a single capture, so only `count` is bound.
		local count = data.label:match("^讀作(.+)個音拍的漢字$")
		if count then
			local num = word_to_number[count]
			if not num then
				return nil
			end
			return {
				description = "讀作" .. count .. "個音拍的{{{langname}}}漢字。",
				breadcrumb = num,
				parents = {{name = "依漢字音拍數分類的詞", sort = num}},
			}
		end
	end)


	-- Handler: "kanji with [PERIOD ]TYPE reading KANA".
	insert(handlers, function(data)
		local label_pref, period, reading_type, reading = match(data.label, "^(kanji with ([a-z]-) ?([%a']+) reading )" .. kana_capture .. "$")
		
		-- `period` is "" (not nil) when the label matches without a period.
		if not period then
			return
		end
		
		period = period ~= "" and period or nil
		local period_text, reading_type_link = get_period_text_and_reading_type_link(period, reading_type)
		
		if not reading_type_link then
			return
		end
		
		-- Compute parents.
		local parents, breadcrumb = {}
		if reading:find("・", 1, true) then
			-- Reading with okurigana: file under the category of the bare furigana.
			local okurigana = reading:match("・(.*)")
			insert(parents, {
				name = "kanji with " .. (period_text or "") .. reading_type .. " reading " .. reading:match("(.-)・"),
				-- Sort by okurigana, since all coordinate categories will have the same furigana.
				sort = (data.lang:makeSortKey(okurigana))
			})
			breadcrumb = "~" .. okurigana
		else
			insert(parents, {
				name = "kanji by " .. (period_text or "") .. reading_type .. " reading",
				sort = (data.lang:makeSortKey(reading))
			})
			breadcrumb = reading
		end
		if is_on_subtype(reading_type) then
			insert(parents, {name = "kanji with " .. (period_text or "") .. "on reading " .. reading, sort = reading_type})
		elseif period_text then
			insert(parents, {name = "kanji with " .. period_text .. "reading " .. reading, sort = reading_type})
		end
		if not period_text then
			insert(parents, {name = "kanji read as " .. reading, sort = reading_type})
		end
		
		return {
			description = "{{{langname}}} [[kanji]] with the " .. (period_text or "") .. reading_type_link .. " reading " ..
				get_reading_link(reading, period or reading_type) .. ".",
			displaytitle = "{{{langname}}} " .. label_pref .. get_tagged_reading(reading),
			breadcrumb = get_tagged_reading(breadcrumb),
			parents = parents,
		}
	end)


	-- Handler: "kanji by [PERIOD ]TYPE reading".
	insert(handlers, function(data)
		local period, reading_type = match(data.label, "^kanji by ([a-z]-) ?([%a']+) reading$")
		
		-- `period` is "" (not nil) when the label matches without a period.
		if not period then
			return
		end
		
		period = period ~= "" and period or nil
		local period_text, reading_type_link = get_period_text_and_reading_type_link(period, reading_type)
		
		if not reading_type_link then
			return nil
		end

		-- Compute parents. The first parent is, in order of preference: the
		-- on-reading category (for on subtypes), the period-less category of
		-- the same reading type, or the general by-reading category.
		local parents = {
			is_on_subtype(reading_type) and {name = "kanji by " .. (period_text or "") .. "on reading", sort = reading_type} or
			period_text and {name = "kanji by " .. reading_type .. " reading", sort = period} or
			{name = "kanji by reading", sort = reading_type}
		}
		if period_text then
			insert(parents, {name = "kanji by " .. period_text .. "reading", sort = reading_type})
		end

		-- Compute description.
		local description = "{{{langname}}} [[kanji]] categorized by " .. (period_text or "") .. reading_type_link .. " reading."
		return {
			description = description,
			breadcrumb = (period_text or "") .. reading_type,
			parents = parents,
		}
	end)


	-- Handler: "kanji read as KANA". Accepts an optional `histconsol` argument
	-- naming the modern reading this historical reading was consolidated with.
	insert(handlers, function(data)
		local label_pref, reading = match(data.label, "^(kanji read as )" .. kana_capture .. "$")
		if not reading then
			return
		end
		
		local params = {
			["histconsol"] = {},
		}
		local args = require("Module:parameters").process(data.args, params)
		
		local parents, breadcrumb = {}
		if reading:find("・", 1, true) then
			-- Reading with okurigana: file under the category of the bare furigana.
			local okurigana = reading:match("・(.*)")
			insert(parents, {
				name = "kanji read as " .. reading:match("(.-)・"),
				-- Sort by okurigana, since all coordinate categories will have the same furigana.
				sort = (data.lang:makeSortKey(okurigana))
			})
			breadcrumb = "~" .. okurigana
		else
			insert(parents, {
				name = "kanji by reading",
				sort = (data.lang:makeSortKey(reading))
			})
			breadcrumb = reading
		end
		
		local addl
		local period_text
		if args.histconsol then
			period_text = "historical"
			addl = ("This is a [[Wikipedia:Historical kana orthography|historical]] [[Wikipedia:Kanazukai|reading]], now " ..
			"consolidated with the [[Wikipedia:Modern kana usage|modern reading]] of " ..
			get_reading_link(args.histconsol, nil, ("Category:Japanese kanji read as %s"):format(args.histconsol)) .. ".")
		end
		
		return {
			description = "{{{langname}}} [[kanji]] read as " .. get_reading_link(reading, period_text) .. ".",
			additional = addl,
			displaytitle = "{{{langname}}} " .. label_pref .. get_tagged_reading(reading),
			breadcrumb = get_tagged_reading(breadcrumb),
			parents = parents,
		}, true
	end)


	-- Handler: "terms spelled with kanji read as KANA".
	insert(handlers, function(data)
		local label_pref, reading = match(data.label, "^(terms spelled with kanji read as )" .. kana_capture .. "$")
		if reading then
			-- Compute parents.
			local sort_key = (data.lang:makeSortKey(reading))
			local mora_count = m_lang.count_morae(reading)
			local mora_count_words = m_numeric.spell_number(tostring(mora_count))
			local parents = {
				{name = "terms by kanji readings", sort = sort_key},
				-- "mora" for a count of one, "morae" otherwise.
				{name = "kanji readings with " .. mora_count_words .. " mora" .. (mora_count > 1 and "e" or ""), sort = sort_key},
				{name = "kanji read as " .. reading, sort = " "},
			}

			local tagged_reading = get_tagged_reading(reading)
			return {
				description = "{{{langname}}} terms that contain kanji that exhibit a reading of " .. get_reading_link(reading) ..
				" in those terms prior to any sound changes.",
				displaytitle = "{{{langname}}} " .. label_pref .. tagged_reading,
				breadcrumb = tagged_reading,
				parents = parents,
			}
		end
	end)


	-- Handler: "terms spelled with KANJI read as KANA". Requires at least one
	-- positional argument naming a yomi type; raises an error if none is
	-- given, or if a given type is unknown or has no kanji category.
	insert(handlers, function(data)
		local kanji, reading = match(data.label, "^terms spelled with (.) read as " .. kana_capture .. "$")
		if not kanji then
			return nil
		end
		local params = {
			[1] = {list = true},
		}
		local args = require("Module:parameters").process(data.args, params)
		if #args[1] == 0 then
			error("For categories of the form \"" .. data.lang:getCanonicalName() ..
				" terms spelled with KANJI read as READING\", at least one reading type (e.g. {{code|kun}} or {{code|on}}) must be specified using 1=, 2=, 3=, etc.")
		end
		local yomi_types, parents = {}, {}
		for _, yomi in ipairs(args[1]) do
			-- Named `yomi_info` so the file-level `yomi_data` table is not shadowed.
			local yomi_info = yomi_data[yomi]
			if not yomi_info then
				error("The yomi type \"" .. yomi .. "\" is not recognized.")
			end
			local category = yomi_info.kanji_category
			if not category then
				error("The yomi type \"" .. yomi .. "\" is not valid for this type of category.")
			end
			insert(yomi_types, yomi_info.link)
			insert(parents, {
				name = "terms spelled with kanji with " .. category .. " readings",
				sort = (data.lang:makeSortKey(reading))
			})
		end
		
		-- The per-kanji and per-reading categories go ahead of the yomi-type parents.
		insert(parents, 1, {name = "寫作「" .. kanji .. "」的日語詞", sort = (data.lang:makeSortKey(reading))})
		insert(parents, 2, {name = "讀作「" .. reading .. "」的詞", sort = Hani_sort(kanji)})
		
		-- E.g. "its X reading" for one type, "one of its X or Y readings" for several.
		local yomi_text = (#yomi_types > 1 and "one of " or "") .. "its " ..
			require("Module:table").serialCommaJoin(yomi_types, {conj = "or"}) ..
			" reading" .. (#yomi_types > 1 and "s" or "")
		
		local tagged_kanji = get_tagged_reading(kanji)
		local tagged_reading = get_tagged_reading(reading)
		return {
			description = "{{{langname}}} terms spelled with {{l|{{{langcode}}}|" .. kanji .. "}} with " ..
				yomi_text .. " of " .. get_reading_link(reading) .. ".",
			displaytitle = "{{{langname}}} terms spelled with " .. tagged_kanji .. " read as " .. tagged_reading,
			breadcrumb = "read as " .. tagged_reading,
			parents = parents,
		}, true
	end)


	-- Handler: "使用「X」作為代用字代替「Y」的詞" (terms using X as a substitute
	-- character for Y). Requires the `sort` argument; raises an error without it.
	insert(handlers, function(data)
		-- Uses the file-level `match` helper (the same one that captures a
		-- single kanji with "(.)" in the handler above); no `rmatch` exists
		-- in this module.
		local kanji, daiyoji = match(data.label, "^使用「(.)」作為代用字代替「(.)」的詞")
		if not kanji then
			return nil
		end
		local params = {
			["sort"] = {},
		}
		local args = require("Module:parameters").process(data.args, params)
		if not args.sort then
			error("對於形如使用『?』作為代用字代替『?』的" .. data.lang:getCanonicalName() ..
				"詞」的分類,請指明至少一種代用字用法,在sort=填入代用字的發音")
		end

		local tagged_kanji = get_tagged_reading(kanji)
		local tagged_daiyoji = get_tagged_reading(daiyoji)
		return {
			description = "採用[[Appendix:日語術語表#代用字|代用字]]「{{l|{{{langcode}}}|" .. kanji .. "}}」来代替「{{l|{{{langcode}}}|" .. daiyoji .. "}}」的{{{langname}}}詞語。",
			displaytitle = "使用「" .. tagged_kanji .. "」作為代用字代替「" .. tagged_daiyoji .. "」的{{{langname}}}詞",
			breadcrumb = "使用「" .. tagged_kanji .. "」代替「" .. tagged_daiyoji .. "」的詞",
			parents = {{name = "有代用字的詞", sort = args.sort}},
		}, true
	end)
end


return export