-- 模組:Category tree/poscatboiler/data/lang-specific/jpx

local export = {}

local Hira = require("Module:scripts").getByCode("Hira")
local Jpan = require("Module:scripts").getByCode("Jpan")	
local kana_to_romaji = require("Module:Hrkt-translit").tr
local m_numeric = require("Module:ConvertNumeric")
local rmatch = mw.ustring.match

-- Lookup from the romanized reading-type codes accepted as template arguments
-- to the Chinese names that appear in category labels.
local reading_types_chinese = {
	-- generic reading classes
	on = "音讀",
	kun = "訓讀",
	nanori = "名乘",
	-- specific kinds of on readings
	goon = "吳音",
	["kan'on"] = "漢音",
	["kan'yōon"] = "慣用音",
	["tōon"] = "唐音",
	["sōon"] = "宋音",
}

--- Register the fixed (non-pattern) category labels of this module.
-- Every entry written into `labels` is a spec table with fields such as
-- `description`, `parents`, `breadcrumb`, `additional`, `intro`,
-- `toc_template`, `hidden` and `can_be_empty`.
-- @param labels table that receives the label specs, keyed by label name
-- @param lang language object; only `lang:getCode()` is called on it here
function export.add_labels(labels, lang)
	local lang_code = lang:getCode()
	-- Only the "ja" language gets the pointer to the project page.
	local ja_more_info = lang_code == "ja" and "更多資訊請見[[Wiktionary:日語]]。\n\n" or ""

	labels["連體詞"] = {
		description = "{{{langname}}} {{m|ja|連体詞|tr=れんたいし, rentaishi}}。用來修飾名詞、沒有活用且不做謂語的詞類。",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["平假名"] = {
		description = "The [[hiragana]] ([[平仮名]],[[ひらがな]]) form of " .. (lang_code == "ryu" and "an" or "a") .. " {{{langname}}} word is a [[phonetic]] representation of that word. " ..
		"Wiktionary represents {{{langname}}}-language segments in three ways: in normal form (with [[kanji]], if appropriate), in [[hiragana]] " ..
		"form (this differs from kanji form only when the segment contains kanji), and in [[romaji]] form.",
		additional = ja_more_info .. "參見[[:Category:{{{langname}}}片假名]]",
		toc_template = "categoryTOC-hiragana",
		parents = {
			{name = "{{{langcat}}}", raw = true},
			"Category:平假名字元",
		}
	}

	labels["歷史平假名"] = {
		description = "{{{langname}}}在歷史上曾使用的平假名形式。",
		additional = "參見[[:Category:{{{langname}}}歷史片假名]]。",
		toc_template = "categoryTOC-hiragana",
		parents = {
			"平假名",
			{name = "{{{langcat}}}", raw = true},
			"Category:平假名字元",
		}
	}

	labels["片假名"] = {
		description = "{{{langname}}} words and terms with katakana forms, sorted by conventional katakana sequence. Katakana is used primarily for transliterations of foreign words, including old Chinese Hanzi not used in [[shinjitai]].",
		additional = ja_more_info .. "參見[[:Category:{{{langname}}}平假名]]",
		toc_template = "categoryTOC-katakana",
		parents = {
			{name = "{{{langcat}}}", raw = true},
			"Category:片假名字元",
		}
	}

	labels["歷史片假名"] = {
		description = "{{{langname}}}在歷史上曾使用的片假名形式。",
		additional = "參見[[:Category:{{{langname}}}歷史平假名]]。",
		toc_template = "categoryTOC-katakana",
		parents = {
			"片假名",
			{name = "{{{langcat}}}", raw = true},
			"Category:片假名字元",
		}
	}

	labels["參雜假名的用語拼寫"] = {
		description = "{{{langname}}}中同時使用[[平假名]]和[[片假名]](有時還包括[[漢字]])的詞彙。",
		parents = {
			{name = "{{{langcat}}}", raw = true},
			-- Parent labels use the names under which they are defined above.
			"平假名",
			"片假名",
		},
	}

	labels["敬語"] = {
		intro = "{{wikipedia|Honorific speech in Japanese}}",
		description = "{{{langname}}} [[honorific]]s.",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["謙讓語"] = {
		description = "{{{langname}}} humble terms, or {{ja-r|謙譲語|けんじょうご}}, which is a type of honorific speech in {{{langname}}} that lowers the speaker in relation to the listener.",
		parents = "敬語",
	}

	labels["尊敬語"] = {
		description = "{{{langname}}} respectful terms, or {{ja-r|尊敬語|そんけいご}}, which is a type of honorific speech in {{{langname}}} that elevates the listener in relation to the speaker.",
		parents = "敬語",
	}

	labels["依讀音分類的漢字"] = {
		description = "依讀音分類的{{{langname}}}漢字。",
		parents = {{name = "漢字", sort = "讀音"}},
	}

	labels["枕詞"] = {
		intro = "{{wikipedia|枕詞}}",
		description = "{{{langname}}} idioms used in poetry to introduce specific words.",
		-- Same parent label name as the one used by "四字熟語" below.
		parents = {"俗語"},
	}

	labels["依漢字讀音分類的詞"] = {
		description = "{{{langname}}} categories grouped with regard to the readings of the kanji with which they are spelled.",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["依讀音規則分類的詞"] = {
		description = "{{{langname}}} categories with terms grouped by their reading patterns.",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	-- One label per reading pattern; `link` is the wikitext shown in the description.
	for reading_pattern, link in pairs {
		["熟字訓"] = "{{ja-r|熟字訓|じゅくじくん}}",
		["重箱讀法"] = "{{ja-r|重%箱%読み|じゅう%ばこ%よみ}}(在詞的前半部用音讀,後半部用訓讀)",
		["訓讀"] = "{{ja-r|訓読み|くんよみ}}",
		["名乘"] = "{{ja-r|名乗り|なのり}}",
		["音讀"] = "{{ja-r|音%読み|おん%よみ}}",
		["湯桶讀法"] = "{{ja-r|湯%桶%読み|ゆ%とう%よみ}}(在詞的前半部用訓讀,後半部用音讀)",
	} do
		labels["使用" .. reading_pattern .. "的詞"] = {
			description = "顯示{{{langname}}}中使用" .. link .. "的詞語。",
			breadcrumb = reading_pattern,
			parents = {{name = "依讀音規則分類的詞", sort = reading_pattern}},
		}
	end

	labels["依漢字讀法分類的詞"] = {
		description = "按漢字讀法分類的{{{langname}}}分類。主要分為漢語由來(音讀)和{{{langname}}}自身由來(訓讀)。",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["未指定是否及物的動詞"] = {
		description = "在標題行模板中缺失<code>tr=</code>參數的{{{langname}}}動詞。",
		hidden = true,
		can_be_empty = true,
		parents = {"詞條維護"},
	}

	labels["四字熟語"] = {
		intro = "{{wikipedia|四字熟語}}",
		description = "{{{langname}}} four-[[kanji]] compound terms with idiomatic meanings, typically derived from Classical Chinese, Buddhist scripture or traditional Japanese proverbs.",
		additional = "對照漢語族的[[成語]]。",
		parents = {"俗語"},
	}

	-- Extra text for the generic on-reading category. The lines start at column 0
	-- so that "*" opens a wikitext list item.
	local on_continuation = [=[


Categories of terms with more specific types of ''on'' readings can be found in the following categories:
* [[:Category:有漢字使用吳音的{{{langname}}}詞]]
* [[:Category:有漢字使用慣用音的{{{langname}}}詞]]
* [[:Category:有漢字使用漢音的{{{langname}}}詞]]
* [[:Category:有漢字使用唐音的{{{langname}}}詞]]
]=]

	local on_desc = ", which is a type of {{ja-r|音%読み|おん%よみ}} or {{ja-r|音|おん}} reading"
	for reading_type, reading_desc in pairs {
		["吳音"] = "a {{ja-r|呉%音|ご%おん}} reading" .. on_desc,
		["漢音"] = "a {{ja-r|漢%音|かん%おん}} reading" .. on_desc,
		["慣用音"] = "a {{ja-l|慣用音|かんようおん|kan'yōon}} reading" .. on_desc,
		["訓讀"] = "a {{ja-r|訓%読み|くん%よみ}} or {{ja-r|訓|くん}} reading",
		["名乘"] = "a {{ja-r|名%乗り|な%のり}} reading, which is a type of reading used for people and places",
		["音讀"] = "an {{ja-r|音%読み|おん%よみ}} or {{ja-r|音|おん}} reading",
		["唐音"] = "a {{ja-r|唐%音|とう%おん}} reading" .. on_desc,
		["宋音"] = "a {{ja-r|宋%音|そう%おん}} reading" .. on_desc,
	} do
		labels["有漢字使用" .. reading_type .. "的詞"] = {
			description = "{{{langname}}} categories with terms that are spelled with one or more kanji which exhibit " .. reading_desc .. ".",
			-- The loop keys are the Chinese names, so the generic on-reading entry is "音讀".
			additional = reading_type == "音讀" and on_continuation or nil,
			breadcrumb = reading_type,
			-- Sort under the reading type of this iteration.
			parents = {{name = "依漢字讀法分類的詞", sort = reading_type}},
		}
	end

	labels["有借字的詞"] = {
		intro = "{{wikipedia|借字 (日語)|借字}}",
		description = "使用{{m|ja|当て字}}(借字)的{{{langname}}}詞。",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["有代用字的詞"] = {
		description = "使用代用字的{{{langname}}}詞,依據{{temp|ja-daiyouji}}形成的分類。",
		parents = {"詞源"},
	}

	labels["依歷史假名遣分類的詞"] = {
		breadcrumb = "歷史假名遣",
		description = "本分類根據{{{langname}}}{{w|歷史假名遣}}中的拼寫是否包含特定假名進行分類。",
		parents = {{name = "依字元分類的詞", sort = " "}},
	}

	labels["有熟字訓的詞"] = {
		description = "使用{{m|ja|熟字訓}}的{{{langname}}}詞。",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["有常用漢字的詞"] = {
		intro = "{{wikipedia|常用漢字}}",
		description = "至少包含一個漢字,且所有漢字都在常用漢字表中的{{{langname}}}詞。",
		additional = "參見[[:Category:有非常用漢字的{{{langname}}}詞]]。",
		parents = {{name = "依正字法屬性分類的詞", sort = "jōyō"}},
	}

	labels["有非常用漢字的詞"] = {
		intro = "{{wikipedia|Jōyō kanji}}",
		description = "至少包含一個不在常用漢字表中的漢字的{{{langname}}}詞。",
		additional = "參見[[:Category:有常用漢字的{{{langname}}}詞]]。",
		parents = {{name = "依正字法屬性分類的詞", sort = "non-jōyō"}},
	}

	for _, non_joyo_type in ipairs {"表外漢字", "人名用漢字"} do
		labels["有" .. non_joyo_type .. "的詞"] = {
			description = "有" .. non_joyo_type .. "的{{{langname}}}詞。",
			parents = {{name = "有非常用漢字的詞", sort = non_joyo_type}},
		}
	end

	-- School grades 1 through 6; the grade word comes from m_numeric.ones_position.
	for i = 1, 6 do
		local ord = m_numeric.ones_position[i]
		labels["有" .. ord .. "年級漢字的詞"] = {
			description = "詞中有於" .. ord .. "年級教授的漢字的{{{langname}}}詞。",
			parents = {{name = "有常用漢字的詞", sort = i}},
		}
	end

	labels["有中學漢字的詞"] = {
		description = "詞中有於中學教授的漢字的{{{langname}}}詞。",
		parents = {{name = "有常用漢字的詞", sort = "secondary school"}},
	}

	labels["有多個讀音的詞"] = {
		description = "有多種讀音(因此有多種[[假名]]寫法)的{{{langname}}}詞。",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["依漢字音拍數分類的詞"] = {
		description = "依據漢字讀音中的音拍數進行分組的{{{langname}}}分類。",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["單漢字詞"] = {
		description = "由單個漢字組成的{{{langname}}}詞。",
		-- "字符數" is the parent name that the kanji-count handler in this file uses.
		parents = {"依正字法屬性分類的詞", {name = "字符數", sort = " "}},
	}
end



--- Register the pattern-based category handlers of this module.
-- Each handler appended to `handlers` receives a `data` table, matches
-- `data.label` against one label pattern and returns a category spec table
-- (sometimes followed by `true`), or nothing when the label is not its concern.
-- @param handlers array to which the handler functions are appended
-- @param lang language object used by the tagging and linking helpers
-- @param m_lang not used in this function
function export.add_handlers(handlers, lang, m_lang)
	-- Reverse lookup: number word -> number.
	-- FIXME: Only works for 0 through 19.
	local word_to_number = {}
	for number, word in pairs(m_numeric.ones_position) do
		word_to_number[word] = number
	end

	-- Capture for a kana reading (may contain "-"); meant for mw.ustring patterns.
	local kana_capture = "([-ぁ-ー𛀁𛀆]+)"

	-- Recognized period prefixes.
	local periods = {
		['歷史假名遣'] = true,
		['古日語'] = true,
	}

	-- Split a combined "period + reading type" string.
	-- Returns (nil, nil) for nil input, ("", text) when no period is found,
	-- ("", period) when the text is a period name only, and otherwise
	-- (period, text with the period removed).
	local function separate_period_reading_type(text)
		if not text then
			return nil, nil
		end
		for period in pairs(periods) do
			-- Period names hold no pattern magic characters, so a plain find is equivalent.
			if text:find(period, 1, true) then
				local rest = text:gsub(period, "")
				if rest == "" then
					return "", period
				end
				return period, rest
			end
		end
		return "", text
	end

	-- Validate the period and build the glossary link for the reading type.
	-- Returns (period_text, reading_type_link). Returns nil when the period is
	-- unknown, and (nil, reading_type) unlinked when a period name stands alone
	-- as the reading type.
	local function get_period_text_and_reading_type_link(period, reading_type)
		local period_text = (period ~= "" and period) or nil
		if period_text and not periods[period_text] then
			return nil
		end

		-- Allow periods (historical or ancient) by themselves; they will parse as reading types.
		if not period_text and periods[reading_type] then
			return nil, reading_type
		end

		local reading_type_link = "[[Appendix:日語術語表#" .. reading_type .. "|" .. reading_type .. "]]"
		return period_text, reading_type_link
	end

	-- Pick the script: after dropping ASCII, any character outside the Hira
	-- character set selects Jpan, otherwise Hira.
	local function get_sc(str)
		local non_ascii = str:gsub('[%z\1-\127]', '')
		return rmatch(non_ascii, '[^' .. Hira:getCharacters() .. ']') and Jpan or Hira
	end

	-- Wrap `reading` in script tagging for the module language.
	local function get_tagged_reading(reading)
		return require("Module:script utilities").tag_text(reading, lang, get_sc(reading))
	end

	-- Build a full link to `reading` with a romaji transliteration.
	-- `historical` is forwarded as the `hist` option of the transliterator.
	local function get_reading_link(reading, historical)
		local term = reading:gsub('[%.%- ]', '')
		local kana = reading:gsub('%-', '')
		return require("Module:links").full_link({
			lang = lang,
			sc = get_sc(reading),
			term = term,
			tr = kana_to_romaji(kana, lang:getCode(), nil, {hist = historical})
		}, 'term')
	end

	-- True-ish when the reading type ends in "音" preceded by at least one byte.
	local function is_on_subtype(reading_type)
		return reading_type:find(".音$")
	end


	-- "有N個漢字的詞": terms written with N kanji, N given as a number word or '兩'.
	table.insert(handlers, function(data)
		local count = data.label:match("^有(.+)個漢字的詞$")
		if not count then
			return nil
		end
		local num = word_to_number[count]
		if not num then
			if count ~= '兩' then
				return nil
			end
			num = 2
		end
		return {
			description = "有" .. count .. "個漢字的{{{langname}}}詞。",
			breadcrumb = num,
			parents = {{name = "字符數", sort = num}},
		}
	end)


	-- "讀作N個音拍的漢字": kanji readings grouped by mora count.
	table.insert(handlers, function(data)
		local count = data.label:match("^讀作(.)個音拍的漢字$")
		if not count then
			return nil
		end
		return {
			description = "含有".. count .."個音拍的{{{langname}}}漢字讀法。",
			breadcrumb = count,
			parents = {{name = "依漢字音拍數分類的詞", sort = count}},
		}
	end)

	-- "歷史假名遣包含「kana」的詞": terms whose historical spelling contains a kana.
	table.insert(handlers, function(data)
		-- kana_capture holds multi-byte ranges, so match with the Unicode-aware matcher.
		local kana = rmatch(data.label, "^歷史假名遣包含「" .. kana_capture .. "」的詞$")
		if not kana then
			return
		end

		return {
			-- Second argument requests historical transliteration.
			description = "{{w|歷史假名遣}}含有" .. get_reading_link(kana, true) .. " 的{{{langname}}}詞。",
			displaytitle = "歷史假名遣包含「" .. get_tagged_reading(kana) .. "」的{{{langname}}}詞",
			breadcrumb = "歷史假名遣",
			parents = {
				{name = "帶「" .. kana .. "」的詞", sort = " "},
				{name = "依歷史假名遣分類的詞", sort = data.lang:makeSortKey(kana)}
			},
		}
	end)

	-- "讀作「kana」的漢字": kanji with a given reading, no reading type specified.
	table.insert(handlers, function(data)
		local reading = rmatch(data.label, "^讀作「" .. kana_capture .. "」的漢字$")
		if not reading then
			return nil
		end
		local tagged_reading = get_tagged_reading(reading)
		return {
			description = "讀作「" .. get_reading_link(reading) .. "」的{{{langname}}}漢字。",
			displaytitle = "讀作「" .. tagged_reading .. "」的{{{langname}}}漢字",
			breadcrumb = tagged_reading,
			parents = {{name = "依讀音分類的漢字", sort = (data.lang:makeSortKey(reading))}},
		}
	end)

	-- "<period><reading type>讀作「kana」的漢字": kanji with a given reading of a given type.
	table.insert(handlers, function(data)
		local period_plus_reading_type, reading = rmatch(data.label, "^([一-鿌]*)讀作「" .. kana_capture .. "」的漢字$")
		if not period_plus_reading_type then
			return nil
		end
		local period, reading_type = separate_period_reading_type(period_plus_reading_type)
		local period_text, reading_type_link = get_period_text_and_reading_type_link(period, reading_type)
		if not reading_type_link then
			return nil
		end

		-- Compute parents.
		local parents = {
			{name = "依" .. (period_text or "") .. reading_type .. "分類的漢字", sort = (data.lang:makeSortKey(reading))}
		}
		if is_on_subtype(reading_type) then
			table.insert(parents, {name = (period_text or "") .. "音讀讀作「" .. reading .. "」的漢字", sort = reading_type})
		elseif period_text then
			table.insert(parents, {name = period_text .. "讀作「" .. reading .. "」的漢字", sort = reading_type})
		end
		if not period_text then
			table.insert(parents, {name = "讀作「" .. reading .. "」的漢字", sort = reading_type})
		end

		local tagged_reading = get_tagged_reading(reading)
		return {
			description = (period_text or "") .. reading_type_link .. "讀法讀作「" .. get_reading_link(reading, period_text) .. "」的{{{langname}}}[[漢字]]。",
			displaytitle = (period or "") .. reading_type .. "讀作「".. tagged_reading .. "」的{{{langname}}}漢字",
			breadcrumb = tagged_reading,
			parents = parents,
		}
	end)


	-- "依<period><reading type>分類的漢字": umbrella categories of kanji by reading type.
	table.insert(handlers, function(data)
		local matched = rmatch(data.label, "^依([一-鿌]*)分類的漢字$")
		-- A nil `matched` yields a nil period, which ends the handler below.
		local period, reading_type = separate_period_reading_type(matched)
		if not period then
			return nil
		end
		local period_text, reading_type_link = get_period_text_and_reading_type_link(period, reading_type)
		if not reading_type_link then
			return nil
		end

		-- Compute parents.
		local parents = {
			is_on_subtype(reading_type) and {name = "依" .. (period_text or "") .. "音讀分類的漢字", sort = reading_type} or
			period_text and {name = "依" .. reading_type .. "分類的漢字", sort = period} or
			{name = "依讀音分類的漢字", sort = reading_type}
		}
		if period_text then
			table.insert(parents, {name = "依" .. period_text .. "分類的漢字", sort = reading_type})
		end

		return {
			description = "依照" .. (period_text or "") .. reading_type_link .. "進行分類的{{{langname}}}[[漢字]]。",
			breadcrumb = (period_text or "") .. reading_type,
			parents = parents,
		}
	end)


	-- "讀作「kana」的詞": terms spelled with a kanji read as the given kana.
	table.insert(handlers, function(data)
		local reading = rmatch(data.label, "^讀作「" .. kana_capture .. "」的詞$")
		if not reading then
			return nil
		end

		-- Compute parents.
		local sort_key = (data.lang:makeSortKey(reading))
		local mora_count = require("Module:ja").count_morae(reading)
		local parents = {
			{name = "依漢字讀音分類的詞", sort = sort_key},
			{name = "讀作" .. mora_count .. "個音拍的漢字", sort = sort_key},
			{name = "讀作「" .. reading .. "」的漢字", sort = " "},
		}

		local tagged_reading = get_tagged_reading(reading)
		return {
			description = "讀法為" .. get_reading_link(reading) ..
			"的{{{langname}}}詞條(不考慮任何發音變化)。",
			displaytitle = "讀作「" .. tagged_reading .. "」的{{{langname}}}詞",
			breadcrumb = tagged_reading,
			parents = parents,
		}
	end)


	-- "寫作「kanji」讀作「kana」的詞": terms with a specific kanji in a specific
	-- reading. The reading types come from the positional arguments in data.args.
	table.insert(handlers, function(data)
		local kanji, reading = rmatch(data.label, "^寫作「(.)」讀作「" .. kana_capture .. "」的詞$")
		if not kanji then
			return nil
		end
		local params = {
			[1] = {list = true},
		}
		local args = require("Module:parameters").process(data.args, params)
		if #args[1] == 0 then
			error("對於形如「寫作『?』讀作『?』的" .. data.lang:getCanonicalName() ..
				"詞」的分類,請指明至少一種讀法(例如kun表示訓讀,on表示音讀),分別填入1=、2=、3=等。")
		end

		local sort_key = (data.lang:makeSortKey(reading))
		local parents = {
			{name = "帶「" .. kanji.. "」的詞", sort = sort_key},
			-- FIXME, using the kanji directly as the sort key is what it did before but maybe we should call [[Module:Hani-sortkey]]
			-- to get the radical/stroke sort key
			{name = "讀作「" .. reading.. "」的詞", sort = kanji},
		}
		local reading_types = {}
		for _, reading_type in ipairs(args[1]) do
			local chinese_name = reading_types_chinese[reading_type]
			if not chinese_name then
				-- Fail with a clear message instead of a nil-concatenation error.
				error("無法識別的讀法類型:" .. tostring(reading_type))
			end
			table.insert(reading_types, chinese_name)
			table.insert(parents, {name = "有漢字使用" .. chinese_name .. "的詞", sort = sort_key})
		end
		reading_types = mw.text.listToText(reading_types, "、", "或")

		local tagged_kanji = get_tagged_reading(kanji)
		local tagged_reading = get_tagged_reading(reading)
		return {
			description = "含有漢字「{{l|{{{langcode}}}|" .. kanji .. "}}」且採用" ..
				reading_types .. "讀作「" .. get_reading_link(reading) .. "」的{{{langname}}}詞語。",
			displaytitle = "寫作「" .. tagged_kanji .. "」讀作「" .. tagged_reading .. "」的{{{langname}}}詞",
			breadcrumb = "讀作「" .. tagged_reading .. "」",
			parents = parents,
		}, true
	end)

	-- "使用「X」作為代用字代替「Y」的詞": terms where X stands in for Y.
	-- Requires a `sort` argument in data.args.
	table.insert(handlers, function(data)
		-- Anchored at the end like every other label pattern in this function.
		local kanji, daiyoji = rmatch(data.label, "^使用「(.)」作為代用字代替「(.)」的詞$")
		if not kanji then
			return nil
		end
		local params = {
			["sort"] = {},
		}
		local args = require("Module:parameters").process(data.args, params)
		if not args.sort then
			error("對於形如「使用『?』作為代用字代替『?』的" .. data.lang:getCanonicalName() ..
				"詞」的分類,請指明至少一種代用字用法,在sort=填入代用字的發音")
		end

		local tagged_kanji = get_tagged_reading(kanji)
		local tagged_daiyoji = get_tagged_reading(daiyoji)
		return {
			description = "採用代用字「{{l|{{{langcode}}}|" .. kanji .. "}}」来代替「{{l|{{{langcode}}}|" .. daiyoji .. "}}」的{{{langname}}}詞語。",
			displaytitle = "使用「" .. tagged_kanji .. "」作為代用字代替「" .. tagged_daiyoji .. "」的{{{langname}}}詞",
			breadcrumb = "使用「" .. tagged_kanji .. "」代替「" .. tagged_daiyoji .. "」的詞",
			parents = {{name = "有代用字的詞", sort = args.sort}},
		}, true
	end)
end

return export