local export = {}
local replace = mw.ustring.gsub
local find = mw.ustring.find
local match = mw.ustring.match
local itermatch = mw.ustring.gmatch
local split = mw.text.split
local itersplit = mw.text.gsplit
local trim = mw.text.trim
local lower = mw.ustring.lower
local sub = mw.ustring.sub
local len = mw.ustring.len
--copied from [[en:Module:User:Suzukaze-c/02]]
--- Collect the kana readings recorded in the headword templates of a page.
-- The wikitext of `pagename` is scanned for a fixed set of `{{ja-...}}`
-- headword templates; every template parameter that contains kana is taken as
-- a reading. Pages referenced through `{{ja-see|...}}` are scanned as well,
-- and when such a target yields no readings the target title itself is used.
-- @param pagename title of the page to scan (string)
-- @return array of reading strings with duplicates removed; empty when the
--         title contains a wikilink, is invalid, or the page has no content
function export.extract_ja_readings(pagename)
	if match(pagename, "%[%[") then
		--error("Cannot process Japanese text with embedded wikilinks.")
		return {}
	end

	local readings = {}

	-- Headword templates to scan, in scan order (written as pattern fragments,
	-- hence the escaped hyphen).
	local template_names = {
		'adj', 'noun', 'verb', 'verb%-suru', 'phrase', 'pos', 'altread',
	}

	-- Fetch the wikitext of a page; nil when the title is invalid or the page
	-- cannot be read (mw.title.new returns nil for invalid titles).
	local function get_content(title_text)
		local title = mw.title.new(title_text)
		if not title then
			return nil
		end
		return title:getContent()
	end

	-- Extract the readings from the parameter string of one template call.
	-- `content` is the full wikitext of the page the call was found on.
	local function process(text, content)
		-- drop named parameters whose values are not readings
		text = replace(text, 'hhira=[^|}]+', '')
		text = replace(text, 'decl=[^|}]+', '')
		text = replace(text, 'infl=[^|}]+', '')
		text = replace(text, 'kyu=[^|}]+', '')
		text = replace(text, 'head=[^|}]+', '')
		text = replace(text, 'hira=', '')
		if find(text, 'proper') then
			-- prefix every word with `^`
			text = '^' .. replace(text, '([ |])', '%1^')
		end
		if find(content, 'infl=い') then
			text = replace(text, 'しい', 'し.い')
		end
		if find(content, 'ja%-verb') then
			text = replace(text, 'おう', 'お.う')
		end
		for parameter in itersplit(text, '|') do
			if find(parameter, '[あ-ー]') then
				table.insert(readings, parameter)
			end
		end
	end

	-- Scan one page's wikitext for all known headword templates.
	local function scan(content)
		for _, name in ipairs(template_names) do
			for parameters in itermatch(content, '{{ja%-' .. name .. '|([^}]+)}}') do
				process(parameters, content)
			end
		end
	end

	local content = get_content(pagename)
	if not content then
		return readings
	end
	scan(content)

	for target in itermatch(content, '{{ja%-see|([^|}]+)') do
		-- Remember the count, not the table: `readings` is mutated in place, so
		-- comparing against another reference to it could never detect growth.
		local count_before = #readings
		local target_content = get_content(target)
		if target_content then
			scan(target_content)
		end
		if #readings == count_before then
			-- [[解れる]]→[[ほぐれる]], scanning [[ほぐれる]] will find nothing
			table.insert(readings, target)
		end
	end

	return require("Module:table").removeDuplicates(readings)
end
--Inspired by [[en:Module:User:Suzukaze-c/02]]
--- Generate the wikitext of a new Japanese entry from shorthand arguments.
-- Arguments are taken from `frame.args` when it has a first positional
-- argument, otherwise from the parent frame. Argument 1 is the reading
-- (romaji is converted to kana, and may carry markup used to build
-- `{{ja-kanjitab}}`); arguments 2.. are part-of-speech codes and definitions;
-- the named arguments are listed in the `params` table below.
-- @param frame Scribunto frame object
-- @return the generated wikitext (string), trimmed of surrounding whitespace
function export.newja(frame)
	local a = frame.args[1] and frame.args or frame:getParent().args
	local m_languages = require('Module:languages')
	local m_scripts = require('Module:scripts')
	local lang_ja = m_languages.getByCode('ja')
	local sc_Jpan = m_scripts.getByCode('Jpan')
	local sc_Hani = m_scripts.getByCode('Hani')
	local sc_Hrkt = m_scripts.getByCode('Hrkt')
	local pagename = a['testing_pagename'] or mw.title.getCurrentTitle().text
	local header_level = 2 -- header level
	local this_content = mw.title.new(mw.title.getCurrentTitle().text):getContent() or ''

	-- PoS code -> list of 'header name:headword template' pairs.
	-- A code with several entries produces several PoS sections.
	local pos_datas = {
		['a'] = {'形容词:adj'},
		['n'] = {'名词:noun'},
		['p'] = {'短语:phrase'},
		['v'] = {'动词:verb'},
		['suru'] = {'动词:verb-suru'},
		['vf'] = {'Verb:verb form'},
		['ad'] = {'副词:pos|副詞'},
		['i'] = {'叹词:pos|感嘆詞'},
		['pn'] = {'专有名词:pos|專有名詞'},
		['af'] = {'形容词:pos|adjective form'},
		['c'] = {'连词:pos|連詞'},
		['s'] = {'名词:noun', '动词:verb-suru'},
		['an'] = {'形容词:adj', '名词:noun'},
	}
	local pos_aliases = {
		[''] = 'n',
	}
	-- expand every 'name:template' string into a {name=, template=} record
	for pos_codes, array in pairs(pos_datas) do
		for i, name_and_template in ipairs(array) do
			name_and_template = split(name_and_template, ':')
			pos_datas[pos_codes][i] = {
				['name'] = name_and_template[1],
				['template'] = name_and_template[2],
			}
		end
	end
	-- user-facing verb class -> value written to `type=`
	local verb_types_corresp = {
		['1'] = '2', -- "1" (ichidan) is written as type=2
		['5'] = '1', -- "5" (godan) is written as type=1
	}
	-- `\word ...` shortcuts expanded inside the etymology text
	local etymology_magic_words = {
		['der ([^.,]+)'] = function(a)
			return '{{der|ja|' .. replace(a, '^([a-z]+)', '%1|') .. '}}'
		end,
		['bor ([^.,]+)'] = function(a)
			return '{{bor|ja|' .. replace(a, '^([a-z]+)', '%1|') .. '}}'
		end,
		['rfe (.+)'] = function(a)
			return '{{rfe|ja' .. (a and '|' .. a or '') .. '}}'
		end,
	}
	-- `\word` shortcuts expanded inside the usage notes
	local usage_notes_magic_words = {
		['bio'] = '{{U:ja:biology}}'
	}
	local output = {}
	local params = {
		[2] = {list = true, allow_holes = true}, -- pos and def
		['en'] = {type = 'number'}, -- etymology number
		['e'] = {}, -- etymology text
		['l'] = {}, -- alt in header
		['y'] = {allow_empty = true}, -- yomi
		['r'] = {allow_empty = true}, -- rendaku
		['ll'] = {}, -- alt in [[t:ja-kanjitab]]
		['w'] = {allow_empty = true}, -- wikipedia
		['wzh'] = {allow_empty = true}, -- zh.wikipedia
		['file'] = {allow_empty = true}, -- file
		['nop'] = {allow_empty = true}, -- no [[t:ja-pron]]
		['eojad'] = {allow_empty = true},
		['a'] = {}, -- accent
		['head'] = {}, -- head
		[1] = {allow_empty = true, default = ''}, -- kana
		['tr'] = {}, -- transitivity
		['ak'] = {}, -- alternate kana
		['mw'] = {}, -- measure word/counter
		['kyu'] = {}, -- kyuujitai
		['hh'] = {}, -- historical hiragana
		['vp'] = {},
		['un'] = {}, -- usage notes
		['sy'] = {}, -- synonyms
		['an'] = {}, -- antonyms
		['hsup'] = {}, -- hypernyms
		['hsub'] = {}, -- hyponyms. sup and sub bc i'm big dumb
		['co'] = {}, -- coordinate terms
		['de'] = {}, -- derived terms
		['re'] = {}, -- related terms
		['desc'] = {}, -- descendants
		['al'] = {}, -- see also
		['reflist'] = {allow_empty = true}, -- references header
		['c'] = {}, -- [[t:C]]
		['cln'] = {}, -- [[t:cln]]
		['testing_pagename'] = {},
		-- `allow_empty = true`
		-- instead of `type = 'boolean'`
		-- `|r=` is sufficient for saying 'rendaku yes'
		-- instead of `|r=y`
		['s'] = {alias_of = 'sy'},
		['h'] = {alias_of = 'hh'},
		['yomi'] = {alias_of = 'y'},
	}
	a = require('Module:parameters').process(a, params)

	-- Convert romaji input to kana through [[Module:Typing-aids]].
	local function waapuro_to_kana(text)
		if text == '' then return text end -- with just one parameter, [[mod:typing-aids]] will fallback to language 'all' and treat parameter 1 as the text to convert. for us that will return 'ja'
		text = replace(text, '-', '@@@') -- preserve hyphen
		text = replace(text, '_', '-') -- instead of hyphen, use underscore for chouonpu
		text = require('Module:Typing-aids').replace({'ja', text})
		text = replace(text, '@@@', '-') -- restore hyphen
		return text
	end

	-- Append one piece of output (a nil argument appends nothing).
	local function wr(text)
		table.insert(output, text)
	end

	-- Append a section header at the current header level.
	local function wrh(text)
		wr('\n' .. mw.ustring.rep('=', header_level) .. text .. mw.ustring.rep('=', header_level))
	end

	-- Shift the current header level by `n`.
	local function hl(n)
		header_level = header_level + n
	end

	-- True when no kana was given and the page name is not kana-only.
	-- Note that an empty string counts as "given" (it is truthy in Lua).
	local function needs_reading(pagename, kana)
		return not (kana or find(pagename, '^[ぁ-ー ^%%.]+$'))
	end

	-- Build the parameter list of [[t:ja-kanjitab]] from the marked-up reading.
	local function generate_tab_params(kana, a)
		-- 銀行
		-- gin,kou
		-- gin|kou|yomi=o
		-- 文字
		-- mon;mo,ji
		-- mon|k1=mo|ji|yomi=o
		-- 送り仮名
		-- [oku]ri[ka;ga][na]
		-- oku|ri|ka|k3=ga|na|yomi=k
		-- 送仮名
		-- [oku:ri][ka;ga][na]
		-- oku|o1=ri|ka|k3=ga|na|yomi=k
		-- 満漢全席
		-- man-kan zen,seki
		-- man|kan|zen|seki|yomi=o
		-- 犬
		-- inu
		-- inu
		-- because pressing shift is effort
		local yomi
		local params = {}
		if find(kana, '%[') then
			-- bracket markup: one parameter per bracketed group
			yomi = 'k'
			for yomigana in itermatch(kana, '%[(.-)%]') do
				table.insert(params, yomigana)
			end
		elseif sc_Hani:countCharacters(pagename) > 0 then
			if find(kana, ',') then
				yomi = 'o'
			end
			kana = replace(kana, '%^', '')
			kana = replace(kana, '[ .-]', ',')
			if kana ~= '' then
				params = split(kana, ',')
			end
		end
		for i, yomigana in ipairs(params) do
			yomigana = replace(yomigana, ';', '|k' .. i .. '=')
			yomigana = replace(yomigana, ':', '|o' .. i .. '=')
			params[i] = yomigana
		end
		if a['r'] then
			table.insert(params, 'r=y')
		end
		yomi = a['y'] or yomi
		if yomi then
			table.insert(params, 'yomi=' .. yomi)
		end
		if a['ll'] then
			table.insert(params, 'alt=' .. replace(a['ll'], '、', ','))
		end
		return params
	end

	-- Turn a shorthand list of terms into `* {{ja-r|...}}` / `* {{ja-l|...}}` lines.
	local function generate_links_list(text)
		-- 3密
		-- * {{ja-l|3密}}
		-- 3密 sanmitu
		-- * {{ja-r|3密|さんみつ}}
		-- 3密 sanmitu,q=Qualifier\三つの密 mittu no mitu\gloss=Gloss
		-- * {{ja-r|3密|さんみつ}}
		-- * {{q|Qualifier}} {{ja-r|三つの密|みっつ の みつ|gloss=Gloss}}
		local params = {}
		for i, item in ipairs(split(text, '[,、]')) do
			item = split(item, '[\\¥]')
			local q
			local output_link = {}
			local main_found = false
			local pagename, kana
			local r_or_l
			while #item > 0 do
				if find(item[1], '^q=') then
					q = replace(item[1], '^q=', '')
				elseif not main_found then
					-- "\227\128\128" is U+3000 IDEOGRAPHIC SPACE: accept it as
					-- well as the ASCII space between term and reading
					pagename, kana = match(item[1], '^([^ \227\128\128]-)[ \227\128\128](.+)$')
					pagename = pagename or item[1] -- if match() returns nil
					-- normalise ideographic spaces inside the reading
					kana = (kana and replace(kana, '\227\128\128', ' ') or kana)
					kana = (kana and waapuro_to_kana(kana) or kana)
					if not kana then
						-- no reading given: use the entry's own reading when
						-- it is unambiguous
						kana = export.extract_ja_readings(pagename)
						if #kana == 1 then
							kana = kana[1]
						else
							kana = nil
						end
					end
					r_or_l = needs_reading(pagename, kana) and 'l' or 'r'
					table.insert(output_link, 'ja-' .. r_or_l)
					table.insert(output_link, pagename)
					table.insert(output_link, kana)
					main_found = true
				else
					-- anything after the main term is passed through verbatim
					table.insert(output_link, item[1])
				end
				table.remove(item, 1)
			end
			table.insert(
				params,
				'* ' .. (q and '{{qual|' .. q .. '}} ' or '') .. '{{' .. table.concat(output_link, '|') .. '}}'
			)
		end
		return params
	end

	local kana = a[1]
	local kana_no_hyphens
	-- convert kana from romaji to kana
	kana = waapuro_to_kana(kana)
	-- convert fullwidth CJK symbols to halfwidth
	-- (the fullwidth semicolon and colon are written as UTF-8 byte escapes so
	-- that they cannot be silently normalised into their ASCII counterparts)
	kana = replace(kana, '(.)', {
		['「'] = '[',
		['\239\188\155'] = ';', -- U+FF1B FULLWIDTH SEMICOLON
		['\239\188\154'] = ':', -- U+FF1A FULLWIDTH COLON
		['」'] = ']',
		['、'] = ',',
		['。'] = '.',
	})
	-- generate [[t:ja-kanjitab]]
	local tab
	if a['ll'] or sc_Hani:countCharacters(pagename) > 0 then
		local tab_params = generate_tab_params(kana, a)
		tab_params = table.concat(tab_params, '|')
		tab = '{{ja-kanjitab' .. (tab_params and '|' .. tab_params) .. '}}'
	end
	-- remove markup for generating [[t:ja-kanjitab]] from kana
	-- kun
	kana = replace(kana, '(%[)([^%[%]]-):([^%[%]]-)(%])', '%1%2%4%3')
	kana = replace(kana, '(%[)([^%[%]]-);([^%[%]]-)(%])', '%3')
	kana = replace(kana, '(%[)([^%[%]]-)(%])', '%2')
	-- on
	kana = replace(kana, '([ .,-])([^ .,-]-;)', '%1')
	kana = replace(kana, '^([^ .,-]-;)', '')
	kana = replace(kana, ',', '')
	-- for [[t:ja-pron]]
	kana_no_hyphens = replace(kana, '[-^]', '')
	-- blank if it's the same as the pagename. avoid unnecessary template input
	if kana == pagename then kana = '' end
	if kana_no_hyphens == pagename then kana_no_hyphens = '' end
	-- process etymology
	-- process usage notes
	if a['e'] then
		for magic_word, f in pairs(etymology_magic_words) do
			a['e'] = replace(a['e'], '\\' .. magic_word, f)
		end
	end
	if a['un'] then
		a['un'] = replace(a['un'], '\\([a-z]+)', usage_notes_magic_words)
	end
	-- write Japanese header
	-- write etymology section
	-- write etymology
	-- write [[t:wikipedia]]
	-- write [[t:ja-kanjitab]]
	-- write alternative forms
	if match(this_content, 'ja%-readings') and not a['en'] then
		a['en'] = 0
	end
	if a['en'] then
		-- en = 0
		-- for adding new sections under a single character entry like [[字]], where you want an Etymology section for sanity, but just one, and no Japanese header
		if a['en'] == 1 and not match(this_content, 'ja%-readings') then wrh('日语') end
		hl(1)
		wrh('词源' .. (a['en'] == 0 and '' or ' ' .. a['en']))
		hl(1)
		wr(tab)
		wr(a['w'] and ('{{wikipedia|lang=ja' .. (a['w'] == '' and '' or '|' .. a['w']) .. '}}') or nil)
		wr(a['wzh'] and ('{{wikipedia|lang=zh|' .. a['wzh'] .. '}}') or nil)
		wr(a['file'] and ('[[File:' .. a['file'] .. '|thumb|right]]') or nil)
		wr(a['e'])
		if a['l'] then
			wrh('其他形式')
			wr(table.concat(generate_links_list(a['l']), '\n'))
		end
	else
		wrh('日语')
		hl(1)
		wr(tab)
		wr(a['w'] and ('{{wikipedia|lang=ja' .. (a['w'] == '' and '' or '|' .. a['w']) .. '}}') or nil)
		wr(a['wzh'] and ('{{wikipedia|' .. a['wzh'] .. '}}') or nil)
		wr(a['file'] and ('[[File:' .. a['file'] .. '|thumb|right]]') or nil)
		if a['l'] then
			wrh('其他形式')
			wr(table.concat(generate_links_list(a['l']), '\n'))
		end
		if a['e'] then
			wrh('词源')
			wr(a['e'])
		end
	end
	if a[2][1] and a[2][1] ~= '' and not find(a[2][1], '[a-z]') then
		-- if not given latn text as pos and def
		-- write [[t:ja-see]] using that text
		if a['e'] then
			wrh('定义')
		end
		wr('{{ja-see|' .. table.concat(a[2], '|') .. '}}')
	else
		-- write [[t:ja-pron]]
		if not a['nop'] then
			-- 1DJR,2,3-
			-- 0DJR NHK
			local pron_params = {}
			if kana_no_hyphens ~= '' or needs_reading(pagename, kana) then
				table.insert(pron_params, kana_no_hyphens)
			end
			if a['a'] then
				for i, acc_item in ipairs(split(a['a'], '[,、]')) do
					acc_item = trim(acc_item) -- tolerate "1, 2"
					local acc, acc_ref = match(acc_item, '^(%d+)(.*)$')
					if not acc then
						-- without this check the nil captures would fail
						-- further down with an unhelpful message
						error('Invalid accent item "' .. acc_item .. '" in |a=: expected a number optionally followed by reference codes')
					end
					acc_ref = replace(acc_ref, ' ', ',')
					acc_ref = string.upper(acc_ref)
					if acc_ref == '' then
						acc_ref = 'DJR' -- default reference
					elseif acc_ref == '-' then
						acc_ref = nil -- explicitly no reference
					end
					if acc_ref and not a['reflist'] then
						-- a reference was cited, so a references section is needed
						a['reflist'] = true
					end
					table.insert(pron_params, 'acc' .. (i > 1 and i or '') .. '=' .. acc)
					table.insert(pron_params, acc_ref and 'acc' .. (i > 1 and i or '') .. '_ref=' .. acc_ref or nil)
				end
			end
			wrh('发音')
			wr('{{ja-pron' .. (#pron_params > 0 and '|' .. table.concat(pron_params, '|') or '') .. '}}')
		end
		if a['eojad'] then
			wr('{{ja-ojad}}')
		end
		-- if no pos or def parameters
		-- then generate a default Noun and [[t:rfdef]]
		if a[2]['maxindex'] == 0 then
			a[2] = {''}
			a[2]['maxindex'] = 1
		end
		local i = 1
		while i <= a[2]['maxindex'] do
			-- 犬 <empty string>
			-- 犬 n
			-- 赤い a
			-- 赤い a,i
			-- 明らか a,na
			-- 画然 a,tari
			-- 異常 an
			-- 食べる v,2
			local pos_code
			local defs
			local pos_type, infl
			pos_code = a[2][i] or ''
			defs = {
				a[2][i + 1] or '',
				a[2][i + 2] or '',
			}
			-- split "code,type"
			local match_a, match_b = match(pos_code, '^(.+),(.+)$')
			if match_a then pos_code, pos_type = match_a, match_b end
			pos_code = pos_aliases[pos_code] or pos_code
			if pos_code == 'v' and verb_types_corresp[pos_type] then
				pos_type = verb_types_corresp[pos_type]
			end
			-- default type
			if not pos_type then
				if pos_code == 'an' then
					pos_type = 'na'
				elseif pos_code == 'v' then
					pos_type = '1'
				elseif pos_code == 'a' then
					pos_type = 'i'
				end
			end
			-- adjectives use infl
			if pos_code == 'an' or pos_code == 'a' then
				infl = pos_type
				pos_type = nil
			end
			-- get data
			local pos_data = pos_datas[pos_code]
			-- create fallback data
			pos_data = pos_data or {
				{
					['name'] = replace(pos_code, '^.', mw.ustring.upper),
					['template'] = 'pos|' .. pos_code,
				},
			}
			-- Parse each definition argument: `$$$` separates definitions,
			-- `[[[example$kana$translation]]]` attaches a usage example.
			for ii, def in ipairs(defs) do
				local def_t = split(def, '%$%$%$')
				local processed_def = {}
				for _, single_def in ipairs(def_t) do
					local usex_start = find(single_def, '%[%[%[')
					local usexes = itermatch(single_def, '%[%[%[.*%]%]%]')
					local usex_table = {}
					for usex in usexes do
						usex = sub(usex, 4, len(usex) - 3) -- strip [[[ and ]]]
						local example, usex_kana, translation = unpack(split(usex, '%$'))
						table.insert(usex_table, {['example'] = example, ['kana'] = usex_kana, ['translation'] = translation})
					end
					-- the definition text is whatever precedes the first example
					single_def = usex_start and sub(single_def, 1, usex_start - 1) or single_def
					table.insert(processed_def, {['def'] = single_def, ['usex'] = usex_table})
				end
				defs[ii] = processed_def
			end
			-- write header, etc
			for ii, name_and_template in ipairs(pos_data) do
				local name = name_and_template['name']
				local template = name_and_template['template']
				-- prevent inappropriate addition of parameters (`noun|infl=na`)
				local is_a_or_v = match(template, '^adj') or match(template, '^verb')
				template = template .. (a['head'] and '|head=' .. a['head'] or '')
				template = template .. (kana ~= '' and '|' .. kana or '')
				template = template .. (a['ak'] and '|' .. replace(waapuro_to_kana(a['ak']), ',', '|') or '')
				if is_a_or_v then
					template = template .. (a['tr'] and '|tr=' .. a['tr'] or '')
					template = template .. (pos_type and '|type=' .. pos_type or '')
					template = template .. (infl and '|infl=' .. infl or '')
				end
				template = template .. (a['mw'] and '|count=' .. a['mw'] or '')
				template = template .. (a['kyu'] and '|kyu=' .. a['kyu'] or '')
				template = template .. (a['hh'] and '|hhira=' .. waapuro_to_kana(a['hh']) or '')
				wrh(name)
				wr('{{ja-' .. template .. '}}')
				if match(template, '^verb') and a['vp'] then
					wr('{{ja-vp|' .. replace(a['vp'], ',', '|') .. '}}')
				end
				wr('')
				for iii, def in ipairs(defs[ii]) do
					wr('# ' .. def['def'])
					for iiii, usex in ipairs(def['usex']) do
						-- an example given without `$kana` has no kana field;
						-- leave the parameter out rather than failing on nil
						wr('#: {{ja-usex|' .. usex['example']
							.. (usex['kana'] and ('|' .. waapuro_to_kana(usex['kana'])) or '')
							.. (usex['translation'] and ('|' .. usex['translation']) or '')
							.. '}}')
					end
				end
				if is_a_or_v then
					local kana_stem = sub(kana ~= '' and kana or pagename, 0, -2)
					kana_stem = (kana == '' and kana or kana_stem) -- the templates will be smart if you do not give it a reading
					local kana_last = sub(kana ~= '' and kana or pagename, -1)
					kana_last = lang_ja:transliterate(kana_last, sc_Hrkt)
					if pos_type or pos_code == 's' or pos_code == 'suru' then
						hl(1)
						wrh('活用形')
						if pos_type == '2' then
							wr('{{ja-ichi' .. (kana_stem ~= '' and '|' .. kana_stem or '') .. '}}')
						elseif pos_code == 's' or pos_code == 'suru' then
							wr('{{ja-suru' .. (kana ~= '' and '|' .. kana or '') .. '}}')
						else
							wr('{{ja-go-' .. kana_last .. (kana_stem ~= '' and '|' .. kana_stem or '') .. '}}')
						end
						hl(-1)
					end
					if infl then
						hl(1)
						wrh('活用形')
						if infl == 'na' then
							wr('{{ja-' .. infl .. (kana ~= '' and '|' .. kana or '') .. '}}')
						else
							wr('{{ja-' .. infl .. (kana_stem ~= '' and '|' .. kana_stem or '') .. '}}')
						end
						hl(-1)
					end
				end
				-- the optional subsections are written once, under the first PoS
				if i == 1 and ii == 1 then
					hl(1)
					if a['un'] then
						wrh('用法说明')
						wr('* ' .. a['un'])
					end
					if a['sy'] then
						wrh('近义词')
						wr(table.concat(generate_links_list(a['sy']), '\n'))
					end
					if a['an'] then
						wrh('反义词')
						wr(table.concat(generate_links_list(a['an']), '\n'))
					end
					if a['hsup'] then
						wrh('上位词')
						wr(table.concat(generate_links_list(a['hsup']), '\n'))
					end
					if a['hsub'] then
						wrh('下位词')
						wr(table.concat(generate_links_list(a['hsub']), '\n'))
					end
					if a['co'] then
						wrh('同类词汇')
						wr(table.concat(generate_links_list(a['co']), '\n'))
					end
					if a['de'] then
						wrh('衍生词汇')
						wr(table.concat(generate_links_list(a['de']), '\n'))
					end
					if a['re'] then
						wrh('相关词汇')
						wr(table.concat(generate_links_list(a['re']), '\n'))
					end
					if a['desc'] then
						wrh('后代词汇')
						wr('* ' .. a['desc'])
					end
					if a['al'] then
						wrh('参见')
						wr(table.concat(generate_links_list(a['al']), '\n'))
					end
					hl(-1)
				end
			end
			-- advance i by the number of PoS headers produced
			i = i + 1 + (#pos_data)
		end
	end
	if a['reflist'] and a['reflist'] ~= 'n' then
		if a['en'] then
			hl(-1)
		end
		wrh('参考资料')
		wr('<references />')
	end
	if a['c'] then
		wr('')
		-- capitalise the first letter of every comma-separated category name
		a['c'] = replace(a['c'], '^.', mw.ustring.upper)
		a['c'] = replace(a['c'], '(,)(.)', function(comma, first) return comma .. mw.ustring.upper(first) end)
		for _, category in ipairs(split(a['c'], '[,]')) do
			wr('[[Category:日语 ' .. category .. ']]')
		end
		--wr('{{C|ja|' .. replace(a['c'], '[,]', '|') .. '}}')
	end
	-- trim: otherwise `Etymology n` headers create a leading newline
	return trim(table.concat(output, '\n'))
end
return export