Module:auto cat

Vanuit Wiktionary, die vrye woordeboek.

Dokumentasie vir hierdie module kan geskep word by: Module:auto cat/doc

local export = {}


--- Split a category title into a label and a language object.
-- Only the final space-separated word of the title is tried as a language
-- canonical name (multi-word language names are therefore not recognised).
-- When that word names a language, the label is the title with the word and
-- the four characters preceding it removed (presumably the separator " in "
-- -- TODO confirm), with its first letter lowercased. Otherwise the label
-- is the unmodified title and no language is returned.
-- @param titleObject table/object with a `text` field holding the title
-- @return label string
-- @return lang  language object, or the falsy lookup result if none matched
local function splitLabelLang(titleObject)
	local text = titleObject.text
	local words = mw.text.split(text, " ")
	local canonicalName = words[#words]
	local lang = require("Module:languages").getByCanonicalName(canonicalName)

	if not lang then
		return text, lang
	end

	-- Clamp at zero: a negative end index would make string.sub count from
	-- the end of the string, so a title shorter than "<4 chars><language>"
	-- (e.g. just the language name) would yield a bogus partial label.
	local labelLength = math.max(0, #text - #canonicalName - 4)
	local label = mw.getContentLanguage():lcfirst(text:sub(1, labelLength))

	return label, lang
end


--- Look up a language-like object by canonical name.
-- With a truthy `family`, only Module:families is consulted. Otherwise
-- Module:languages is tried first and Module:etymology languages second.
-- @param name   canonical name to look up
-- @param family truthy to look up a family instead of a language
-- @return the object found, or a falsy value if there is none
local function getObj(name, family)
	if family then
		return require("Module:families").getByCanonicalName(name)
	end

	local language = require("Module:languages").getByCanonicalName(name)
	if language then
		return language
	end
	return (require("Module:etymology languages").getByCanonicalName(name))
end


-- List of handler functions that try to match the page name.
-- A handler should return a table of template title plus arguments
-- that is passed to frame:expandTemplate.
-- If a handler does not recognise the page name, it should return nil.
-- Note that the order of functions matters!

-- Ordered registry of handler functions; they are tried in insertion order.
local handlers = {}

-- Append `func` to the end of the handler registry.
local function add_handler(func)
	handlers[#handlers + 1] = func
end


-- request cat: fires on any title that begins with "Requests".
add_handler(function(titleObject)
	if titleObject.text:find("^Requests") then
		return {title = "request cat"}
	end

	return nil
end)


-- Name-type labels recognised by the name handlers, in matching order:
-- the two generic labels first, then plain / diminutive / augmentative
-- variants for each of male, female and unisex given names.
local personal_name_types = {"surnames", "given names"}
for _, prefix in ipairs({"", "diminutives of ", "augmentatives of "}) do
	for _, gender in ipairs({"male", "female", "unisex"}) do
		personal_name_types[#personal_name_types + 1] = prefix .. gender .. " given names"
	end
end

-- topic name cat; registered ahead of the general topic cat handler so that
-- "code:<source> <name type>" titles are claimed here first.
add_handler(function(titleObject)
	local code, label = titleObject.text:match("^([a-z-]+):(.+)$")

	if code then
		for _, nametype in ipairs(personal_name_types) do
			-- Whatever precedes the name type must resolve to a language.
			local sourcename = label:match("^(.+) " .. nametype .. "$")
			local source = sourcename and getObj(sourcename)

			if source then
				return {title = "topic name cat", args = {code, source:getCode(), nametype}}
			end
		end
	end

	return nil
end)


-- Topical categories of the form "code:label".
add_handler(function(titleObject)
	local code, label = titleObject.text:match("^([a-z-]+):(.+)$")

	if not code then
		return nil
	end

	return {title = "topic cat", args = {code, label}}
end)
-- Keep a direct reference to the handler just registered.
local topic_cat_with_lang = handlers[#handlers]


-- script cat
-- Registered before handlers such as "derived cat" that also look at
-- categories ending in "languages".
add_handler(function(titleObject)
	local text = titleObject.text
	local suffix_labels = {
		"appendices",
		"characters",
		"characters by language",
		"languages",
		"modules",
		"templates",
	}
	-- Hardcoded instead of derived from every script lacking the word
	-- "script"; otherwise "Arabic languages" would trigger, because Arabic
	-- is a script and a family (as well as a language).
	local bare_scripts = {
		["Morse code"] = true,
		["Flag semaphore"] = true,
	}

	-- Work out the script name and the label from the title; returns nil
	-- when the title has none of the recognised shapes.
	local function parse_title()
		local name, lab = text:match("^(.+) (script)$")
		if name then
			return name, lab
		end
		if bare_scripts[text] then
			return text, "script"
		end
		for _, candidate in ipairs(suffix_labels) do
			name, lab = text:match("^(.+) script (" .. candidate .. ")$")
			if name then
				return name, lab
			end
			-- E.g. 'Morse code characters' or 'Flag semaphore templates'.
			name, lab = text:match("^(.+) (" .. candidate .. ")$")
			if name and bare_scripts[name] then
				return name, lab
			end
		end
		return nil
	end

	local script, label = parse_title()
	if not script then
		return nil
	end

	local get_script = require("Module:scripts").getByCanonicalName
	-- Second attempt handles [[Category:Undetermined script languages]],
	-- where the script's own name is lowercase "undetermined".
	local scriptObj = get_script(script) or get_script(mw.ustring.lower(script))
	if not scriptObj then
		return nil
	end

	if label == "script" then
		return { title = "script cat", args = { scriptObj:getCode() } }
	else
		return { title = "script cat", args = { scriptObj:getCode(), label } }
	end
end)


--[[	langcatboiler
		Shouldn't be used because there are additional parameters, such as
		countries where the language is or was spoken,
		that should always be supplied.

add_handler(function(titleObject)
	if titleObject.text:find(" by language$") or not titleObject.text:find("[lL]anguage$") then
		return nil
	end
	
	local langName = titleObject.text:match("^(.+) language$")
	
	-- Use the entire category name if it doesn't end in "language", to handle
	-- cases where "language" is part of the name, e.g. ASL.
	local lang = require("Module:languages").getByCanonicalName(langName) or require("Module:languages").getByCanonicalName(titleObject.text)
	
	if not lang then
		local lang2 = require("Module:languages").getByName(langName)
		if lang2 then
			error('"' .. langName .. '" is not a valid canonical name. Use "' .. lang2:getCanonicalName() .. '" instead.')
		end
		
		return nil
	end
	
	return { title = "langcatboiler", args = { lang:getCode() } }
end)
 
--]]

-- Letter names: titles ending in "letter names", optionally prefixed with
-- "code:". An unrecognised code raises an error.
add_handler(function(titleObject)
	local text = titleObject.text

	if not text:find("letter names$") then
		return nil
	end

	local lang
	local cat = text
	local langCode = text:match("^([^:]+):")

	if langCode then
		lang = require("Module:languages").getByCode(langCode) or error('The language code "' .. langCode .. '" is not valid.')
		cat = text:match(":(.+)$")
	end

	return {title = "topic cat", args = {lang and lang:getCode() or nil, cat}}
end)


-- letter cat
add_handler(function(titleObject)
	-- Deliberately narrow: the title must start with one uppercase letter
	-- followed by the matching lowercase letter, and that pair must be the
	-- whole title or be followed by a colon (as in [[Category:Gg: ⠛]]).
	-- Pairs that do not round-trip through uppercasing (e.g. Turkish
	-- [[Category:İi]] and [[Category:Iı]]) have to call {{letter cat}}
	-- directly. A looser check used to fire on [[Category:zh:]],
	-- [[Category:RFQ]], etc.
	local upper, lower = mw.ustring.match(titleObject.text, "^(%u)(%l)%f[:%z]")

	if upper and mw.ustring.upper(lower) == upper then
		return {title = "letter cat"}
	end

	return nil
end)


-- Japanese kanji reading cat: any title starting with "Japanese kanji".
add_handler(function(titleObject)
	if titleObject.text:find("^Japanese kanji") then
		return {title = "ja-readings-cat"}
	end

	return nil
end)


-- Okinawan kanji reading cat: any title starting with "Okinawan kanji".
add_handler(function(titleObject)
	if titleObject.text:find("^Okinawan kanji") then
		return {title = "ryu-readings-cat"}
	end

	return nil
end)


-- FIXME! Move this to a general word-to-number converter.
-- Maps the English words "one" .. "twenty" to the numbers 1 .. 20; each
-- word's position in the list below is its value.
local word_to_number = {}
for value, word in ipairs({
	"one", "two", "three", "four", "five",
	"six", "seven", "eight", "nine", "ten",
	"eleven", "twelve", "thirteen", "fourteen", "fifteen",
	"sixteen", "seventeen", "eighteen", "nineteen", "twenty",
}) do
	word_to_number[word] = value
end


-- Japanese/Okinawan character count cat:
-- "<lang> terms written with <count word> Han script character(s)".
add_handler(function(titleObject)
	local langname, count = titleObject.text:match("^(.+) terms written with (.+) Han script characters?$")
	local code_for = {Japanese = "ja", Okinawan = "ryu"}

	-- Each step stays nil once an earlier one has failed.
	local langcode = langname and code_for[langname]
	local num = langcode and word_to_number[count]

	if not num then
		return nil
	end

	return {title = langcode .. "-cat-written with n kanji", args = {num}}
end)


-- Japanese/Okinawan read-as cat:
-- "<lang> terms spelled with kanji read as <reading>".
add_handler(function(titleObject)
	local langname, reading = titleObject.text:match("^(.+) terms spelled with kanji read as (.+)$")
	local code_for = {Japanese = "ja", Okinawan = "ryu"}
	local langcode = langname and code_for[langname]

	if not langcode then
		return nil
	end

	return {title = langcode .. "-readascat", args = {reading}}
end)


-- Unicode block cat: any title ending in "block".
add_handler(function(titleObject)
	if titleObject.text:find("block$") then
		return {title = "Unicode block cat"}
	end

	return nil
end)


-- Proto-Indo-European xxx-shape roots (language code "ine-pro" only).
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if lang and lang:getCode() == "ine-pro" then
		local shape = label:match("^(.+)-shape roots$")

		if shape then
			return {title = "ine-root shape cat", args = {shape}}
		end
	end

	return nil
end)


-- Dutch prefixed verbs: label "prefixed verbs with <prefix>-".
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	-- Stays falsy unless the language is Dutch and the label matches.
	local pref = lang and lang:getCode() == "nl"
		and label:match("^prefixed verbs with (.+%-)$")

	if not pref then
		return nil
	end

	return {title = "nl-pref verb cat", args = {pref}}
end)


-- Dutch separable verbs: label "separable verbs with <particle>".
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	-- Stays falsy unless the language is Dutch and the label matches.
	local sep = lang and lang:getCode() == "nl"
		and label:match("^separable verbs with (.+)$")

	if not sep then
		return nil
	end

	return {title = "nl-sep verb cat", args = {sep}}
end)


-- PIE root cat: title contains "terms derived from the PIE root" anywhere
-- (first letter in either case).
add_handler(function(titleObject)
	if titleObject.text:find("[Tt]erms derived from the PIE root") then
		return {title = "PIE root cat"}
	end

	return nil
end)


-- PIE word cat: label "terms derived from the PIE word *<word>".
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if label:find("^[tT]erms derived from the PIE word %*") then
		-- The language is optional; the first argument is nil without one.
		local langcode = lang and lang:getCode() or nil
		local word = label:match("^[Tt]erms derived from the PIE word %*(.+)$")
		return {title = "PIE word cat", args = {langcode, word}}
	end

	return nil
end)


-- ar-root cat: Arabic label "terms belonging to the root <root>".
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if lang and lang:getCode() == "ar" and label:find("^terms belonging to the root .+") then
		return {title = "ar-root cat"}
	end

	return nil
end)


-- HE root cat: Hebrew label "terms belonging to the root <root>"; the root
-- is split on "־" and its pieces become the template arguments.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if not lang or lang:getCode() ~= "he" then
		return nil
	end

	local root = label:match("^terms belonging to the root (.+)$")
	if not root then
		return nil
	end

	return {title = "HE root cat", args = mw.text.split(root, "־", true)}
end)


-- he-patterncat: Hebrew label "terms in the pattern <pattern>".
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if lang and lang:getCode() == "he" and label:find("^terms in the pattern .+") then
		return {title = "he-patterncat"}
	end

	return nil
end)


-- root cat: label "terms derived from the <source> root <root>".
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)
	local sourcename, root = label:match("^[Tt]erms derived from the (.+) root (.+)$")

	-- The source name has to resolve to a language or etymology language.
	local source = sourcename and getObj(sourcename)
	if not source then
		return nil
	end

	local langcode = lang and lang:getCode() or nil
	return {title = "root cat", args = {langcode, source:getCode(), root}}
end)


-- tone cat: label "<pos> with tone <tone>", where the tone has no spaces.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if lang then
		local pos, tone = label:match("^(.+) with tone ([^ ]+)$")

		if pos then
			return {title = "tone cat", args = {lang:getCode(), pos, tone}}
		end
	end

	return nil
end)


-- classifier cat: label "nouns classified by <classifier>".
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if lang then
		local pos, classifier = label:match("^(nouns) classified by (.+)$")

		if pos then
			return {title = "classifier cat", args = {lang:getCode(), pos, classifier}}
		end
	end

	return nil
end)


-- derived cat: label "terms derived from <source>".
-- A source ending in " languages" or " Languages" is looked up as a
-- family; anything else as a language or etymology language. Returns nil
-- when the label does not match or the source cannot be resolved.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	local sourcename = label:match("^[Tt]erms derived from (.+)$")
	if not sourcename then
		return nil
	end

	-- Strip the suffix in the same case-insensitive way it is detected;
	-- stripping only the lowercase form left "X Languages" intact, so the
	-- family lookup for it could never succeed.
	local familyname = sourcename:match("^(.*) [Ll]anguages$")
	local source
	if familyname then
		source = getObj(familyname, true)
	else
		source = getObj(sourcename)
	end

	if source then
		return {title = "derived cat", args = {lang and lang:getCode() or nil, source:getCode()}}
	end

	return nil
end)


-- inherited cat: label "terms inherited from <source>".
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)
	local sourcename = label:match("^[Tt]erms inherited from (.+)$")

	-- Only languages and etymology languages are looked up here.
	local source = sourcename and getObj(sourcename)
	if not source then
		return nil
	end

	local langcode = lang and lang:getCode() or nil
	return {title = "inherited cat", args = {langcode, source:getCode()}}
end)


-- borrowed cat: label "terms borrowed from <source>".
-- A source ending in " languages" or " Languages" is looked up as a
-- family; anything else as a language or etymology language. Returns nil
-- when the label does not match or the source cannot be resolved.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	local sourcename = label:match("^[Tt]erms borrowed from (.+)$")
	if not sourcename then
		return nil
	end

	-- Strip the suffix in the same case-insensitive way it is detected;
	-- stripping only the lowercase form left "X Languages" intact, so the
	-- family lookup for it could never succeed.
	local familyname = sourcename:match("^(.*) [Ll]anguages$")
	local source
	if familyname then
		source = getObj(familyname, true)
	else
		source = getObj(sourcename)
	end

	if source then
		return {title = "borrowed cat", args = {lang and lang:getCode() or nil, source:getCode()}}
	end

	return nil
end)


-- unadapted borrowing cat: label "unadapted borrowings from <source>".
-- A source ending in " languages" or " Languages" is looked up as a
-- family; anything else as a language or etymology language. Returns nil
-- when the label does not match or the source cannot be resolved.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	local sourcename = label:match("^[Uu]nadapted borrowings from (.+)$")
	if not sourcename then
		return nil
	end

	-- Strip the suffix in the same case-insensitive way it is detected;
	-- stripping only the lowercase form left "X Languages" intact, so the
	-- family lookup for it could never succeed.
	local familyname = sourcename:match("^(.*) [Ll]anguages$")
	local source
	if familyname then
		source = getObj(familyname, true)
	else
		source = getObj(sourcename)
	end

	if source then
		return {title = "unadapted borrowing cat", args = {lang and lang:getCode() or nil, source:getCode()}}
	end

	return nil
end)


-- calque cat: label "terms calqued from <source>".
-- A source ending in " languages" or " Languages" is looked up as a
-- family; anything else as a language or etymology language. Returns nil
-- when the label does not match or the source cannot be resolved.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	local sourcename = label:match("^[Tt]erms calqued from (.+)$")
	if not sourcename then
		return nil
	end

	-- Strip the suffix in the same case-insensitive way it is detected;
	-- stripping only the lowercase form left "X Languages" intact, so the
	-- family lookup for it could never succeed.
	local familyname = sourcename:match("^(.*) [Ll]anguages$")
	local source
	if familyname then
		source = getObj(familyname, true)
	else
		source = getObj(sourcename)
	end

	if source then
		return {title = "calque cat", args = {lang and lang:getCode() or nil, source:getCode()}}
	end

	return nil
end)

-- semantic loan cat: label "semantic loans from <source>".
-- A source ending in " languages" or " Languages" is looked up as a
-- family; anything else as a language or etymology language. Returns nil
-- when the label does not match or the source cannot be resolved.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	local sourcename = label:match("^[Ss]emantic loans from (.+)$")
	if not sourcename then
		return nil
	end

	-- Strip the suffix in the same case-insensitive way it is detected;
	-- stripping only the lowercase form left "X Languages" intact, so the
	-- family lookup for it could never succeed.
	local familyname = sourcename:match("^(.*) [Ll]anguages$")
	local source
	if familyname then
		source = getObj(familyname, true)
	else
		source = getObj(sourcename)
	end

	if source then
		return {title = "semantic loan cat", args = {lang and lang:getCode() or nil, source:getCode()}}
	end

	return nil
end)

-- translit cat (umbrella form): label starting with
-- "terms transliterated from other languages".
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if label:find("^terms transliterated from other languages") then
		return {title = "translit cat", args = {lang and lang:getCode() or nil}}
	end

	return nil
end)


-- translit cat (per-source form): label "transliterations of <source> terms".
-- Returns nil when the label does not have that shape or the source name
-- does not resolve to a language or etymology language.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if not label:find("^[Tt]ransliterations of") then
		return nil
	end

	local sourcename = label:match("[Tt]ransliterations of (.+) terms")
	-- The prefix check above does not guarantee the " terms" tail, so the
	-- capture can be nil; do not hand nil to the lookup.
	if not sourcename then
		return nil
	end

	local source = getObj(sourcename)

	if source then
		-- The language is optional; the first argument is nil without one.
		return {title = "translit cat", args = {lang and lang:getCode() or nil, source:getCode()}}
	end

	return nil
end)


-- circumfix cat, infix cat, interfix cat, prefix cat, suffix cat
-- Label "<pos> <affix type>ed with <term>" or "... with <term> (<id>)".
-- A <pos> of "words" is passed on as no pos at all. Returns nil when no
-- language could be split off the title or no affix type matches.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	-- Every template built here needs a language code; without a language
	-- this handler does not recognise the title, so let later handlers try
	-- instead of raising on a nil index.
	if not lang then
		return nil
	end

	for _, affixtype in ipairs({"circumfix", "infix", "interfix", "prefix", "suffix"}) do
		local pos, after = label:match("^(.+) " .. affixtype .. "ed with (.+)$")

		if pos then
			if pos == "words" then
				pos = nil
			end

			-- An optional trailing " (id)" disambiguates the term.
			local term, id = after:match("^(.+) %(([^()]+)%)$")
			if not term then
				term = after
			end

			return {title = affixtype .. " cat", args = {lang:getCode(), term, pos = pos, id = id}}
		end
	end

	return nil
end)


-- name cat: label "<name type> from <source>". The source is passed as a
-- code when it resolves to a language, and as the raw name otherwise.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if lang then
		for _, nametype in ipairs(personal_name_types) do
			local sourcename = label:match("^" .. nametype .. " from (.+)$")

			if sourcename then
				local sourceObj = getObj(sourcename)
				local source = sourceObj and sourceObj:getCode() or sourcename

				return {title = "name cat", args = {lang:getCode(), source, nametype}}
			end
		end
	end

	return nil
end)


-- charactercat: label "terms spelled with <character>".
-- Returns nil when no language could be split off the title or the label
-- has a different shape.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	-- The template needs a language code; without a language let later
	-- handlers try instead of raising on a nil index.
	if not lang then
		return nil
	end

	-- Don't fire on [[Category:Japanese terms spelled with 発 read as はつ]]
	-- and similar.
	if not label:find("^terms spelled with .+") or label:find("^terms spelled with .+ read as .+") then
		return nil
	end

	local term = label:match("^terms spelled with (.+)$")
	return {title = "charactercat", args = {lang:getCode(), term}}
end)


-- phrasebook cat: the umbrella title "Phrasebooks by language", or a label
-- of "phrasebook" or "phrasebook/<sublabel>" for a given language.
-- Returns nil for anything else, including titles with no language.
add_handler(function(titleObject)
	if titleObject.text == "Phrasebooks by language" then
		return {title = "phrasebook cat", args = {}}
	end

	local label, lang = splitLabelLang(titleObject)

	-- Both per-language forms need a language code; without a language let
	-- later handlers try instead of raising on a nil index.
	if not lang then
		return nil
	end

	if label == "phrasebook" then
		return {title = "phrasebook cat", args = {lang:getCode()}}
	end

	local sublabel = label:match("^phrasebook/(.+)$")
	if sublabel then
		return {title = "phrasebook cat", args = {lang:getCode(), sublabel}}
	end

	return nil
end)


-- no entry cat: label ending in "entries that don't exist".
-- Returns nil when no language could be split off the title or the label
-- does not end that way.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	-- The template needs a language code; without a language let later
	-- handlers try instead of raising on a nil index.
	if not lang then
		return nil
	end

	if not label:find("entries that don't exist$") then
		return nil
	end

	return { title = "no entry cat", args = { lang:getCode() } }
end)

-- Azerbaijani compound verbs: "Azerbaijani compound verbs with <word>".
add_handler(function(titleObject)
	local with_what = titleObject.text:match("^Azerbaijani compound verbs with (.+)$")

	if with_what then
		return {title = "az-compound cat", args = { with_what }}
	end

	return nil
end)


--[[	family cat

		Title "<family> languages". The family name is looked up as written
		and, failing that, lowercased.

		Must go after the "derived", "borrowed", and "transliterated" category
		handlers, which sometimes have "languages" at the end, but before
		poscatboiler, or families that have the same names as languages will
		get intercepted.
]]
add_handler(function(titleObject)
	-- Require the separating space: a title that merely ends in the letters
	-- "languages" (e.g. "Metalanguages") has no family name to capture, and
	-- lowercasing a nil capture would raise an error.
	local familyName = titleObject.text:match("^(.+) languages$")

	if not familyName then
		return nil
	end

	local getFamily = require("Module:families").getByCanonicalName
	local family = getFamily(familyName) or getFamily(mw.ustring.lower(familyName))

	if not family then
		return nil
	end

	return { title = "family cat", args = { family:getCode() } }
end)


-- poscatboiler: any title with a language, plus language-less
-- "<label> by language" umbrella titles.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if not lang then
		if label:find(". by language$") then
			local umbrella = mw.getContentLanguage():lcfirst(label:match("^(.+) by language$"))
			return {title = "poscatboiler", args = {nil, umbrella}}
		end
		return nil
	end

	-- "<base label> in <script> script" carries the script separately,
	-- except when the base label is "terms" or the script is unknown.
	local baseLabel, script = label:match("(.+) in (.-) script$")
	if script and baseLabel ~= "terms" then
		local scriptObj = require("Module:scripts").getByCanonicalName(script)
		if scriptObj then
			return {title = "poscatboiler", args = {lang:getCode(), baseLabel, sc = scriptObj:getCode() }}
		end
	end

	return {title = "poscatboiler", args = {lang:getCode(), label}}
end)


-- redundant translit cat: label starting with
-- "Terms with redundant transliterations", followed by "/<language code>".
-- Returns nil when the prefix or the "/<code>" part is missing.
add_handler(function(titleObject)
	local label = splitLabelLang(titleObject)

	if not label:find("^Terms with redundant transliterations") then
		return nil
	end

	-- Declared local so that the code does not leak into the global table.
	local langCode = label:match("/(.+)")

	if langCode then
		return {title = "redundant translit cat", args = {langCode}}
	end

	return nil
end)


-- manual translit diff cat: label starting with "Terms with manual
-- transliterations different from the automated ones", followed by
-- "/<language code>".
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if label:find("^Terms with manual transliterations different from the automated ones") then
		local code = label:match("/(.+)")

		if code then
			return {title = "manual translit diff cat", args = {code}}
		end
	end

	return nil
end)


-- topic cat (catch-all): never declines; the whole title is the label and
-- there is no language code.
add_handler(function(titleObject)
	local label = titleObject.text
	return {title = "topic cat", args = {nil, label}}
end)
-- Keep a direct reference to the handler just registered.
local topic_cat_without_lang = handlers[#handlers]


--- Entry point: pick the category template for the current page and
-- return its expanded output.
-- Reads the boolean `nopos` argument from the parent frame; when it is set,
-- only the two topic-cat handlers are tried. Raises an error outside the
-- "Kategorie" namespace (a notice is returned in "Sjabloon" instead) and
-- when no handler produces any output at all.
function export.show(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		nopos = { type = "boolean" },
	})
	local titleObject = mw.title.getCurrentTitle()
	local namespace = titleObject.nsText

	if namespace == "Sjabloon" then
		return "(This template should be used on pages in the Category: namespace.)"
	end
	if namespace ~= "Kategorie" then
		error("This template/module can only be used on pages in the Category: namespace.")
	end

	-- FIXME: Will the topic_cat handlers correctly handle "letter names" categories?
	local candidates = handlers
	if args.nopos then
		candidates = { topic_cat_with_lang, topic_cat_without_lang }
	end

	-- Try the handlers in order. A handler that does not recognize the title
	-- returns nil and the next one is consulted. One that does recognize it
	-- returns a template name plus arguments, but expanding that template
	-- can still yield an error, in which case we also move on. Example:
	-- for "CAT:Mato Grosso, Brazil", "Mato" is a language name, so the
	-- {{poscatboiler}} handler fires with the label "Grosso, Brazil" and
	-- the template reports an error. That used to block further handler
	-- consideration; now the search continues until {{topic cat}} fires
	-- and handles the category correctly.
	local first_error_cattext

	for _, handler in ipairs(candidates) do
		local templateObject = handler(titleObject)

		if templateObject then
			require("Module:debug").track("auto cat/" .. templateObject.title)
			local cattext = frame:expandTemplate(templateObject)

			-- FIXME! Errors are detected by looking for text found in most
			-- or all error messages generated by category tree templates
			-- (the second string should be present whenever a module does
			-- not recognize a category name). If that text ever changes in
			-- the source modules (e.g. [[Module:category tree]]), it must be
			-- changed here as well.
			local failed = cattext:find("Category:Categories with invalid label")
				or cattext:find("The automatically%-generated contents of this category has errors")

			if not failed then
				return cattext
			end

			-- Remember only the earliest failure, for use as a last resort.
			first_error_cattext = first_error_cattext or cattext
		end
	end

	if first_error_cattext then
		return first_error_cattext
	end
	error("{{auto cat}} couldn't recognize format of category name")
end

-- Test helper: run the handlers against an injected title string and
-- return the template title chosen by the first handler that accepts it.
-- A table argument is treated as a frame and the title is read from the
-- first argument of its parent frame.
function export.test(title)
	if type(title) == "table" then
		title = title:getParent().args[1]
	end

	-- Handlers only read the `text` field, so a bare table stands in for a
	-- real title object.
	local fakeTitle = { text = title }

	for _, handler in ipairs(handlers) do
		local result = handler(fakeTitle)

		if result then
			return result.title
		end
	end
end

return export

-- For Vim, so we get 4-space tabs
-- vim: set ts=4 sw=4 noet: