Modul:form of

Daripada Wikikamus
Laman modul ini kekurangansublaman pendokumenan. Silacipta laman pendokumenan tersebut.
Pautan berguna:senarai sublaman •pautan •transklusi •kes ujian •kotak pasir
-- Table of functions exported by this module.
local export = {}

export.force_cat = false -- for testing; set to true to display categories even on non-mainspace pages

-- Names of the modules used by this file. They are loaded on demand by the loader functions defined further down.
local debug_track_module = "Module:debug/track"
local etymology_module = "Module:etymology"
local form_of_cats_module = "Module:form of/cats"
local form_of_data_module = "Module:form of/data"
local form_of_data1_module = "Module:form of/data/1"
local form_of_data2_module = "Module:form of/data/2"
local form_of_functions_module = "Module:form of/functions"
local form_of_lang_data_module_prefix = "Module:form of/lang-data/"
local function_module = "Module:fun"
local headword_data_module = "Module:headword/data"
local json_module = "Module:JSON"
local labels_module = "Module:labels"
local links_module = "Module:links"
local load_module = "Module:load"
local parse_utilities_module = "Module:parse utilities"
local string_utilities_module = "Module:string utilities"
local table_module = "Module:table"
local table_deep_equals_module = "Module:table/deepEquals"
local utilities_module = "Module:utilities"

-- Localised library functions, plus forward declarations of functions that are assigned later in this file.
local anchor_encode = mw.uri.anchorEncode
local concat = table.concat
local dump = mw.dumpObject
local fetch_categories_and_labels -- Defined below.
local format_form_of -- Defined below.
local get_tag_display_form -- Defined below.
local get_tag_set_display_form -- Defined below.
local insert = table.insert
local ipairs = ipairs
local is_link_or_html -- Defined below.
local list_to_text = mw.text.listToText
local lookup_shortcut -- Defined below.
local lookup_tag -- Defined below.
local normalize_tag_set -- Defined below.
local parse_tag_set_properties -- Defined below.
local require = require
local sort = table.sort
local split_tag_set -- Defined below.
local type = type

--[==[
Loaders for functions in other modules, which overwrite themselves with the target function when called.
This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded
functions, and has no overhead after the first call, since the target functions are called directly in any subsequent
calls.
]==]
local function append(...)
	append = require(table_module).append
	return append(...)
end

local function contains(...)
	contains = require(table_module).contains
	return contains(...)
end

local function debug_track(...)
	debug_track = require(debug_track_module)
	return debug_track(...)
end

local function deep_copy(...)
	deep_copy = require(table_module).deepCopy
	return deep_copy(...)
end

local function deep_equals(...)
	deep_equals = require(table_deep_equals_module)
	return deep_equals(...)
end

local function extend(...)
	extend = require(table_module).extend
	return extend(...)
end

local function format_categories(...)
	format_categories = require(utilities_module).format_categories
	return format_categories(...)
end

local function full_link(...)
	full_link = require(links_module).full_link
	return full_link(...)
end

local function insert_if_not(...)
	insert_if_not = require(table_module).insertIfNot
	return insert_if_not(...)
end

local function is_subset_list(...)
	is_subset_list = require(table_module).isSubsetList
	return is_subset_list(...)
end

local function iterate_from(...)
	iterate_from = require(function_module).iterateFrom
	return iterate_from(...)
end

local function join_multiparts(...)
	join_multiparts = require(form_of_functions_module).join_multiparts
	return join_multiparts(...)
end

local function load_data(...)
	load_data = require(load_module).load_data
	return load_data(...)
end

local function parse_inline_modifiers(...)
	parse_inline_modifiers = require(parse_utilities_module).parse_inline_modifiers
	return parse_inline_modifiers(...)
end

local function safe_load_data(...)
	safe_load_data = require(load_module).safe_load_data
	return safe_load_data(...)
end

local function safe_require(...)
	safe_require = require(load_module).safe_require
	return safe_require(...)
end

local function serial_comma_join(...)
	serial_comma_join = require(table_module).serialCommaJoin
	return serial_comma_join(...)
end

local function shallow_copy(...)
	shallow_copy = require(table_module).shallowCopy
	return shallow_copy(...)
end

local function show_labels(...)
	show_labels = require(labels_module).show_labels
	return show_labels(...)
end

local function slice(...)
	slice = require(table_module).slice
	return slice(...)
end

local function split(...)
	split = require(string_utilities_module).split
	return split(...)
end

local function ucfirst(...)
	ucfirst = require(string_utilities_module).ucfirst
	return ucfirst(...)
end

--[==[
Loaders for objects, which load data (or some other object) into some variable, which can then be accessed as
"foo or get_foo()", where the function get_foo sets the object to "foo" and then returns it. This ensures they are only
loaded when needed, and avoids the need to check for the existence of the object each time, since once "foo" has been
set, "get_foo" will not be called again.
]==]
local cat_functions
local function get_cat_functions()
	cat_functions, get_cat_functions = require(form_of_functions_module).cat_functions, nil
	return cat_functions
end

local default_pagename
local function get_default_pagename()
	default_pagename, get_default_pagename = load_data(headword_data_module).pagename, nil
	return default_pagename
end

local display_handlers
local function get_display_handlers()
	display_handlers, get_display_handlers = require(form_of_functions_module).display_handlers, nil
	return display_handlers
end

local m_cats_data
local function get_m_cats_data()
	m_cats_data, get_m_cats_data = load_data(form_of_cats_module), nil
	return m_cats_data
end

local m_data
local function get_m_data()
	-- Needs require.
	m_data, get_m_data = require(form_of_data_module), nil
	return m_data
end

local m_data1
local function get_m_data1()
	m_data1, get_m_data1 = load_data(form_of_data1_module), nil
	return m_data1
end

local m_data2
local function get_m_data2()
	m_data2, get_m_data2 = load_data(form_of_data2_module), nil
	return m_data2
end

local m_pos_data
local function get_m_pos_data()
	m_pos_data, get_m_pos_data = load_data(headword_data_module).pos_aliases, nil
	return m_pos_data
end

--[==[ intro:
This module implements the
underlying processing of {{tl|form of}}, {{tl|inflection of}} and specific variants such as
{{tl|past participle of}} and {{tl|alternative spelling of}}. Most of the logic in this file is to handle tags in
{{tl|inflection of}}. Other related files:
* [[Module:form of/templates]] contains the majority of the logic that implements the templates themselves.
* [[Module:form of/data/1]] is a data-only file containing information on the more common inflection tags, listing the
  tags, their shortcuts, the category they belong to (tense-aspect, case, gender, voice-valence, etc.), the appropriate
  glossary link and the wikidata ID.
* [[Module:form of/data/2]] is a data-only file containing information on the less common inflection tags, in the same
  format as [[Module:form of/data/1]].
* [[Module:form of/lang-data/LANGCODE]] is a data-only file containing information on the language-specific inflection
  tags for the language with code LANGCODE, in the same format as [[Module:form of/data/1]]. Language-specific tags
  override general tags.
* [[Module:form of/cats]] is a data-only file listing the language-specific categories that are added when the
  appropriate combinations of tags are seen for a given language.
* [[Module:form of/functions]] contains functions for use with [[Module:form of/data/1]] and [[Module:form of/cats]].
  They are contained in this module because data-only modules can't contain code. The functions in this file are of two
  types:
*# Display handlers allow for customization of the display of multipart tags (see below). Currently there is only
   one such handler, for handling multipart person tags such as `1//2//3`.
*# Cat functions allow for more complex categorization logic, and are referred to by name in [[Module:form of/cats]].
   Currently no such functions exist.

The following terminology is used in conjunction with {{tl|inflection of}}:
* A ''tag'' is a single grammatical item, as specified in a single numbered parameter of {{tl|inflection of}}.
Examples are `masculine`, `nominative`, or `first-person`. Tags may be abbreviated, e.g. `m` for `masculine`, `nom`
  for `nominative`, or `1` for `first-person`. Such abbreviations are called ''aliases'', and some tags have multiple
  equivalent aliases (e.g. `p` or `pl` for `plural`). The full, non-abbreviated form of a tag is called its
  ''canonical form''.
* The ''display form'' of a tag is the way it's displayed to the user. Usually the displayed text of the tag is the same
  as its canonical form, and it normally functions as a link to a glossary entry explaining the tag. Usually the link is
  to an entry in [[Appendix:Glossary]], but sometimes the tag is linked to an individual dictionary entry or to a
  Wikipedia entry. Occasionally, the display text differs from the canonical form of the tag. An example is the tag
  `comparative case`, which has the display text read as simply `comparative`. Normally, tags referring to cases don't
  have the word "case" in them, but in this case the tag `comparative` was already used as an alias for the tag
  `comparative degree`, so the tag was named `comparative case` to avoid clashing. A similar situation occurs with
  `adverbial case` vs. the grammar tag `adverbial` (as in `adverbial participle`).
* A ''tag set'' is an ordered list of tags, which together express a single inflection, for example, `1|s|pres|ind`,
  which can be expanded to canonical-form tags as `first-person|singular|present|indicative`.
* A ''conjoined tag set'' is a tag set that consists of multiple individual tag sets separated by a semicolon, e.g.
  `1|s|pres|ind|;|2|s|imp`, which specifies two tag sets, `1|s|pres|ind` as above and `2|s|imp` (in canonical form,
  `second-person|singular|imperative`). Multiple tag sets specified in a single call to {{tl|inflection of}} are
  specified in this fashion. Conjoined tag sets can also occur in list-tag shortcuts.
* A ''multipart tag'' is a tag that embeds multiple tags within it, such as `f//n` or `nom//acc//voc`.
These are used in the case of [[syncretism]], when the same form applies to multiple inflections. Examples are the
  Spanish present subjunctive, where the first-person and third-person singular have the same form (e.g.
  {{m|es|siga}} from {{m|es|seguir|t=to follow}}), or Latin third-declension adjectives, where the dative and ablative
  plural of all genders have the same form (e.g. {{m|la|omnibus}} from {{m|la|omnis|t=all}}). These would be expressed
  respectively as `1//3|s|pres|sub` and `dat//abl|m//f//n|p`, where the use of the multipart tag compactly encodes the
  syncretism and avoids the need to individually list out all of the inflections. Multipart tags currently display as a
  list separated by a slash, e.g. ''dative/ablative'' or ''masculine/feminine/neuter'' where each individual word is
  linked appropriately. As a special case, multipart tags involving persons display specially; for example, the
  multipart tag `1//2//3` displays as ''first-, second- and third-person'', with the word "person" occurring only once.
* A ''two-level multipart tag'' is a special type of multipart tag that joins two or more tag sets instead of joining
  individual tags. The tags within the tag set are joined by a colon, e.g. `1:s//3:p`, which is displayed as
  ''first-person singular and third-person plural'', e.g. for use with the form {{m|grc|μέλλον}} of the verb
  {{m|grc|μέλλω|t=to intend}}, which uses the tag set `1:s//3:p|impf|actv|indc|unaugmented` to express the syncretism
  between the first singular and third plural forms of the imperfect active indicative unaugmented conjugation.
  Two-level multipart tags should be used sparingly; if in doubt, list out the inflections separately. [FIXME: Make
  two-level multipart tags obsolete.]
* A ''shortcut'' is a tag that expands to any type of tag described above, or to any type of tag set described above.
Aliases are a particular type of shortcut whose expansion is a single non-multipart tag.
* A ''multipart shortcut'' is a shortcut that expands into a multipart tag, for example `123`, which expands to the
  multipart tag `1//2//3`. Only the most common such combinations exist as shortcuts.
* A ''list shortcut'' is a special type of shortcut that expands to a list of tags instead of a single tag. For example,
  the shortcut `1s` expands to `1|s` (first-person singular). Only the most common such combinations exist as shortcuts.
* A ''conjoined shortcut'' is a special type of list shortcut that consists of a conjoined tag set (multiple logical tag
  sets). For example, the English language-specific shortcut `ed-form` expands to `spast|;|past|part`, expressing the
  common syncretism between simple past and past participle in English (and in this case, `spast` is itself a list
  shortcut that expands to `simple|past`).
]==]

-- Add tracking category for PAGE when called from {{inflection of}} or
-- similar TEMPLATE. The tracking category linked to is
-- [[Wiktionary:Tracking/inflection of/PAGE]].
local function track(page)
	-- Avoid including links in pages (may cause error): replace "[" with "(", "]" with ")" and "|" with "!".
	-- Assigning to a single local keeps only the string result of the final gsub (not its match count).
	local sanitized = page:gsub("%[", "("):gsub("%]", ")"):gsub("|", "!")
	debug_track("inflection of/" .. sanitized)
end

-- Wrap `text` in a <span> with the CSS class string `classes`. If `classes` is nil or false, `text` is returned
-- unchanged.
local function wrap_in_span(text, classes)
	if classes then
		return ("<span class='%s'>%s</span>"):format(classes, text)
	else
		return text
	end
end

--[==[
Format a single term object (lemma or enclitic) as a link. `data` has the following fields:
* `.termobj`: The term object; passed to {full_link()}.
* `.face`: The face passed as second argument to {full_link()}.
* `.span_classes`: CSS classes used to wrap the resulting link (no wrapping if nil or false).
* `.ok_to_destructively_modify`: If set, `.termobj` may be modified in place; otherwise it is shallow-copied before
  any field is set on it.
* `.overall_lang`: Overall language. If set and its code differs from that of `.termobj.lang`,
  {insert_source_cat_get_display()} in [[Module:etymology]] is called, and the language display text it returns
  (followed by a space and wrapped in `.text_classes`) is stored as the `.pretext` of the term object.
* `.text_classes`: CSS classes used to wrap the language display text.

If any of the fields `q`, `qq`, `a`, `aa`, `l`, `ll` or `refs` is set on the term object, `.show_qualifiers` is set on
it. Returns two values: the formatted link and a list of categories (empty unless the languages differ).
]==]
local function show_linked_term(data)
	local termobj = data.termobj
	local face = data.face
	local span_classes = data.span_classes
	local ok_to_destructively_modify = data.ok_to_destructively_modify
	local overall_lang = data.overall_lang
	local text_classes = data.text_classes

	local pretext_lang
	local categories = {}
	if overall_lang and overall_lang:getCode() ~= termobj.lang:getCode() then
		local lang_display
		lang_display, categories = require(etymology_module).insert_source_cat_get_display{
			lang = data.overall_lang,
			source = termobj.lang,
		}
		pretext_lang = wrap_in_span(lang_display .. " ", text_classes)
	end

	local need_to_show_qualifiers = termobj.q or termobj.qq or termobj.a or termobj.aa or termobj.l or termobj.ll or
		termobj.refs
	-- Only copy when we are actually about to set a field and the caller did not allow in-place modification.
	local need_to_copy = not ok_to_destructively_modify and (pretext_lang or need_to_show_qualifiers)
	if need_to_copy then
		termobj = shallow_copy(termobj)
	end
	if pretext_lang then
		termobj.pretext = pretext_lang
	end
	if need_to_show_qualifiers then
		termobj.show_qualifiers = true
	end
	return wrap_in_span(full_link(termobj, face), span_classes), categories
end

--[==[
Lowest-level implementation of form-of templates, including the general {{tl|form of}} as well as those that deal with
inflection tags, such as the general {{tl|inflection of}}, semi-specific variants such as {{tl|participle of}}, and
specific variants such as {{tl|past participle of}}. `data` contains all the information controlling the display, with
the following fields:
* `.lang`: Overall language of the form-of template. If specified, any lemmas, enclitics or base lemmas that are of a
   different language will have that language displayed before the term in question.
* `.text`: Text to insert before the lemmas. Wrapped in the value of `.text_classes`, or its default; see below.
* `.lemmas`: List of objects describing the lemma(s) of which the term in question is a non-lemma form. These are passed
   directly to {full_link()} in [[Module:links]]. Each object should have at minimum a `.lang` field containing the
   language of the lemma and a `.term` field containing the lemma itself. Each object is formatted using {full_link()}
   and then if there are more than one, they are joined using {serialCommaJoin()} in [[Module:table]]. Alternatively,
   `.lemmas` can be a string, which is displayed directly, or omitted, to show no lemma links and omit the connecting
   text.
* `.lemma_face`: "Face" to use when displaying the lemma objects. Usually should be set to {"term"}.
* `.conj`: Conjunction or separator to use when joining multiple lemma objects. If {nil}, defaults to {"dan"}.
If this has the value {false}, the lemmas are preceded with the `.separator` field in each lemma.
* `.enclitics`: List of enclitics to display after the lemmas, in parens.
* `.enclitic_conj`: Conjunction or separator to use when joining multiple enclitics. Defaults to {"dan"}.
* `.base_lemmas`: List of base lemmas to display after the lemmas, in the case where the lemmas in `.lemmas` are
   themselves forms of another lemma (the base lemma), e.g. a comparative, superlative or participle. Each object is of
   the form { { paramobj = PARAM_OBJ, lemmas = {LEMMA_OBJ, LEMMA_OBJ, ...}, conj = "CONJ" }} where PARAM_OBJ describes
   the properties of the base lemma parameter (i.e. the relationship between the intermediate and base lemmas);
   LEMMA_OBJ is an object suitable to be passed to {full_link()} in [[Module:links]]; and CONJ is the conjunction to
   join multiple lemmas with, defaulting to {"dan"}. PARAM_OBJ is of the format
   { { param = "PARAM", tags = {"TAG", "TAG", ...} }} where PARAM is the name of the parameter to {{tl|inflection of}}
   etc. that holds the base lemma(s) of the specified relationship and the tags describe the relationship, such as
   { {"comd"}} or { {"past", "part"}}.
* `.text_classes`: CSS classes used to wrap the tag text and lemma links. Default is
   {"form-of-definition use-with-mention"}. Use `false` for no wrapping.
* `.lemma_classes`: Additional CSS classes used to wrap the lemma links. Default is {"form-of-definition-link"}.
Use `false` for no wrapping.
* `.lit`: If set (and `.lemmas` is given), appended after the lemmas as {", secara harfiah "} followed by the value
  marked up as a gloss using {mark()} in [[Module:links]].
* `.posttext`: Additional text to display after the lemma links.
* `.ok_to_destructively_modify`: If set, data structures (including the nested lemma structures) can be modified
  in-place to save memory; otherwise they will be copied before modifying.

Returns two values, the formatted string and any categories to add the page to (which will arise if `.lang` is
specified and a language other than `.lang` is given in one of the lemmas in `.lemmas` or enclitics in `.enclitics`).
]==]
function export.format_form_of(data)
	if type(data) ~= "table" then
		error("Internal error: First argument must now be a table of arguments")
	end
	-- Only nil selects the default; an explicit false disables wrapping.
	local text_classes = data.text_classes
	if text_classes == nil then
		text_classes = "form-of-definition use-with-mention"
	end
	local lemma_classes = data.lemma_classes
	if lemma_classes == nil then
		lemma_classes = "form-of-definition-link"
	end

	local parts = {}
	if text_classes then
		insert(parts, "<span class='" .. text_classes .. "'>")
	end
	insert(parts, data.text)
	if data.text ~= "" and data.lemmas then
		insert(parts, " ")
	end
	local categories = {}
	if data.lemmas then
		if type(data.lemmas) == "string" then
			insert(parts, wrap_in_span(data.lemmas, lemma_classes))
		else
			local formatted_terms = {}
			for _, lemma in ipairs(data.lemmas) do
				local linked_term, this_categories = show_linked_term {
					termobj = lemma,
					face = data.lemma_face,
					span_classes = lemma_classes,
					ok_to_destructively_modify = data.ok_to_destructively_modify,
					overall_lang = data.lang,
					text_classes = text_classes,
				}
				if this_categories[1] then
					extend(categories, this_categories)
				end
				if data.conj == false and lemma.separator then
					insert(formatted_terms, lemma.separator)
				end
				insert(formatted_terms, linked_term)
			end
			if data.conj == false then
				insert(parts, concat(formatted_terms))
			else
				insert(parts, serial_comma_join(formatted_terms, {conj = data.conj or "dan"}))
			end
		end
		if data.lit then
			insert(parts, ", secara harfiah " .. require(links_module).mark(data.lit, "gloss"))
		end
	end
	if data.enclitics and #data.enclitics > 0 then
		-- The outer parens need to be outside of the text_classes span so they show in upright instead of italic, or
		-- they will clash with upright parens generated by link annotations such as transliterations and pos=.
		if text_classes then
			insert(parts, "</span>")
		end
		local formatted_terms = {}
		for _, enclitic in ipairs(data.enclitics) do
			-- FIXME, should we have separate clitic face and/or classes?
			local linked_term, this_categories = show_linked_term {
				termobj = enclitic,
				face = data.lemma_face,
				span_classes = lemma_classes,
				ok_to_destructively_modify = data.ok_to_destructively_modify,
				overall_lang = data.lang,
				text_classes = text_classes,
			}
			if this_categories[1] then
				extend(categories, this_categories)
			end
			insert(formatted_terms, linked_term)
		end
		insert(parts, " (")
		-- The label is the same for one or several enclitics (the original appended an empty plural suffix either
		-- way).
		insert(parts, wrap_in_span("dengan enklitik ", text_classes))
		insert(parts, serial_comma_join(formatted_terms, {conj = data.enclitic_conj or "dan"}))
		insert(parts, ")")
		if text_classes then
			insert(parts, "<span class='" .. text_classes .. "'>")
		end
	end
	if data.base_lemmas and #data.base_lemmas > 0 then
		for _, base_lemma in ipairs(data.base_lemmas) do
			insert(parts, ", ")
			if text_classes then
				insert(parts, "</span>")
			end
			-- The extra parentheses keep only the first return value of tagged_inflections().
			insert(parts, (export.tagged_inflections {
				lang = data.lang or base_lemma.lemmas[1].lang,
				tags = base_lemma.paramobj.tags,
				lemmas = base_lemma.lemmas,
				conj = base_lemma.conj or "dan",
				lemma_face = data.lemma_face,
				no_format_categories = true,
				nocat = true,
				text_classes = data.text_classes,
				-- Take the flag from `data`; a bare `ok_to_destructively_modify` here is an undefined global (always
				-- nil), which silently dropped the caller's setting.
				ok_to_destructively_modify = data.ok_to_destructively_modify,
			}))
			if text_classes then
				insert(parts, "<span class='" .. text_classes .. "'>")
			end
		end
	end
	-- FIXME, should posttext go before enclitics? If so we need to have separate handling for the
	-- final colon when there are multiple tag sets in tagged_inflections().
	if data.posttext then
		insert(parts, data.posttext)
	end
	if text_classes then
		insert(parts, "</span>")
	end
	return concat(parts), categories
end
format_form_of = export.format_form_of


--[==[
Return true if `tag` contains an internal link or HTML. (The check is a plain-text search for `[[`, `|` or `<`; the
actual return value is the position of the first match found, or {nil}.)
]==]
function export.is_link_or_html(tag)
	return tag:find("[[", nil, true) or tag:find("|", nil, true) or tag:find("<", nil, true)
end
is_link_or_html = export.is_link_or_html


--[==[
Look up a tag (either a shortcut of any sort or a canonical long-form tag) and return its expansion. The expansion
will be a string unless the shortcut is a list-tag shortcut such as `1s`; in that case, the expansion will be a
list. The caller must handle both cases. Only one level of expansion happens; hence, `acc` expands to {"accusative"},
`1s` expands to { {"1", "s"}} (not to { {"first", "singular"}}) and `123` expands to {"1//2//3"}. The expansion will be
the same as the passed-in tag in the following circumstances:
# The tag is `;` (this is special-cased, and no lookup is done).
# The tag is a multipart tag such as `nom//acc` (this is special-cased, and no lookup is done).
# The tag contains a raw link (this is special-cased, and no lookup is done).
# The tag contains HTML (this is special-cased, and no lookup is done).
# The tag is already a canonical long-form tag.
# The tag is unrecognized.

This function first looks up in the lang-specific data module [[Module:form of/lang-data/LANGCODE]], then in
[[Module:form of/data/1]] (which includes more common non-lang-specific tags) and finally (only if the tag is not
recognized as a shortcut or canonical tag, and is not of types 1-4 above) in [[Module:form of/data/2]].

If the expansion is a string and is different from the tag, track it if `do_track` is true.
]==]
function export.lookup_shortcut(tag, lang, do_track)
	-- If there is HTML or a link in the tag, return it directly; don't try
	-- to look it up, which will fail.
	if tag == ";" or tag:find("//", nil, true) or is_link_or_html(tag) then
		return tag
	end
	local expansion
	while lang do
		local langdata = safe_load_data(form_of_lang_data_module_prefix .. lang:getCode())
		-- If this is a canonical long-form tag, just return it, and don't check for shortcuts. This is an
		-- optimization; see below.
		if langdata then
			if langdata.tags[tag] then
				return tag
			end
			expansion = langdata.shortcuts[tag]
			if expansion then
				break
			end
		end
		-- If the language has a parent (i.e. a superordinate variety), try again with that.
		lang = lang:getParent()
	end
	if not expansion then
		-- If this is a canonical long-form tag, just return it, and don't check for shortcuts (which will cause
		-- [[Module:form of/data/2]] to be loaded, because there won't be a shortcut entry in [[Module:form of/data/1]] --
		-- or, for that matter, in [[Module:form of/data/2]]). This is an optimization; the code will still work without
		-- it, but will use up more memory.
		if (m_data1 or get_m_data1()).tags[tag] then
			return tag
		end
		expansion = m_data1.shortcuts[tag]
	end
	if not expansion then
		expansion = (m_data2 or get_m_data2()).shortcuts[tag]
	end
	if not expansion then
		return tag
	end
	-- Maybe track the expansion if it's not the same as the raw tag.
	if do_track and expansion ~= tag and type(expansion) == "string" then
		track("tag/" .. tag)
	end
	return expansion
end
lookup_shortcut = export.lookup_shortcut


--[==[
Look up a normalized/canonicalized tag and return the data object associated with it. If the tag isn't found, return
nil. This first looks up in the lang-specific data module [[Module:form of/lang-data/LANGCODE]], then in
[[Module:form of/data/1]] (which includes more common non-lang-specific tags) and then finally in
[[Module:form of/data/2]].
]==]
function export.lookup_tag(tag, lang)
	while lang do
		local langdata = safe_load_data(form_of_lang_data_module_prefix .. lang:getCode())
		-- Named distinctly from the `tag` parameter so the parameter is not shadowed inside the loop.
		local lang_tagobj = langdata and langdata.tags[tag]
		if lang_tagobj then
			return lang_tagobj
		end
		-- If the language has a parent (i.e. a superordinate variety), try again with that.
		lang = lang:getParent()
	end
	local tagobj = (m_data1 or get_m_data1()).tags[tag]
	if tagobj then
		return tagobj
	end
	local tagobj2 = (m_data2 or get_m_data2()).tags[tag]
	if tagobj2 then
		return tagobj2
	end
	return nil
end
lookup_tag = export.lookup_tag


-- Normalize a single tag, which may be a shortcut but should not be a multipart tag, a multipart shortcut or a list
-- shortcut. Throws an error if the tag expands to a list. Returns the expanded tag.
local function normalize_single_tag(tag, lang, do_track)
	local expansion = lookup_shortcut(tag, lang, do_track)
	if type(expansion) ~= "string" then
		error("Tag '" .. tag .. "' is a list shortcut, which is not allowed here")
	end
	tag = expansion
	if not lookup_tag(tag, lang) and do_track then
		-- If after all expansions and normalizations we don't recognize the canonical tag, track it.
		track("unknown")
		track("unknown/" .. tag)
	end
	return tag
end


--[=[
Normalize a component of a multipart tag. This should not have any // in it, but may join multiple individual tags with
a colon, and may be a single list-tag shortcut, which is treated as if colon-separated. The return value may be a list
of tags.
]=]
local function normalize_multipart_component(tag, lang, do_track)
	-- If there is HTML or a link in the tag, don't try to split on colon. A colon may legitimately occur in either one,
	-- and we don't want these things parsed. Note that we don't do this check before splitting on //, which we don't
	-- expect to occur in links or HTML; see comment in normalize_tag().
	if is_link_or_html(tag) then
		return tag
	end
	local components = split(tag, ":", true)
	if #components == 1 then
		-- We allow list-tag shortcuts inside of multipart tags, e.g.
		-- '1s//3p'. Check for this now.
		tag = lookup_shortcut(tag, lang, do_track)
		if type(tag) == "table" then
			-- Temporary tracking as we will disallow this.
			track("list-tag-inside-of-multipart")
			-- We found a list-tag shortcut; treat as if colon-separated.
			components = tag
		else
			return normalize_single_tag(tag, lang, do_track)
		end
	end
	local normtags = {}
	-- Temporary tracking as we will disallow this.
	track("two-level-multipart")
	for _, component in ipairs(components) do
		if do_track then
			-- There are multiple components; track each of the individual
			-- raw tags.
			track("tag/" .. component)
		end
		insert(normtags, normalize_single_tag(component, lang, do_track))
	end
	return normtags
end


--[=[
Normalize a single tag. The return value may be a list (in the case of multipart tags), which will contain nested lists
in the case of two-level multipart tags.
]=]
local function normalize_tag(tag, lang, do_track)
	-- We don't check for links or HTML before splitting on //, which we don't expect to occur in links or HTML. Doing
	-- it this way allows for a tag like '{{lb|grc|Epic}}//{{lb|grc|Ionic}}' to function correctly (the template calls
	-- will be expanded before we process the tag, and will contain links and HTML). The only check we do is for a URL,
	-- which shouldn't normally occur, but might if the user tries to put an external link into the tag. URL's with //
	-- normally have the sequence ://, which should never normally occur when // and : are used in their normal ways.
	if tag:find("://", nil, true) then
		return tag
	end
	local split_tags = split(tag, "//", true)
	if #split_tags == 1 then
		local retval = normalize_multipart_component(tag, lang, do_track)
		if type(retval) == "table" then
			-- The user gave a tag like '1:s', i.e. with colon but without //. Allow this, but we need to return a
			-- nested list.
			return {retval}
		end
		return retval
	end
	local normtags = {}
	for _, single_tag in ipairs(split_tags) do
		if do_track then
			-- If the tag was a multipart tag, track each of individual raw tags.
			track("tag/" .. single_tag)
		end
		insert(normtags, normalize_multipart_component(single_tag, lang, do_track))
	end
	return normtags
end


--[==[
Normalize a tag set (a list of tags) into its canonical-form tags. The return value is a list of normalized tag sets
(a list because there may be conjoined shortcuts among the input tags). A normalized tag set is a list of tag
elements, where each element is either a string (the canonical form of a tag), a list of such strings (in the case of
multipart tags) or a list of lists of such strings (in the case of two-level multipart tags). For example, the multipart
tag `nom//acc//voc` will be represented in canonical form as { {"nominative", "accusative", "vocative"}}, and the
two-level multipart tag `1:s//3:p` will be represented as { {{"first-person", "singular"}, {"third-person", "plural"}}}.

Example 1:

{normalize_tag_set({"nom//acc//voc", "n", "p"})} = { {{{"nominative", "accusative", "vocative"}, "neuter", "plural"}}}

Example 2:

{normalize_tag_set({"ed-form"}, ENGLISH)} = { {{"simple", "past"}, {"past", "participle"}}}

Example 3:

{normalize_tag_set({"archaic", "ed-form"}, ENGLISH)} = { {{"archaic", "simple", "past"}, {"archaic", "past", "participle"}}}
]==]
function export.normalize_tag_set(tag_set, lang, do_track)
	-- We track usage of shortcuts, normalized forms and (in the case of multipart tags or list tags) intermediate
	-- forms. For example, if the tags 1s|mn|gen|indefinite are passed in, we track the following:
	-- [[Wiktionary:Tracking/inflection of/tag/1s]]
	-- [[Wiktionary:Tracking/inflection of/tag/1]]
	-- [[Wiktionary:Tracking/inflection of/tag/s]]
	-- [[Wiktionary:Tracking/inflection of/tag/first-person]]
	-- [[Wiktionary:Tracking/inflection of/tag/singular]]
	-- [[Wiktionary:Tracking/inflection of/tag/mn]]
	-- [[Wiktionary:Tracking/inflection of/tag/m//n]]
	-- [[Wiktionary:Tracking/inflection of/tag/m]]
	-- [[Wiktionary:Tracking/inflection of/tag/n]]
	-- [[Wiktionary:Tracking/inflection of/tag/masculine]]
	-- [[Wiktionary:Tracking/inflection of/tag/neuter]]
	-- [[Wiktionary:Tracking/inflection of/tag/gen]]
	-- [[Wiktionary:Tracking/inflection of/tag/genitive]]
	-- [[Wiktionary:Tracking/inflection of/tag/indefinite]]
	local output_tag_set = {}
	local saw_semicolon = false

	for _, tag in ipairs(tag_set) do
		if do_track then
			-- Track the raw tag.
			track("tag/" .. tag)
		end
		-- Expand the tag, which may generate a new tag (either a fully canonicalized tag, a multipart tag, or a list
		-- of tags).
		tag = lookup_shortcut(tag, lang, do_track)
		if type(tag) == "table" then
			if contains(tag, ";") then
				-- If we saw a conjoined shortcut, we need to use a more general algorithm that can expand a single
				-- tag set into multiple.
				saw_semicolon = true
				break
			end
			for _, t in ipairs(tag) do
				if do_track then
					-- If the tag expands to a list of raw tags, track each of those.
					track("tag/" .. t)
				end
				insert(output_tag_set, normalize_tag(t, lang, do_track))
			end
		else
			insert(output_tag_set, normalize_tag(tag, lang, do_track))
		end
	end

	if not saw_semicolon then
		return {output_tag_set}
	end

	-- Use a more general algorithm that handles conjoined shortcuts.
	output_tag_set = {}
	for i, tag in ipairs(tag_set) do
		if do_track then
			-- Track the raw tag.
			track("tag/" .. tag)
		end
		-- Expand the tag, which may generate a new tag (either a fully canonicalized tag, a multipart tag, or a list
		-- of tags).
		tag = lookup_shortcut(tag, lang, do_track)
		if type(tag) == "table" then
			-- At the first list shortcut, combine the tags seen so far with every tag set the shortcut expands to and
			-- every tag set the remaining tags expand to; the recursive calls handle any later shortcuts.
			local output_tag_sets = {}
			local shortcut_tag_sets = split_tag_set(tag)
			local normalized_shortcut_tag_sets = {}
			for _, shortcut_tag_set in ipairs(shortcut_tag_sets) do
				extend(normalized_shortcut_tag_sets, normalize_tag_set(shortcut_tag_set, lang, do_track))
			end
			local after_tags = slice(tag_set, i + 1)
			local normalized_after_tags_sets = normalize_tag_set(after_tags, lang, do_track)
			for _, normalized_shortcut_tag_set in ipairs(normalized_shortcut_tag_sets) do
				for _, normalized_after_tags_set in ipairs(normalized_after_tags_sets) do
					insert(output_tag_sets, append(output_tag_set, normalized_shortcut_tag_set,
						normalized_after_tags_set))
				end
			end
			return output_tag_sets
		else
			insert(output_tag_set, normalize_tag(tag, lang, do_track))
		end
	end

	error("Internal error: Should not get here")
end
normalize_tag_set = export.normalize_tag_set


--[==[
Split a tag set that may consist of multiple semicolon-separated tag sets into the component tag sets. Empty component
tag sets (e.g. from two adjacent semicolons) are dropped.
]==]
function export.split_tag_set(tag_set)
	local split_tag_sets = {}
	local cur_tag_set = {}
	for _, tag in ipairs(tag_set) do
		if tag == ";" then
			if #cur_tag_set > 0 then
				insert(split_tag_sets, cur_tag_set)
			end
			cur_tag_set = {}
		else
			insert(cur_tag_set, tag)
		end
	end
	if #cur_tag_set > 0 then
		insert(split_tag_sets, cur_tag_set)
	end
	return split_tag_sets
end
split_tag_set = export.split_tag_set


-- Inline modifiers recognized on a tag set: `<lb:...>`, whose value is split on `//` and stored under `labels`.
local tag_set_param_mods = {
	lb = {
		item_dest = "labels",
		convert = function(arg, parse_err)
			return split(arg, "//", true)
		end,
	}
}


--[==[
Parse tag set properties from a tag set (list of tags). Currently no per-tag properties are recognized, and the only
per-tag-set property recognized is `<lb:...>` for specifying label(s) for the tag set. Per-tag-set properties must be
attached to the last tag. Returns an object whose `.tags` field is the passed-in `tag_set` list (modified in place so
that the last tag no longer carries its modifiers).
]==]
function export.parse_tag_set_properties(tag_set)
	local function generate_tag_set_obj(last_tag)
		tag_set[#tag_set] = last_tag
		return {tags = tag_set}
	end

	local last_tag = tag_set[#tag_set]
	if last_tag == nil then
		-- Empty tag set: there is no last tag to carry properties, so return the set as-is instead of failing on
		-- indexing nil below.
		return {tags = tag_set}
	end
	-- Check for inline modifier, e.g. מרים<tr:Miryem>. But exclude HTML entry with <span ...>, <i ...>, <br/> or
	-- similar in it, caused by wrapping an argument in {{l|...}}, {{af|...}} or similar. Basically, all tags of
	-- the sort we parse here should consist of a less-than sign, plus letters, plus a colon, e.g. <lb:...>, so if
	-- we see a tag on the outer level that isn't in this format, we don't try to parse it. The restriction to the
	-- outer level is to allow generated HTML inside of e.g. qualifier tags, such as foo<q:similar to {{m|fr|bar}}>.
	if last_tag:find("<", nil, true) and not last_tag:find("^[^<]*<%l*[^%l:]") then
		return parse_inline_modifiers(last_tag, {
			param_mods = tag_set_param_mods,
			generate_obj = generate_tag_set_obj,
		})
	else
		return generate_tag_set_obj(last_tag)
	end
end
parse_tag_set_properties = export.parse_tag_set_properties


-- Map a part of speech through the `pos_aliases` table of [[Module:headword/data]]; if there is no entry for it, it
-- is returned unchanged. A nil or false `pos` yields nil.
local function normalize_pos(pos)
	if not pos then
		return nil
	end
	return (m_pos_data or get_m_pos_data())[pos] or pos
end


-- Return the display form of a single canonical-form tag. The value
-- passed in must be a string (i.e. it cannot be a list describing a
-- multipart tag).
-- To handle multipart tags, use get_tag_display_form().
local function get_single_tag_display_form(normtag, lang)
	local tagobj = lookup_tag(normtag, lang)
	-- Prefer the tag's special display form when it has one; otherwise show the canonical tag itself.
	local text = tagobj and tagobj.display or normtag
	-- Only a non-nil glossary entry produces a link.
	local glossary = tagobj and tagobj[(m_data or get_m_data()).GLOSSARY]
	if glossary == nil then
		return text
	end

	-- Sentinel values: link to the canonical tag itself in the given location.
	if glossary == m_data.WIKT then
		return "[[" .. normtag .. "|" .. text .. "]]"
	elseif glossary == m_data.WP then
		return "[[w:" .. normtag .. "|" .. text .. "]]"
	elseif glossary == m_data.APPENDIX then
		return "[[Lampiran:Glosari#" .. anchor_encode(normtag) .. "|" .. text .. "]]"
	elseif type(glossary) ~= "string" then
		error(("Internal error: Wrong type %s for glossary value %s for tag %s"):format(
			type(glossary), dump(glossary), normtag))
	end

	-- String values: an explicit "wikt:" or "w:" target, else an anchor in the glossary appendix.
	local wikt_target = glossary:match("^wikt:(.*)")
	if wikt_target then
		return "[[" .. wikt_target .. "|" .. text .. "]]"
	end
	local wp_target = glossary:match("^w:(.*)")
	if wp_target then
		return "[[w:" .. wp_target .. "|" .. text .. "]]"
	end
	return "[[Lampiran:Glosari#" .. anchor_encode(glossary) .. "|" .. text .. "]]"
end


--[==[
Turn a canonicalized tag spec (which describes a single, possibly multipart tag) into the displayed form. The tag spec
may be a string (a canonical-form tag); a list of canonical-form tags (in the case of a simple multipart tag); or a
list of mixed canonical-form tags and lists of such tags (in the case of a two-level multipart tag). `joiner` indicates
how to join the parts of a multipart tag, and can be either {"and"} ("foo and bar", or "foo, bar and baz" for 3 or
more), {"slash"} ("foo/bar"), {"en-dash"} ("foo–bar") or {nil}, which uses the global default found in
{multipart_join_strategy()} in [[Module:form of/functions]]. (NOTE: The global default is {"slash"} and this seems
unlikely to change.)
]==]
function export.get_tag_display_form(tagspec, lang, joiner)
	if type(tagspec) == "string" then
		return get_single_tag_display_form(tagspec, lang)
	end

	-- Multipart tag: give each display handler a chance to render it specially; the first one that returns a
	-- value wins.
	for _, handler in ipairs(display_handlers or get_display_handlers()) do
		local special = handler(tagspec, joiner)
		if special then
			return special
		end
	end

	-- No handler claimed it: render each first-level part and join them.
	local rendered = {}
	for _, part in ipairs(tagspec) do
		if type(part) == "string" then
			insert(rendered, get_single_tag_display_form(part, lang))
		else
			-- A first-level element of a two-level multipart tag. Currently the individual components are just
			-- separated with spaces, but other ways are possible, e.g. using an underscore, colon, parens or
			-- braces.
			local words = {}
			for _, component in ipairs(part) do
				insert(words, get_single_tag_display_form(component, lang))
			end
			insert(rendered, concat(words, " "))
		end
	end
	return join_multiparts(rendered, joiner)
end
get_tag_display_form = export.get_tag_display_form


--[==[
Given a normalized tag set (i.e. as output by {normalize_tag_set()}; all tags are in canonical form, multipart tags are
represented as lists, and two-level multipart tags as lists of lists), convert to displayed form (a string). See
{get_tag_display_form()} for the meaning of `joiner`.
]==]
function export.get_tag_set_display_form(normalized_tag_set, lang, joiner)
	local pieces = {}
	for _, tagspec in ipairs(normalized_tag_set) do
		local shown = get_tag_display_form(tagspec, lang, joiner)
		-- Maybe insert a space before inserting the display form of the tag. We insert a space if
		-- (a) we're not the first tag; and
		-- (b) the tag we're about to insert doesn't have the "no_space_on_left" property; and
		-- (c) the preceding tag doesn't have the "no_space_on_right" property.
		-- NOTE: We depend here on the fact that
		-- (1) all tags with either of the above properties set have the same display form as canonical form, and
		-- (2) all tags with either of the above properties set are single-character tags.
		-- The second property is an optimization to avoid looking up display forms resulting from multipart tags,
		-- which won't be found and which will trigger loading of [[Module:form of/data/2]]; it is enforced by the
		-- single-character pattern match below, which would need widening if multichar punctuation is added.
		--
		-- If the first property above fails to hold in the future, we need to track the canonical form of each tag
		-- (including the previous one) as well as the display form. This would also avoid the need for the
		-- single-character check.
		local previous = pieces[#pieces]
		if previous ~= nil then
			-- The pattern matches exactly one character: any byte followed by UTF-8 continuation bytes only.
			local previous_obj = previous:match("^.[\128-\191]*$") and lookup_tag(previous, lang)
			local shown_obj = shown:match("^.[\128-\191]*$") and lookup_tag(shown, lang)
			local glued_to_previous = previous_obj and previous_obj.no_space_on_right
			local glued_to_shown = shown_obj and shown_obj.no_space_on_left
			if not glued_to_previous and not glued_to_shown then
				insert(pieces, " ")
			end
		end
		insert(pieces, shown)
	end
	return concat(pieces)
end
get_tag_set_display_form = export.get_tag_set_display_form


--[==[
Split a tag set containing two-level multipart tags into one or more tag sets not containing such tags.
Single-level multipart tags are left alone. (If we need to, a slight modification of the following code
will also split single-level multipart tags.)
This assumes that multipart tags are represented as lists
and two-level multipart tags are represented as lists of lists, as is output by {normalize_tag_set()}.

NOTE: We have to be careful to properly handle imbalanced two-level multipart tags such as
`def:s//p` (or the reverse, `s//def:p`).
]==]
local function split_two_level_multipart_tag_set(tag_set)
	-- A multipart tag is two-level when at least one of its parts is itself a list.
	local function is_two_level(multipart_tag)
		for _, part in ipairs(multipart_tag) do
			if type(part) == "table" then
				return true
			end
		end
		return false
	end

	for pos, tag in ipairs(tag_set) do
		if type(tag) == "table" and is_two_level(tag) then
			-- Expand the first two-level multipart tag found: for each of its alternants, build
			-- (tags before) + alternant + (tags after), then recursively split the result so that any later
			-- two-level tags get expanded too.
			local before = slice(tag_set, 1, pos - 1)
			local after = slice(tag_set, pos + 1)
			local split_sets = {}
			for _, alternant in ipairs(tag) do
				local candidate = {}
				extend(candidate, before)
				-- An alternant may be a single tag (string) or a list of tags.
				if type(alternant) == "table" then
					extend(candidate, alternant)
				else
					insert(candidate, alternant)
				end
				extend(candidate, after)
				extend(split_sets, split_two_level_multipart_tag_set(candidate))
			end
			return split_sets
		end
	end

	-- No two-level multipart tag present: the tag set stands as-is.
	return {tag_set}
end


-- Try to run the function `name` from the `cat_functions` table of the language-specific functions module for
-- `langcode`. If that module loads, its name is appended to `modules_tried`. Returns the function's (first) result
-- plus `true` when the function exists, otherwise `nil, false`.
local function try_lang_specific_module(langcode, modules_tried, name, data)
	local modname = form_of_lang_data_module_prefix .. langcode .. "/functions"
	local langdata = safe_require(modname)
	if not langdata then
		return nil, false
	end
	insert(modules_tried, modname)
	local fn = langdata.cat_functions and langdata.cat_functions[name]
	if not fn then
		return nil, false
	end
	return fn(data), true
end


-- Call a named function, either from the lang-specific data in
-- [[Module:form of/lang-data/LANGCODE/functions]] or in [[Module:form
-- of/functions]].
-- `funtype` is only used in the error message raised when no module defines `name`.
local function call_named_function(name, funtype, normalized_tag_set, lang, POS, pagename, lemmas)
	local data = {
		pagename = pagename or default_pagename or get_default_pagename(),
		lemmas = lemmas,
		tag_set = normalized_tag_set,
		lang = lang,
		POS = POS,
	}
	local modules_tried = {}

	-- First try lang-specific, walking from the language up through its parents (superordinate varieties).
	local variety = lang
	while variety do
		local retval, found_it = try_lang_specific_module(variety:getCode(), modules_tried, name, data)
		if found_it then
			return retval
		end
		variety = variety:getParent()
	end

	-- Then try lang-independent.
	insert(modules_tried, form_of_functions_module)
	local generic_fn = (cat_functions or get_cat_functions())[name]
	if generic_fn then
		return generic_fn(data)
	end

	-- Nothing found: report every module that was consulted, as wikilinks.
	local links = {}
	for idx, modname in ipairs(modules_tried) do
		links[idx] = "[[" .. modname .. "]]"
	end
	error(("No %s function named '%s' in %s"):format(funtype, name, list_to_text(links, nil, " or ")))
end


-- Given a tag from the current tag set (which may be a list in case of a multipart tag),
-- and a tag from a categorization spec, check that the two match.
-- (1) If both are strings, we just check for equality.
-- (2) If the spec tag is a string and the tag set tag is a list (i.e. it originates from a
--     multipart tag), we check that the spec tag is in the list. This is because we want to treat
--     multipart tags in user-specified tag sets as if the user had specified multiple tag sets.
--     For example, if the user said "1//3|s|pres|ind" and the categorization spec says {"has", "1"},
--     we want this to match, because "1//3|s|pres|ind" should be treated equivalently to two tag
--     sets "1|s|pres|ind" and "3|s|pres|ind", and the former matches the categorization spec.
-- (3) If the spec tag is a list (i.e. it originates from a multipart tag), we check that the
--     tag set tag is also a list and is a superset of the spec tag. For example, if the categorization
--     spec says {"has", "1//3"}, then the tag set tag must be a multipart tag that has both "1" and "3"
--     in it. "1//3" works, as does "1//2//3".
local function tag_set_tag_matches_spec_tag(tag_set_tag, spec_tag)
	local tag_is_list = type(tag_set_tag) == "table"
	if type(spec_tag) == "table" then
		-- Case (3): list spec tag needs a list tag that contains all of its parts.
		if tag_is_list and is_subset_list(spec_tag, tag_set_tag) then
			return true
		end
		return false
	end
	if tag_is_list then
		-- Case (2): string spec tag must be one of the parts of the multipart tag.
		if contains(tag_set_tag, spec_tag) then
			return true
		end
		return false
	end
	-- Case (1): plain comparison.
	return tag_set_tag == spec_tag
end


-- Check that the current tag set matches the given spec tag. This means that any of the tags
-- in the current tag set match, according to tag_set_tag_matches_spec_tag(); see above. If the
-- current tag set contains only string tags (i.e. no multipart tags), and the spec tag is a
-- string (i.e. not a multipart tag), this boils down to list containment, but it gets more
-- complex when multipart tags are present. The spec tag is normalized with normalize_tag() first.
local function tag_set_matches_spec_tag(spec_tag, tag_set, lang)
	local wanted = normalize_tag(spec_tag, lang)
	for _, candidate in ipairs(tag_set) do
		if tag_set_tag_matches_spec_tag(candidate, wanted) then
			return true
		end
	end
	return false
end


-- Check whether the given spec matches the current tag set.
-- Two values are returned:
-- (1) whether the spec matches the tag set; (2) the index of the category to add if
-- the spec matches.
--
-- `spec` is either a boolean (returned as-is, with no index) or a list whose first element names the predicate:
--   {"has", TAG, ...}            some tag of `tag_set` matches TAG; index 3
--   {"hasall", {TAG, ...}, ...}  every listed tag matches; index 3
--   {"hasany", {TAG, ...}, ...}  at least one listed tag matches; index 3
--   {"tags=", {TAG, ...}, ...}   `tag_set` equals the spec tag set, ignoring order; index 3
--   {"p=", POS, ...}             the part of speech equals normalize_pos(POS); index 3
--   {"pany", {POS, ...}, ...}    the part of speech equals one of the listed ones; index 3
--   {"pexists", ...}             a part of speech was given; index 2
--   {"not", COND, ...}           negation of COND; index 3
--   {"and", COND, COND, ...}     both conditions hold (short-circuiting); index 4
--   {"or", COND, COND, ...}      either condition holds (short-circuiting); index 4
--   {"call", NAME, ...}          result of the named "condition" function; index 3
-- Anything else raises an error. Values embedded in error messages go through tostring() so that a malformed
-- spec (nil, function, table predicate, ...) is reported as such rather than failing on the concatenation.
local function check_condition(spec, tag_set, normalized_tag_set, lang, POS, pagename, lemmas)
	if type(spec) == "boolean" then
		return spec
	elseif type(spec) ~= "table" then
		error("Wrong type of condition " .. tostring(spec) .. ": " .. type(spec))
	end
	local predicate = spec[1]
	if predicate == "has" then
		return tag_set_matches_spec_tag(spec[2], tag_set, lang), 3
	elseif predicate == "hasall" then
		for _, tag in ipairs(spec[2]) do
			if not tag_set_matches_spec_tag(tag, tag_set, lang) then
				return false, 3
			end
		end
		return true, 3
	elseif predicate == "hasany" then
		for _, tag in ipairs(spec[2]) do
			if tag_set_matches_spec_tag(tag, tag_set, lang) then
				return true, 3
			end
		end
		return false, 3
	elseif predicate == "tags=" then
		local normalized_spec_tag_sets = normalize_tag_set(spec[2], lang)
		if #normalized_spec_tag_sets > 1 then
			error("Internal error: No support for conjoined shortcuts in category/label specs in " ..
				"[[Module:form of/cats]] when processing spec tag set " .. concat(spec[2], "|"))
		end
		local normalized_spec_tag_set = normalized_spec_tag_sets[1]
		-- Check for and disallow two-level multipart tags in the specs. FIXME: Remove this when we remove
		-- support for two-level multipart tags.
		for _, tag in ipairs(normalized_spec_tag_set) do
			if type(tag) == "table" then
				for _, subtag in ipairs(tag) do
					if type(subtag) == "table" then
						error("Internal error: No support for two-level multipart tags in category/label specs in " ..
							"[[Module:form of/cats]] when processing spec tag set " .. concat(spec[2], "|"))
					end
				end
			end
		end
		-- Allow tags to be in different orders, and multipart tags to be in different orders. To handle this,
		-- we first check that both tag set tags and spec tags have the same length. If so, we sort the
		-- multipart tags in the tag set tags and spec tags, and then check that all tags in the spec tags are
		-- in the tag set tags.
		if #tag_set ~= #normalized_spec_tag_set then
			return false, 3
		end
		-- Sort a copy so that the caller's tag set is left untouched.
		local tag_set_tags = deep_copy(tag_set)
		for i = 1, #tag_set_tags do
			if type(tag_set_tags[i]) == "table" then
				sort(tag_set_tags[i])
			end
			if type(normalized_spec_tag_set[i]) == "table" then
				sort(normalized_spec_tag_set[i])
			end
		end
		for i = 1, #tag_set_tags do
			if not contains(tag_set_tags, normalized_spec_tag_set[i]) then
				return false, 3
			end
		end
		return true, 3
	elseif predicate == "p=" then
		return POS == normalize_pos(spec[2]), 3
	elseif predicate == "pany" then
		for _, specpos in ipairs(spec[2]) do
			if POS == normalize_pos(specpos) then
				return true, 3
			end
		end
		return false, 3
	elseif predicate == "pexists" then
		return POS ~= nil, 2
	elseif predicate == "not" then
		local condval = check_condition(spec[2], tag_set, normalized_tag_set, lang, POS, pagename, lemmas)
		return not condval, 3
	elseif predicate == "and" then
		local condval = check_condition(spec[2], tag_set, normalized_tag_set, lang, POS, pagename, lemmas)
		if condval then
			condval = check_condition(spec[3], tag_set, normalized_tag_set, lang, POS, pagename, lemmas)
		end
		return condval, 4
	elseif predicate == "or" then
		local condval = check_condition(spec[2], tag_set, normalized_tag_set, lang, POS, pagename, lemmas)
		if not condval then
			condval = check_condition(spec[3], tag_set, normalized_tag_set, lang, POS, pagename, lemmas)
		end
		return condval, 4
	elseif predicate == "call" then
		return call_named_function(spec[2], "condition", normalized_tag_set, lang, POS, pagename, lemmas), 3
	else
		error("Unrecognized predicate: " .. tostring(predicate))
	end
end


-- Process a given spec. This checks any conditions in the spec against the
-- tag set, and insert any resulting categories into `categories`.
-- Return value is true if the outermost condition evaluated to true and a category was inserted
-- (this is used in {"cond" ...} conditions, which stop when a subcondition evaluates
-- to true).
--
-- Forms of `spec` handled here:
--   * nil/false: nothing to do; returns false.
--   * string: a category name; `<<p=DEFAULT>>` is replaced with `POS` (or the normalized DEFAULT when `POS` is not
--     set), the language suffix is appended and the result is inserted into `categories`; returns true.
--   * table with a `.labels` field: each label is added to `labels` (without duplicates); returns true.
--   * {"multi", SPEC, ...}: processes every SPEC; returns true.
--   * {"cond", SPEC, ...}: processes SPECs in order, stopping at (and returning true for) the first that returns
--     true; returns false if none does.
--   * {"call", NAME}: processes the spec returned by the named "spec" function.
--   * any other table: a condition for check_condition(); the "if" branch at the returned index is processed when
--     the condition holds, otherwise the "else" branch right after it. The return value is the condition's value,
--     regardless of what the nested branch inserted.
-- Any other type raises an error; the offending value goes through tostring() so that e.g. `true` or a function is
-- reported with the intended message rather than failing on the concatenation.
local function process_spec(spec, tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels)
	if not spec then
		return false
	elseif type(spec) == "string" then
		-- A category. Substitute POS request with user-specified part of speech or default.
		spec = spec:gsub("<<p=(.-)>>", function(default)
			return POS or normalize_pos(default)
		end)
		insert(categories, spec .. " bahasa " .. lang:getFullName())
		return true
	elseif type(spec) == "table" and spec.labels then
		-- A label spec.
		for _, label in ipairs(spec.labels) do
			insert_if_not(labels, label)
		end
		return true
	elseif type(spec) ~= "table" then
		error("Wrong type of specification " .. tostring(spec) .. ": " .. type(spec))
	end
	local predicate = spec[1]
	if predicate == "multi" then
		for _, sp in iterate_from(2, ipairs(spec)) do -- Iterate from 2.
			process_spec(sp, tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels)
		end
		return true
	elseif predicate == "cond" then
		for _, sp in iterate_from(2, ipairs(spec)) do -- Iterate from 2.
			if process_spec(sp, tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels) then
				return true
			end
		end
		return false
	elseif predicate == "call" then
		return process_spec(
			call_named_function(spec[2], "spec", normalized_tag_set, lang, POS, pagename, lemmas),
			tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels
		)
	else
		local condval, ifspec = check_condition(spec, tag_set, normalized_tag_set, lang, POS, pagename, lemmas)
		if condval then
			process_spec(spec[ifspec], tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels)
			return true
		else
			process_spec(spec[ifspec + 1], tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories,
				labels)
			-- FIXME: Are we sure this is correct?
			return false
		end
	end
end


--[==[
Given a normalized tag set (i.e.
as output by {normalize_tag_set()}; all tags are in canonical form, multipart tags are
represented as lists, and two-level multipart tags as lists of lists), fetch the associated categories and labels.
Return two values, a list of categories and a list of labels. `lang` is the language of term represented by the tag set,
and `POS` is the user-provided part of speech (which may be {nil}).
]==]
function export.fetch_categories_and_labels(normalized_tag_set, lang, POS, pagename, lemmas)
	local categories, labels = {}, {}
	POS = normalize_pos(POS)

	-- Run every spec listed under `code` in the categorization data against one (already split) tag set.
	local function apply_specs(code, tag_set)
		local langspecs = (m_cats_data or get_m_cats_data())[code]
		if not langspecs then
			return
		end
		for _, spec in ipairs(langspecs) do
			process_spec(spec, tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels)
		end
	end

	-- First split any two-level multipart tags into multiple sets, to make our life easier.
	for _, tag_set in ipairs(split_two_level_multipart_tag_set(normalized_tag_set)) do
		-- Specs are consulted in this order: the language's own code, its full code (when different), and
		-- finally the generic "und" entry (unless the full code is already "und").
		local langcode = lang:getCode()
		apply_specs(langcode, tag_set)
		local full_code = lang:getFullCode()
		if full_code ~= langcode then
			apply_specs(full_code, tag_set)
		end
		if full_code ~= "und" then
			apply_specs("und", tag_set)
		end
	end
	return categories, labels
end
fetch_categories_and_labels = export.fetch_categories_and_labels


-- Format a list of labels using show_labels(), followed by a separating space. The space is omitted only when
-- `notext` is set and `data.pretext` is missing or empty. Returns the empty string when there are no labels.
local function format_labels(labels, data, notext)
	if not labels or #labels == 0 then
		return ""
	end
	local trailer = " "
	if notext and (data.pretext or "") == "" then
		trailer = ""
	end
	return show_labels {
		labels = labels,
		lang = data.lang,
		sort = data.sort,
		nocat = data.nocat,
	} .. trailer
end


--[==[
Implementation of templates that display inflection tags, such as the general {{tl|inflection of}}, semi-specific
variants such as {{tl|participle of}}, and specific variants such as {{tl|past participle of}}.
`data` contains all theinformation controlling the display, with the following fields:* `.lang`: ('''''required''''') Language to use when looking up language-specific inflection tags, categories and  labels, and for displaying categories and labels.* `.tags`: ('''''required''' unless `.tag_sets` is given'') List of non-canonicalized inflection tags. Multiple tag sets  can be indicated by a {";"} as one of the tags, and tag-set properties may be attached to the last tag of a tag set.  The tags themselves may come directly from the user (as in {{tl|inflection of}}); come partly from the user (as in  {{tl|participle of}}, which adds the tag `part` to user-specified inflection tags); or be entirely specified by the  template (as in {{tl|past participle of}}).* `.tag_sets`: ('''''required''' unless `.tags` is given'') List of non-canonicalized tag sets and associated  per-tag-set properties. Each element of the list is an object of the form  { {tags = {"TAG", "TAG", ...}, labels = {"LABEL", "LABEL", ...}}. If `.tag_sets` is specified, `.tags` should not be  given and vice-versa. Specifying `.tag_sets` in place of tags allowed per-tag set labels to be specified; otherwise,  there is no advantage. [[Module:pt-gl-inflections]] uses this functionality to supply labels like {"Brazil"} and  {"Portugal"} associated with specific tag sets.* `.lemmas`: ('''''recommended''''') List of objects describing the lemma(s) of which the term in question is a  non-lemma form. These are passed directly to {full_link()} in [[Module:links]]. Each object should have at minimum a  `.lang` field containing the language of the lemma and a `.term` field containing the lemma itself. Each object is  formatted using {full_link()} and then if there are more than one, they are joined using {serialCommaJoin()} in  [[Module:table]]. Alternatively, `.lemmas` can be a string, which is displayed directly. 
If omitted entirely, no lemma  links are shown and the connecting "of" is also omitted.* `.lemma_face`: ('''''recommended''''') "Face" to use when displaying the lemma objects. Usually should be set to  {"term"}.* `.POS`: ('''''recommended''''') Categorizing part-of-speech tag. Comes from the {{para|p}} or {{para|POS}} argument of  {{tl|inflection of}}.* `.pagename`: Page name of "current" page or nil to use the actual page title; for testing purposes.* `.conj`: Conjunction or separator to use when joining multiple lemma objects. Defaults to {"and"}.* `.enclitics`: List of enclitics to display after the lemmas, in parens.* `.enclitic_conj`: Conjunction or separator to use when joining multiple enclitics. Defaults to {"and"}.* `.no_format_categories`: If true, don't format the categories derived from the inflection tags; just return them.* `.sort`: Sort key for formatted categories. Ignored when `.no_format_categories` = {true}.* `.nocat`: Suppress computation of categories (even if `.no_format_categories` is not given).* `.notext`: Disable display of all tag text and `inflection of` text. (FIXME: Maybe not implemented correctly.)* `.capfirst`: Capitalize the first word displayed.* `.pretext`: Additional text to display before the inflection tags, but after any top-level labels.* `.posttext`: Additional text to display after the lemma links.* `.text_classes`: CSS classes used to wrap the tag text and lemma links. Default is   {"form-of-definition use-with-mention"}.* `.lemma_classes`: Additional CSS classes used to wrap the lemma links. Default is {"form-of-definition-link"}.* `.joiner`: Override the joiner (normally a slash) used to join multipart tags. 
You should normally not specify this.* `.ok_to_destructively_modify`: If set, data structures (including the nested lemma structures) can be modified  in-place to save memory; otherwise they will be copied before modifying.A typical call might look like this (for {{m+|es|amo}}): {local lang = require("Module:languages").getByCode("es")local lemma_obj = {lang = lang,term = "amar",}return m_form_of.tagged_inflections({lang = lang, tags = {"1", "s", "pres", "ind"}, lemmas = {lemma_obj}, lemma_face = "term", POS = "verb"})}Normally, one value is returned, the formatted text, which has appended to it the formatted categories derived from thetag-set-related categories generated by the specs in [Module:form of/cats]]. To suppress this, set`data.no_format_categories` = {true}, in which case two values are returned, the formatted text without any formattedcategories appended and a list of the categories to be formatted.NOTE: There are two sets of categories that may be generated: (1) categories derived directly from the tag sets, asspecified in [[Module:form of/cats]]; (2) categories derived from tag-set labels, either (a) set explicitly by thecaller in `data.tag_sets`, (b) specified by the user using `<lb:...>` attached to the last tag in a tag set, or(c) specified in [[Module:form of/cats]]. 
The second type (label-related categories) are currently not returned in
the second return value of {tagged_inflections()}, and are currently inserted into the output text even if
`data.no_format_categories` is set to {true}; but they can be suppressed by setting `data.nocat` = {true} (which also
suppresses the first type of categories, those derived directly from tag sets, even if `data.no_format_categories` is
set to {true}).
]==]
function export.tagged_inflections(data)
	-- Check the argument's type first so that a nil or non-table argument gets the documented message rather than
	-- an "attempt to index" error.
	if type(data) ~= "table" or (not data.tags and not data.tag_sets) then
		error("First argument must be a table of arguments, and `.tags` or `.tag_sets` must be specified")
	end
	if data.tags and data.tag_sets then
		error("Both `.tags` and `.tag_sets` cannot be specified")
	end

	-- Step 1: obtain the list of tag-set objects ({tags = ..., labels = ...}), parsing them out of `.tags` when
	-- the caller did not supply `.tag_sets` directly.
	local tag_sets = data.tag_sets
	if not tag_sets then
		tag_sets = split_tag_set(data.tags)
		for i, tag_set in ipairs(tag_sets) do
			tag_sets[i] = parse_tag_set_properties(tag_set)
		end
	end

	-- Step 2: normalize each tag set (which may yield several), collecting its categories, labels and display text.
	local inflections = {}
	local categories = {}
	for _, tag_set in ipairs(tag_sets) do
		local normalized_tag_sets = normalize_tag_set(tag_set.tags, data.lang, "do-track")
		for _, normalized_tag_set in ipairs(normalized_tag_sets) do
			-- Lemmas are only passed along when given as a list; a string `.lemmas` is display-only.
			local this_categories, this_labels = fetch_categories_and_labels(normalized_tag_set, data.lang,
				data.POS, data.pagename, type(data.lemmas) == "table" and data.lemmas or nil)
			if not data.nocat then
				extend(categories, this_categories)
			end
			local cur_infl = get_tag_set_display_form(normalized_tag_set, data.lang, data.joiner)
			-- Tag sets with no display text are dropped from the output.
			if #cur_infl > 0 then
				if tag_set.labels then
					-- Explicit per-tag-set labels come before the ones derived from the category specs.
					this_labels = append(tag_set.labels, this_labels)
				end
				insert(inflections, {infl_text = cur_infl, labels = this_labels})
			end
		end
	end

	-- Step 3: if every inflection carries the same labels, show them once up front (`overall_labels`); otherwise
	-- each inflection keeps and shows its own.
	local overall_labels, need_per_tag_set_labels
	for _, inflection in ipairs(inflections) do
		if overall_labels == nil then
			overall_labels = inflection.labels
		elseif not deep_equals(overall_labels, inflection.labels) then
			need_per_tag_set_labels = true
			overall_labels = nil
			break
		end
	end
	if not need_per_tag_set_labels then
		for _, inflection in ipairs(inflections) do
			inflection.labels = nil
		end
	end

	-- Step 4: build the text and hand it to format_form_of(), working on a shallow copy of `data` so that the
	-- caller's table does not get `.text`/`.posttext` written into it.
	local format_data = shallow_copy(data)
	local of_text = data.lemmas and " of" or ""
	local formatted_text, this_categories
	if #inflections == 1 then
		-- Single inflection: shown inline, directly before the lemma link.
		if need_per_tag_set_labels then
			error("Internal error: need_per_tag_set_labels should not be set with one inflection")
		end
		format_data.text = format_labels(overall_labels, data, data.notext) .. (data.pretext or "") ..
			(data.notext and "" or ((data.capfirst and ucfirst(inflections[1].infl_text) or
			inflections[1].infl_text) .. of_text))
		formatted_text, this_categories = format_form_of(format_data)
	else
		-- Any other number of inflections: a generic "inflection of LEMMA:" header followed by one "##" list
		-- item per inflection.
		format_data.text = format_labels(overall_labels, data, data.notext) .. (data.pretext or "") ..
			(data.notext and "" or ((data.capfirst and "Inflection" or "inflection") .. of_text))
		format_data.posttext = (data.posttext or "") .. ":"
		local link
		link, this_categories = format_form_of(format_data)
		local text_classes = data.text_classes
		if text_classes == nil then
			text_classes = "form-of-definition use-with-mention"
		end
		-- Replace each inflection object with its formatted list item, in place.
		for i, inflection in ipairs(inflections) do
			inflections[i] = "\n## " .. format_labels(inflection.labels, data, false) ..
				wrap_in_span(inflection.infl_text, text_classes)
		end
		formatted_text = link .. concat(inflections)
	end

	-- Step 5: merge in the categories returned by format_form_of(), then either append the formatted categories
	-- to the text or return the raw list as a second value.
	if this_categories[1] then
		extend(categories, this_categories)
	end
	if not data.no_format_categories then
		if categories[1] then
			formatted_text = formatted_text .. format_categories(categories, data.lang, data.sort, nil,
				export.force_cat)
		end
		return formatted_text
	end
	return formatted_text, categories
end


-- Return the contents of both form-of data modules, in order, serialized as a JSON array. `frame` is unused.
function export.dump_form_of_data(frame)
	local data = {
		require(form_of_data1_module),
		require(form_of_data2_module),
	}
	return require(json_module).toJSON(data)
end


-- Re-export the module names so all form-of modules stay in sync.
export.form_of_cats_module = form_of_cats_module
export.form_of_data1_module = form_of_data1_module
export.form_of_data2_module = form_of_data2_module
export.form_of_functions_module = form_of_functions_module
export.form_of_lang_data_module_prefix = form_of_lang_data_module_prefix
export.headword_data_module = headword_data_module

return export
Diambil daripada "https://ms.wiktionary.org/w/index.php?title=Modul:form_of&oldid=239342"
Kategori:
ページ先頭