Movatterモバイル変換


[0]ホーム

URL:


跳转到内容
维基词典自由的多语言词典
搜索

模組:Labels/data/lang/zh/functions

維基詞典,自由的多語言詞典
<Module:Labels |data |lang |zh

此模块缺少說明文檔。請協助創建
相關連結:根頁面根頁面的子頁面本頁面的子頁面链入嵌入包含測試用例沙盒

local export = {}

-- List of handler functions; this module appends one handler below. Each handler receives a `data` table and
-- modifies `data.labels` in place.
export.postprocess_handlers = {}

local labels_module = "Module:labels"

local unpack = unpack or table.unpack -- Lua 5.2 compatibility

-- Remove duplicated labels like 'Taiwanese' in 'Taiwanese Hokkien|and|Taiwanese Hakka'. Also remove duplicated labels
-- in things like
-- * 'Quanzhou|_|Hokkien' (which canonicalizes to 'Quanzhou Hokkien|_|Hokkien');
-- * 'Xiamen|and|Quanzhou|_|Hokkien' (which canonicalizes to 'Xiamen Hokkien|and|Quanzhou Hokkien|_|Hokkien');
-- * 'Xiamen|and|Anxi|_|Hokkien' (which canonicalizes to 'Xiamen Hokkien|and|Anxi|_|Hokkien');
-- * 'Xiamen|Zhangzhou|and|Quanzhou|_|Hokkien' (which canonicalizes to
--   'Xiamen Hokkien|Zhangzhou Hokkien|and|Quanzhou Hokkien|_|Hokkien');
-- * 'Xiamen|Zhangzhou|and|Anxi|_|Hokkien' (which canonicalizes to
--   'Xiamen Hokkien|Zhangzhou Hokkien|and|Anxi|_|Hokkien').
-- We do two passes. The first pass fixes cases like 'Quanzhou Hokkien|_|Hokkien', irrespective of whether there's an
-- "and" present. The second pass looks for a stretch of labels where (a) all of the labels have the same prefix or
-- suffix, and (b) in between the labels is at least one occurrence of "and" (which can also start out as "&" but is
-- canonicalized to "and"); but (c) we count two labels separated by "_" (which is canonicalized to a blank label) as a
-- single label.
--
-- The handler takes `data`, whose `labels` field is a list of label objects with a string `label` field, and
-- rewrites those `label` fields in place. It returns nothing.
table.insert(export.postprocess_handlers, function(data)
	local labels = data.labels
	if #labels == 1 then
		-- A single label cannot contain a duplicate.
		return
	end
	local m_labels = require(labels_module)

	-- First, split the labels into `link` and `display` component parts (done only once).
	local split_labels = {}
	for i, label in ipairs(labels) do
		local link, display = m_labels.split_display_form(label.label)
		split_labels[i] = {link = link, display = display}
	end

	-- Then compute "label starts" (indices of label sets to consider when looking for runs with the same prefix or
	-- suffix), where a label start is either a single label or a set of two labels separated by an underscore,
	-- and where we take occurrences of "and" into consideration.
	local label_starts = {}
	local i = 1
	while i <= #labels do
		local start = i
		local followed_by_and = false
		local after_underscore
		-- Every comparison here must go through `.label`: the entries of `labels` are tables, and a table never
		-- compares equal to a string, so comparing the entry itself would make this branch unreachable.
		if i <= #labels - 4 and labels[i + 1].label == "" and labels[i + 2].label == "and"
			and labels[i + 3].label == "" then
			-- 'Foo|_|and|_|Bar'; redundant underscores
			followed_by_and = true
			i = i + 3
		elseif i <= #labels - 2 and labels[i + 1].label == "and" then
			followed_by_and = true
			i = i + 1
		elseif i <= #labels - 2 and labels[i + 1].label == "" then
			after_underscore = i + 2
			i = i + 1
		end
		table.insert(label_starts, {
			start = start,
			followed_by_and = followed_by_and,
			after_underscore = after_underscore,
		})
		i = i + 1
	end

	-- Now the main loop.
	-- Each spec is {"affix", `at_beginning`}, or {{"affix", "affix"}, `at_beginning`} where "affix" is a prefix or
	-- suffix to remove and `at_beginning` indicates whether "affix" is a prefix or suffix. If more than one affix
	-- is listed, any affix counts, e.g. 'Taiwan Mandarin|and|Taiwanese Hokkien'.
	for _, affix_spec in ipairs {
		{{"Taiwanese", "Taiwan"}, true},
		{"Chinese"},
		{"Gan"},
		{"Hakka"},
		{"Hokkien"},
		{"Mandarin"},
		-- Min needs to go before Southern Min, Eastern Min, etc. because the later check for e.g. Eastern Min
		-- will overwrite the value set by Min if both match. With Min later, we'll end up with e.g.
		-- "Fuqing Eastern Eastern Min".
		{"Min"},
		{"Southern Min"},
		{"Eastern Min"},
		{"Northern Min"},
		{"Central Min"},
		{"Wu"},
		{"Xiang"},
	} do
		local affixes, at_beginning = unpack(affix_spec)
		if type(affixes) == "string" then
			affixes = {affixes}
		end

		-- Does `item` match against the prefix or suffix when both prefix/suffix and something else are
		-- present? If so, return the something else, which is what we need to set the label to if we remove
		-- the prefix/suffix. Otherwise return false.
		local function matches_affix_with_space(item)
			for _, affix in ipairs(affixes) do
				local space_regex = at_beginning and "^" .. affix .. " (.+)$" or "^(.+) " .. affix .. "$"
				local rest = item:match(space_regex)
				if rest then
					return rest
				end
			end
			return false
		end

		-- Does `item` match against the prefix or suffix exactly? If so, return an empty string, which is what
		-- we need to set the label to if we remove the prefix/suffix. Otherwise return false.
		local function matches_affix_exactly(item)
			for _, affix in ipairs(affixes) do
				if item == affix then
					return ""
				end
			end
			return false
		end

		-- Does the link or display at `label_index` match with `match_function`? If so, return a three-element
		-- list of `label_index`, `component` (either "link" or "display") and the return value of `match_function`.
		-- Otherwise return nil.
		local function check_match(label_index, match_function)
			local link, display = split_labels[label_index].link, split_labels[label_index].display
			local rest = display and match_function(display)
			if rest then
				return {label_index, "display", rest}
			else
				-- NOTE(review): the link is always checked with `matches_affix_with_space`, regardless of
				-- `match_function`. Left as-is; confirm whether that is intentional.
				rest = link and matches_affix_with_space(link)
				if rest then
					return {label_index, "link", rest}
				end
			end
			return nil
		end

		-- Given {`label_index`, `component`, `value`}, set the link or display component (depending on `component`)
		-- of the label at `label_index` to `value`. An empty `value` blanks the whole label.
		local function set_component_value(to_erase)
			local label_index, component, value = unpack(to_erase)
			if value == "" then
				labels[label_index].label = ""
			else
				local link, display = split_labels[label_index].link, split_labels[label_index].display
				if component == "display" then
					display = value
				else
					link = value
				end
				labels[label_index].label = m_labels.combine_display_form_parts(link, display)
			end
		end

		-- First pass: Look for two labels separated by an underscore, with the suffix occurring on both parts.
		-- (This shouldn't happen with prefixes.)
		if not at_beginning then
			for _, label_start in ipairs(label_starts) do
				local to_erase = check_match(label_start.start, matches_affix_with_space)
				if to_erase and label_start.after_underscore and
					check_match(label_start.after_underscore, matches_affix_exactly) then
					set_component_value(to_erase)
				end
			end
		end

		-- Second pass.

		-- Check whether a prefix or suffix matches the given label start index (index of a label set in the
		-- `label_starts` list; see above). If it matches, return value is {`index`, `component`, `value`}, i.e.
		-- the label index to change, the component ("link" or "display") to change and the value to set the
		-- component to. Otherwise, return nil.
		local function affix_matches(label_start_index)
			local label_start = label_starts[label_start_index]
			-- If we're dealing with a suffix, there are two cases: (1) 'Quanzhou Hokkien';
			-- (2) 'Quanzhou|_|Hokkien'. If we're dealing with a prefix, there are similarly (1) 'Taiwanese Hakka';
			-- (2) 'Taiwanese|_|Hakka'. In addition, we have to check both the link and the display.
			local to_erase = check_match(label_start.start, matches_affix_with_space)
			if to_erase then
				return to_erase
			end
			local after_underscore = label_start.after_underscore
			if not after_underscore then
				return nil
			end
			return check_match(at_beginning and label_start.start or after_underscore, matches_affix_exactly)
		end

		-- Now, try to find a run of two or more label sets with the same prefix or suffix, with at least one "and"
		-- in the middle.
		local j = 1
		while j <= #label_starts - 1 do
			local saw_and = false
			local run = {}
			local match = affix_matches(j)
			if match then
				table.insert(run, match)
				local k = j + 1
				while k <= #label_starts do
					match = affix_matches(k)
					if not match then
						break
					end
					table.insert(run, match)
					if label_starts[k - 1].followed_by_and then
						saw_and = true
					end
					k = k + 1
				end
				if #run > 1 and saw_and then
					-- We saw a run of two or more with at least one 'and' in the middle. Remove the prefix or
					-- suffix from all but the last (if we're dealing with a suffix) or all but the first (if we're
					-- dealing with a prefix).
					if at_beginning then
						table.remove(run, 1)
					else
						table.remove(run)
					end
					for _, to_erase in ipairs(run) do
						set_component_value(to_erase)
					end
				end
				-- `k` is either past the end or the index of a label set that did not match, so resume after it.
				j = k + 1
			else
				j = j + 1
			end
		end
	end
end)

return export
来自“https://zh.wiktionary.org/w/index.php?title=Module:Labels/data/lang/zh/functions&oldid=9420678”
分类:​

[8]ページ先頭

©2009-2025 Movatter.jp