This module will transliterate text in one of the Northeast Caucasian languages. It is also used to transliterate Aghul, Akhvakh, Andi, Archi, Avar, Budukh, Botlikh, Chechen, Chamalal, Ingush, Bezhta, and Bagvalal. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:cau-nec-translit/testcases.
-- export.tr(text, lang, sc): transliterates text written in the script specified by the code sc,
-- and language specified by the code lang. Returns nil when there is no substitution table for lang.

local m_str_utils = require("Module:string utilities")

local gsub = m_str_utils.gsub
local lower = m_str_utils.lower
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local u = m_str_utils.char
local upper = m_str_utils.upper

-- Character-class contents (without the surrounding brackets) used to build patterns below.
local CyrlConsonant = "бвгджзклмнпрстфхцчшщБВГДЖЗКЛМНПРСТФХЦЧШЩ"
local CyrlVowel = "аеёиоуыэюяАЕЁИОУЫЭЮЯ"
-- Combining diacritics, by code point.
local ACUTE, CIRC, TILDE, MACRON, BREVE, DOTABOVE, DIAER, CARON, DOTBELOW = u(0x301), u(0x302), u(0x303), u(0x304), u(0x306), u(0x307), u(0x308), u(0x30C), u(0x323)
-- Pattern class matching any one of the combining diacritics above.
local accent = "[" .. ACUTE .. CIRC .. TILDE .. MACRON .. BREVE .. DOTABOVE .. DIAER .. CARON .. DOTBELOW .. "]"
-- Temporary breaking character (private-use code point), inserted during processing and removed before returning.
local br = u(0xF000)

local export = {}

-- Returns the list of substitution tables for the language code `lang`, or nothing (nil) if the
-- code is not handled. The list is ordered so that tables with longer multigraph keys come before
-- tables with shorter keys; export.tr applies them in that order.
-- Structured like this to reduce size of loaded table.
local function getSubs(lang)
	-- Aghul
	if lang == "agx" then
		return {
			{["гъ"]="ğ",["гь"]="h",["гӏ"]="ʻʳ",["къ"]="qq",["кь"]="qʼ",["кӏ"]="kʼ",["пӏ"]="pʼ",["тӏ"]="tʼ",["хъ"]="q",["хь"]="x̂",["хӏ"]="ḥʳ",["цӏ"]="cʼ",["чӏ"]="čʼ"},
			{["а"]="a",["б"]="b",["в"]="v",["г"]="g",["д"]="d",["е"]="e",["ё"]="jo",["ж"]="ž",["з"]="z",["и"]="ı̇",["й"]="j",["к"]="k",["л"]="l",["м"]="m",["н"]="n",["о"]="o",["п"]="p",["р"]="r",["с"]="s",["т"]="t",["у"]="u",["ф"]="f",["х"]="x",["ц"]="c",["ч"]="č",["ш"]="š",["щ"]="šš",["ъ"]="ʔ",["ы"]="ə",["ь"]="ʲ",["э"]="e",["ю"]="ju",["я"]="ja"}
		}
	-- Akhvakh
	elseif lang == "akv" then
		return {
			{["гъӏ"]="ğʰ",["къӏ"]="qˣʼ",["кьӏ"]="kˡʼ",["лӏъ"]="ᵏl",["хъӏ"]="qˣ"},
			{["гъ"]="ɣ",["гь"]="h",["гӏ"]="ʻʳ",["къ"]="qxʼ",["кь"]="kkˡʼ",["кӏ"]="kʼ",["лъ"]="lˢ",["ль"]="ĺ",["лӏ"]="ᵏll",["пӏ"]="pʼ",["тӏ"]="tʼ",["хъ"]="qx",["хь"]="x̂",["хӏ"]="ḥʳ",["цӏ"]="cʼ",["чӏ"]="čʼ"},
			{["а"]="a",["б"]="b",["в"]="v",["г"]="g",["д"]="d",["е"]="e",["ё"]="jo",["ж"]="ž",["з"]="z",["и"]="ı̇",["й"]="j",["к"]="k",["л"]="l",["м"]="m",["н"]="n",["о"]="o",["п"]="p",["р"]="r",["с"]="s",["т"]="t",["у"]="u",["ф"]="f",["х"]="x",["ц"]="c",["ч"]="č",["ш"]="š",["щ"]="šš",["ъ"]="ʔ",["ы"]="ə",["ь"]="ʲ",["э"]="e",["ю"]="ju",["я"]="ja"}
		}
	-- Andi
	elseif lang == "ani" then
		return {
			{["къкъ"]="qxʼ",["хъхъ"]="qx"},
			{["гъӏ"]="ğʼ",["жъӏ"]="žʼ",["къӏ"]="qxʼ",["къг"]="ɢ",["лъӏ"]="llˢʼ",["хъӏ"]="qx",["цъӏ"]="ccʼ",["чъӏ"]="cčʼ"},
			{["гъ"]="ğ",["гь"]="h",["гӏ"]="gʼ",["къ"]="qˣʼ",["кь"]="kkˡʼ",["кӏ"]="kʼ",["лъ"]="lˢ",["ль"]="lˢʼ",["лӏ"]="ᵏll",["пӏ"]="pʼ",["тӏ"]="tʼ",["хъ"]="qˣ",["хь"]="x̂",["хӏ"]="xʼ",["цӏ"]="cʼ",["чӏ"]="čʼ"},
			{["а"]="a",["б"]="b",["в"]="v",["г"]="g",["д"]="d",["е"]="e",["ё"]="jo",["ж"]="ž",["з"]="z",["и"]="ı̇",["й"]="j",["к"]="k",["л"]="l",["м"]="m",["н"]="n",["о"]="o",["п"]="p",["р"]="r",["с"]="s",["т"]="t",["у"]="u",["ф"]="f",["х"]="x",["ц"]="c",["ч"]="č",["ш"]="š",["щ"]="šš",["ъ"]="ˀ",["ы"]="ə",["ь"]="ʲ",["э"]="e",["ю"]="ju",["я"]="ja"}
		}
	-- Archi
	elseif lang == "aqc" then
		return {
			{["ккъӏ"]="qq̣ʼ"},
			{["гъӏ"]="ğ̣",["ккъ"]="qqʼ",["къӏ"]="q̣ʼ",["хъӏ"]="q̣",["хьӏ"]="x̣"},
			{["гъ"]="ğ",["гь"]="h",["гӏ"]="ˀ",["къ"]="qʼ",["кь"]="kˡʼ",["кӏ"]="kʼ",["лъ"]="lʰ",["ль"]="lˠ",["лӏ"]="ᵏl",["пӏ"]="pʼ",["тӏ"]="tʼ",["хъ"]="q",["хӏ"]="ḥʳ",["цӏ"]="cʼ",["чӏ"]="čʼ"},
			{["а"]="a",["б"]="b",["в"]="w",["г"]="g",["д"]="d",["е"]="e",["ё"]="jo",["ж"]="ž",["з"]="z",["и"]="ı̇",["й"]="j",["к"]="k",["л"]="l",["м"]="m",["н"]="n",["о"]="o",["п"]="p",["р"]="r",["с"]="s",["т"]="t",["у"]="u",["ф"]="f",["х"]="x",["ц"]="c",["ч"]="č",["ш"]="š",["щ"]="šš",["ъ"]="ʔ",["ы"]="ə",["ь"]="",["э"]="e",["ю"]="ju",["я"]="ja"}
		}
	-- Avar
	elseif lang == "av" then
		return {
			{["гъ"]="ğ",["гь"]="h",["гӏ"]="ʻ",["къ"]="qxʼ",["кь"]="kkˡʼ",["кӏ"]="kʼ",["лъ"]="lˢ",["лӏ"]="ᵏll",["тӏ"]="tʼ",["хъ"]="qx",["хь"]="x̂",["хӏ"]="ḥʳ",["цӏ"]="cʼ",["чӏ"]="čʼ"},
			{["а"]="a",["б"]="b",["в"]="w",["г"]="g",["д"]="d",["е"]="e",["ё"]="jo",["ж"]="ž",["з"]="z",["и"]="ı̇",["й"]="j",["к"]="k",["л"]="l",["м"]="m",["н"]="n",["о"]="o",["п"]="p",["р"]="r",["с"]="s",["т"]="t",["у"]="u",["ф"]="f",["х"]="x",["ц"]="c",["ч"]="č",["ш"]="š",["щ"]="šš",["ъ"]="ʔ",["ы"]="ə",["ь"]="ʲ",["э"]="e",["ю"]="ju",["я"]="ja"}
		}
	-- Bagvalal
	elseif lang == "kva" then
		return {
			{["гъ"]="ğ",["гь"]="h",["гӏ"]="ˀ",["къ"]="qʼ",["кь"]="kkˡʼ",["кӏ"]="kʼ",["лъ"]="lˢ",["лӏ"]="ᵏll",["сӏ"]="sʼ",["тӏ"]="tʼ",["хъ"]="qx",["хь"]="x̂",["хӏ"]="ḥ",["цӏ"]="cʼ",["чӏ"]="čʼ",["шӏ"]="šʼ"},
			{["а"]="a",["б"]="b",["в"]="v",["г"]="g",["д"]="d",["е"]="e",["ё"]="jo",["ж"]="ž",["з"]="z",["и"]="ı̇",["й"]="j",["к"]="k",["л"]="l",["м"]="m",["н"]="n",["о"]="o",["п"]="p",["р"]="r",["с"]="s",["т"]="t",["у"]="u",["ф"]="f",["х"]="x",["ц"]="c",["ч"]="č",["ш"]="š",["щ"]="šš",["ъ"]="ʔ",["ы"]="ə",["ь"]="ʲ",["э"]="e",["ю"]="ju",["я"]="ja"}
		}
	-- Bezhta
	elseif lang == "kap" then
		return {
			{["гъ"]="ğ",["гь"]="h",["гӏ"]="ʻ",["къ"]="qxʼ",["кь"]="kˡʼ",["кӏ"]="kʼ",["лъ"]="lˢ",["лӏ"]="ᵏll",["пӏ"]="pʼ",["тӏ"]="tʼ",["хъ"]="qx",["хь"]="x̂",["хӏ"]="ḥ",["цӏ"]="cʼ",["чӏ"]="čʼ"},
			{["а"]="a",["б"]="b",["в"]="v",["г"]="g",["д"]="d",["е"]="e",["ё"]="jo",["ж"]="ž",["з"]="z",["и"]="ı̇",["й"]="j",["к"]="k",["л"]="l",["м"]="m",["н"]="n",["о"]="o",["п"]="p",["р"]="r",["с"]="s",["т"]="t",["у"]="u",["ф"]="f",["х"]="x",["ц"]="c",["ч"]="č",["ш"]="š",["щ"]="šš",["ъ"]="ʔ",["ы"]="ə",["ь"]="ʲ",["э"]="e",["ю"]="ju",["я"]="ja"}
		}
	-- Botlikh
	elseif lang == "bph" then
		return {
			{["гъ"]="ğ",["гь"]="h",["къ"]="qˣʼ",["кь"]="kkˡʼ",["кӏ"]="kʼ",["лъ"]="lˢ",["лӏ"]="ᵏll",["пӏ"]="pʼ",["тӏ"]="tʼ",["хъ"]="qˣ",["хь"]="x̂",["цӏ"]="cʼ",["чӏ"]="čʼ"},
			{["а"]="a",["б"]="b",["в"]="w",["г"]="g",["д"]="d",["е"]="e",["ё"]="jo",["ж"]="ž",["з"]="z",["и"]="ı̇",["й"]="j",["к"]="k",["л"]="l",["м"]="m",["н"]="n",["о"]="o",["п"]="p",["р"]="r",["с"]="s",["т"]="t",["у"]="u",["ф"]="f",["х"]="x",["ц"]="c",["ч"]="č",["ш"]="š",["щ"]="šš",["ъ"]="ˀ",["ы"]="ə",["ь"]="ʲ",["э"]="e",["ю"]="ju",["я"]="ja"}
		}
	-- Budukh
	elseif lang == "bdk" then
		return {
			{["къг"]="gʰ"},
			{["гъ"]="ğ",["гь"]="h",["гӏ"]="ʻ",["къ"]="qq",["кь"]="qʼ",["кӏ"]="kʼ",["пӏ"]="pʼ",["тӏ"]="tʼ",["хъ"]="q",["хь"]="x̂",["хӏ"]="ḥ",["цӏ"]="cʼ",["чӏ"]="čʼ"},
			{["а"]="a",["б"]="b",["в"]="v",["г"]="g",["д"]="d",["е"]="e",["ё"]="jo",["ж"]="ž",["з"]="z",["и"]="ı̇",["й"]="j",["к"]="k",["л"]="l",["м"]="m",["н"]="n",["о"]="o",["п"]="p",["р"]="r",["с"]="s",["т"]="t",["у"]="u",["ф"]="f",["х"]="x",["ц"]="c",["ч"]="č",["ш"]="š",["щ"]="šš",["ъ"]="ʔ",["ы"]="ı",["ь"]="ʲ",["э"]="e",["ю"]="ju",["я"]="ja",["ӏ"]="ˀ"}
		}
	-- Chamalal
	elseif lang == "cji" then
		return {
			{["кӏкӏ"]="kxʰʼ"},
			{["гъ"]="ğ",["гь"]="h",["гӏ"]="ʻ",["къ"]="qxʼ",["кь"]="kkˡʼ",["кӏ"]="kʼ",["лъ"]="lˢ",["лӏ"]="ᵏll",["пӏ"]="pʼ",["сӏ"]="sʼ",["тӏ"]="tʼ",["хъ"]="qx",["хь"]="x̂",["хӏ"]="ḥ",["цӏ"]="cʼ",["чӏ"]="čʼ"},
			{["а"]="a",["б"]="b",["в"]="v",["г"]="g",["д"]="d",["е"]="e",["ё"]="jo",["ж"]="ž",["з"]="z",["и"]="ı̇",["й"]="j",["к"]="k",["л"]="l",["м"]="m",["н"]="n",["о"]="o",["п"]="p",["р"]="r",["с"]="s",["т"]="t",["у"]="u",["ф"]="f",["х"]="x",["ц"]="c",["ч"]="č",["ш"]="š",["щ"]="šš",["ъ"]="ʔ",["ы"]="ə",["ь"]="ʲ",["э"]="e",["ю"]="ju",["я"]="ja"}
		}
	-- Chechen and Ingush
	elseif lang == "ce" or lang == "inh" then
		return {
			{["ккх"]="qq",["рхӏ"]="rh"},
			{["гӏ"]="ğ",["кх"]="q",["къ"]="qʼ",["кӏ"]="kʼ",["пӏ"]="pʼ",["тӏ"]="tʼ",["хь"]="ḥʳ",["хӏ"]="h",["цӏ"]="cʼ",["чӏ"]="čʼ"},
			{["а"]="a",["б"]="b",["в"]="v",["г"]="g",["д"]="d",["е"]="e",["ё"]="jo",["ж"]="ž",["з"]="z",["и"]="ı̇",["й"]="j",["к"]="k",["л"]="l",["м"]="m",["н"]="n",["о"]="o",["п"]="p",["р"]="r",["с"]="s",["т"]="t",["у"]="u",["ф"]="f",["х"]="x",["ц"]="c",["ч"]="č",["ш"]="š",["щ"]="šč",["ъ"]="ʔ",["ы"]="y",["ь"]="ʲ",["э"]="e",["ю"]="ju",["я"]="ja",["ӏ"]="ˀ"}
		}
	end
end

-- gsub callback for the Chechen/Ingush vowel + "й" sequences matched in export.tr.
-- Returns the vowel (with a tilde appended when `nasal` is non-empty), the optional acute,
-- then a lowercased copy of the vowel part; the "й" itself is not part of the result.
local function double_with_j(vowel, acute, nasal)
	local ret = vowel .. (nasal ~= "" and TILDE or nasal)
	return ret .. acute .. lower(ret)
end

-- Transliterates `text` using the substitution tables for language code `lang`.
--   text: the string to transliterate.
--   lang: language code; must be one of the codes handled by getSubs.
--   sc:   script code; accepted for interface compatibility, not read by this function.
-- Returns the transliterated string (NFC-normalised), or nil when getSubs has no tables for `lang`.
function export.tr(text, lang, sc)
	local subs = getSubs(lang)
	if not subs then
		return nil
	end

	-- Convert uppercase palochka to lowercase, along with any "false" palochkas (entered as Latin "I" or "l", Greek "Ι" or Cyrillic "І"). Lowercase palochka is found in tables above.
	text = gsub(text, "[IlΙІӀ]", "ӏ")

	-- Convert dialectal nasal ᵸ written as Latin ᴴ.
	text = gsub(text, "ᴴ", "ᵸ")

	-- Decompose precomposed characters, except for ё and й.
	text = gsub(text, "[^ёЁйЙ]", toNFD)

	-- Substitute double consonants for macrons over consonants. Add a temporary breaking character after, to prevent the creation of false multigraphs with following characters.
	local function macronToDouble(a, b)
		-- The third pattern below has a single capture, so `b` arrives as nil there;
		-- default it to "" so the concatenation does not raise an error.
		b = b or ""
		return a .. b .. lower(a) .. b .. br
	end
	text = gsub(text, "([" .. CyrlConsonant .. "])" .. MACRON .. "([ъь])" .. MACRON, macronToDouble)
	text = gsub(text, "([" .. CyrlConsonant .. "ъьЪЬ])" .. MACRON .. "(ӏ)" .. MACRON, macronToDouble)
	text = gsub(text, "([" .. CyrlConsonant .. "])" .. MACRON, macronToDouble)

	-- Remove any double hard/soft signs or palochkas this creates.
	text = gsub(text, "([ъьӏЪЬӀ])" .. "([ъьӏ])", function(a, b)
		if b == lower(a) then
			return a
		else
			return a .. b
		end
	end)

	-- Contextual substitution of "j" before "е", "w" for "у" and ʷ for "в".
	if lang == "aqc" then
		text = gsub(text, "([" .. CyrlConsonant .. "ъьЪЬ]" .. br .. "?[ӏӀ]?" .. br .. "?)в", "%1ʷ")
	else
		text = gsub(gsub(text, "^е", "jе"), "^Е", "Jе")
		text = gsub(text, "([" .. CyrlVowel .. "%s%p]" .. MACRON .. "?ь?ӏ?ᵸ?)е", "%1jе")
		text = gsub(text, "([%s%p])Е", "%1Jе")
		text = gsub(text, "у([аиоуыэ])", "w%1")
		text = gsub(text, "У([аиоуыэ])", "W%1")
		text = gsub(text, "([" .. CyrlVowel .. "]" .. MACRON .. "?ь?ӏ?ᵸ?)у", "%1w")
		text = gsub(text, "([" .. CyrlConsonant .. "ъьЪЬ]" .. br .. "?)в", "%1ʷ")
	end

	-- Add "j" before iotated vowels, and substitute non-iotated equivalents.
	text = gsub(gsub(text, "ё", "jо"), "Ё", "Jо")
	text = gsub(gsub(text, "ю", "jу"), "Ю", "Jу")
	text = gsub(gsub(text, "я", "jа"), "Я", "Jа")

	-- Process vowel modifiers.
	text = gsub(text, "([" .. CyrlVowel .. "])(" .. MACRON .. "?)(" .. ACUTE .. "?)(ь?)(" .. MACRON .. "?)(ӏ?)(ᵸ?)", function(vowel, macron1, acute, soft, macron2, palochka, nasal)
		-- Soft sign becomes a diaeresis, nasal marker becomes a tilde; for Archi ("aqc") a
		-- following palochka becomes a dot below, otherwise the palochka is re-appended unchanged.
		local ret = vowel .. (soft ~= "" and DIAER or soft) .. (palochka ~= "" and lang == "aqc" and DOTBELOW or "") .. (nasal ~= "" and TILDE or nasal)
		-- A macron directly after the vowel doubles it (second copy lowercased).
		if macron1 ~= "" then
			ret = ret .. acute .. lower(ret)
		else
			ret = ret .. acute
		end
		return ret .. (lang ~= "aqc" and palochka or "")
	end)

	if lang == "ce" or lang == "inh" then
		text = gsub(text, "([иИ])(" .. ACUTE .. "?" .. ")й(ᵸ?)", double_with_j)
		text = gsub(text, "([уУ]" .. DIAER .. ")(" .. ACUTE .. "?" .. ")й(ᵸ?)", double_with_j)
	end

	-- Apply language-specific substitutions by iterating over each subtable. For each one, create a temporary table that stores each substitution in lowercase and uppercase variants. Then, iterate over all substitutions.
	for _, i in ipairs(subs) do
		local t = {}
		-- Create a temporary table, then iterate over all of them.
		for k, v in pairs(i) do
			t[k] = v
			-- "ʔ" gets the explicit capital "Ɂ" rather than going through upper().
			if v == "ʔ" then
				t[gsub(k, "^.", upper)] = gsub(v, "^.", "Ɂ")
			else
				t[gsub(k, "^.", upper)] = gsub(v, "^.", upper)
			end
		end
		for letter, replacement in pairs(t) do
			text = text:gsub(letter, replacement)
		end
	end

	-- Reposition apostrophes, remove temporary breaking characters, then decompose.
	text = toNFD(gsub(gsub(gsub(text, "ʼʲ", "ʲʼ"), "ʼʷ", "ʷʼ"), br, ""))

	-- When double letters both have a modifier letter and/or an apostrophe, only show on the first or second for readability purposes.
	-- The gmatch pattern walks the literal one UTF-8 character at a time (lead byte plus continuation bytes).
	for letter in string.gmatch("abcdefghijklmnopqrstuvwxyzəɣıʔABCDEFGHIJKLMNOPQRSTUVWXYZƏƔɁʻˀ", ".[\128-\191]*") do
		text = gsub(text, "(ᵏ?)" .. letter .. "(" .. accent .. "?" .. accent .. "?" .. accent .. "?)([ʰʲˡʳˢʷˣˠ]?[ʲʷ]?ʼ?)" .. "%1" .. lower(letter) .. "%2%3", "%1" .. letter .. "%2" .. lower(letter) .. "%2%3")
	end

	-- Remove consecutive j/ʲ and w/ʷ.
	text = gsub(gsub(text, "ʲ?([Jj])ʲ?", "%1"), "ʷ?([Ww])ʷ?", "%1")

	-- Substitute i for dotted dotless i if not followed by an acute or tilde, then recompose.
	return toNFC(gsub(gsub(text, "ı" .. "(" .. DOTBELOW .. "?)" .. DOTABOVE .. "([^" .. ACUTE .. TILDE .. "])", "i%1%2"), "ı" .. "(" .. DOTBELOW .. "?)" .. DOTABOVE .. "$", "i%1"))
end

return export