Movatterモバイル変換


[0]ホーム

URL:


Jump to content
WiktionaryThe Free Dictionary
Search

User:Erutuon/scripts/scriptRecognition.js

    From Wiktionary, the free dictionary
    <User:Erutuon |scripts

    Note: You may have to bypass your browser’s cache to see the changes. In addition, after saving a sitewide CSS file such as MediaWiki:Common.css, it will take 5-10 minutes before the changes take effect, even if you clear your cache.

    • Mozilla / Firefox / Safari: hold Shift while clicking Reload, or press either Ctrl-F5 or Ctrl-R (Command-R on a Macintosh);
    • Konqueror and Chrome: click Reload or press F5;
    • Opera: clear the cache in Tools → Preferences;
    • Internet Explorer: hold Ctrl while clicking Refresh, or press Ctrl-F5.

    This user script lacks a documentation subpage. Please create it.
    Useful links: root page • root page’s subpages • links • redirects • your own
    /*
     * Two functions: a codepoint-to-Wiktionary-script-code function like
     * char_to_script in [[Module:Unicode data]], and a string-to-script-code
     * function. Language-agnostic.
     *
     * Originally from [[User:Erutuon/scripts/watchlistScriptTagging.js]] and
     * [[User:Erutuon/scripts/scriptTitles.js]].
     *
     * Requires ECMAScript 2016 (ES7) because it uses Array.prototype.includes.
     */
    /* jshint esversion: 6 */
    /* globals mw, globalThis */
    (function scriptRecognitionIIFE() {
        'use strict';

        // Object that receives the exported functions and is read for the
        // optional `scriptRecognition.log` switch. This is `window` in a
        // browser; the fallback only matters where `window` does not exist.
        const root = typeof window !== 'undefined' ? window : globalThis;

        // The following data is from [[Module:Unicode data/scripts]] and ultimately
        // based on [[Module:scripts/data]].
        //
        // scriptRanges[group] lists [lower, upper, scriptCode] triples for the
        // codepoints whose value >> 12 equals `group`. Within a group the triples
        // must be sorted by `lower` and must not overlap, because both search
        // functions below rely on that ordering.
        const scriptRanges = [
            [ // 0x0000-0x0FFF
                [0x41, 0x5A, 'Latn'], [0x61, 0x7A, 'Latn'], [0xC0, 0xD6, 'Latn'],
                [0xD8, 0xF6, 'Latn'], [0xF8, 0x24F, 'Latn'], [0x370, 0x3E1, 'Grek'],
                [0x3E2, 0x3EF, 'Copt'], [0x3F0, 0x3FF, 'Grek'], [0x400, 0x45F, 'Cyrl'],
                [0x464, 0x469, 'Cyrs'], [0x46A, 0x46D, 'Cyrl'], [0x46F, 0x471, 'Cyrs'],
                [0x472, 0x475, 'Cyrl'], [0x476, 0x489, 'Cyrs'], [0x48A, 0x527, 'Cyrl'],
                [0x531, 0x58F, 'Armn'], [0x590, 0x5FF, 'Hebr'], [0x600, 0x6FF, 'Arab'],
                [0x700, 0x74F, 'Syrc'], [0x750, 0x77F, 'Arab'], [0x780, 0x7B1, 'Thaa'],
                [0x7C0, 0x7FA, 'Nkoo'], [0x800, 0x83E, 'Samr'], [0x840, 0x85E, 'Mand'],
                [0x860, 0x86A, 'Syrc'], [0x8A0, 0x8FF, 'Arab'], [0x900, 0x97F, 'Deva'],
                [0x981, 0x9FA, 'Beng'], [0xA01, 0xA75, 'Guru'], [0xA81, 0xAF1, 'Gujr'],
                [0xB01, 0xB77, 'Orya'], [0xB82, 0xBFA, 'Taml'], [0xC01, 0xC7F, 'Telu'],
                [0xC82, 0xCF2, 'Knda'], [0xD02, 0xD7F, 'Mlym'], [0xD82, 0xDF4, 'Sinh'],
                [0xE01, 0xE5B, 'Thai'], [0xE81, 0xEDF, 'Laoo'], [0xF00, 0xFDA, 'Tibt']
            ],
            [ // 0x1000-0x1FFF
                [0x1000, 0x109F, 'Mymr'],
                [0x10A0, 0x10CD, 'Geok'], // Asomtavruli
                [0x10D0, 0x10FC, 'Geor'], // Mkhedruli
                [0x1100, 0x11FF, 'Hang'], [0x1200, 0x1399, 'Ethi'], [0x13A0, 0x13F4, 'Cher'],
                [0x1400, 0x167F, 'Cans'], [0x1680, 0x169C, 'Ogam'], [0x16A0, 0x16F0, 'Runr'],
                [0x1700, 0x1714, 'Tglg'], [0x1720, 0x1734, 'Hano'], [0x1740, 0x1753, 'Buhd'],
                [0x1760, 0x1773, 'Tagb'], [0x1780, 0x17F9, 'Khmr'], [0x1800, 0x18AA, 'Mong'],
                [0x1900, 0x194F, 'Limb'], [0x1950, 0x1974, 'Tale'], [0x1980, 0x19DF, 'Talu'],
                [0x19E0, 0x19FF, 'Khmr'], [0x1A00, 0x1A1F, 'Bugi'], [0x1A20, 0x1AAD, 'Lana'],
                [0x1B00, 0x1B7C, 'Bali'], [0x1B80, 0x1BBF, 'Sund'], [0x1BC0, 0x1BFF, 'Batk'],
                [0x1C00, 0x1C4F, 'Lepc'], [0x1C50, 0x1C7F, 'Olck'], [0x1E00, 0x1EFF, 'Latn'],
                [0x1F00, 0x1FFE, 'polytonic']
            ],
            [ // 0x2000-0x2FFF
                [0x2200, 0x22FF, 'Zmth'], [0x2300, 0x23F3, 'Zsym'], [0x2500, 0x27BF, 'Zsym'],
                [0x27C0, 0x27EF, 'Zmth'], [0x2800, 0x28FF, 'Brai'], [0x2980, 0x29FF, 'Zmth'],
                [0x2A00, 0x2AFF, 'Zmth'], [0x2C00, 0x2C5E, 'Glag'], [0x2C60, 0x2C7F, 'Latinx'],
                [0x2C80, 0x2CFF, 'Copt'],
                [0x2D00, 0x2D2D, 'Geok'], // Nuskhuri
                [0x2D30, 0x2D7F, 'Tfng'], [0x2D80, 0x2DDE, 'Ethi'], [0x2E80, 0x2FDF, 'Hani']
            ],
            [ // 0x3000-0x3FFF
                [0x3000, 0x303F, 'Hani'], [0x3041, 0x309F, 'Hira'], [0x30A0, 0x30FF, 'Kana'],
                [0x3105, 0x312D, 'Bopo'], [0x3131, 0x318E, 'Hang'], [0x31A0, 0x31BA, 'Bopo'],
                [0x31C0, 0x31E3, 'Hani'], [0x31F0, 0x31FF, 'Kana'], [0x3300, 0x3357, 'Kana'],
                [0x337B, 0x337F, 'Hani'], [0x3400, 0x3FFF, 'Hani']
            ],
            [[0x4000, 0x4DB5, 'Hani'], [0x4E00, 0x4FFF, 'Hani']],
            [[0x5000, 0x5FFF, 'Hani']],
            [[0x6000, 0x6FFF, 'Hani']],
            [[0x7000, 0x7FFF, 'Hani']],
            [[0x8000, 0x8FFF, 'Hani']],
            [[0x9000, 0x9FFF, 'Hani']],
            [ // 0xA000-0xAFFF
                [0xA000, 0xA4C6, 'Yiii'], [0xA4D0, 0xA4FF, 'Lisu'], [0xA500, 0xA62B, 'Vaii'],
                // The Cyrs range previously ended at 0xA697 and so overlapped
                // (and shadowed) the Cyrl range that follows it.
                [0xA640, 0xA67F, 'Cyrs'], [0xA680, 0xA697, 'Cyrl'], [0xA6A0, 0xA6F7, 'Bamu'],
                [0xA720, 0xA7FF, 'Latinx'], [0xA800, 0xA82B, 'Sylo'], [0xA840, 0xA877, 'Phag'],
                [0xA880, 0xA8D9, 'Saur'], [0xA8E0, 0xA8FB, 'Deva'], [0xA900, 0xA92F, 'Kali'],
                [0xA930, 0xA95F, 'Rjng'], [0xA980, 0xA9DF, 'Java'], [0xA9E0, 0xA9FE, 'Mymr'],
                [0xAA00, 0xAA5F, 'Cham'], [0xAA60, 0xAA7F, 'Mymr'], [0xAA80, 0xAADF, 'Tavt'],
                [0xAAE0, 0xAAFF, 'Mtei'], [0xAB01, 0xAB2E, 'Ethi'], [0xAB30, 0xAB65, 'Latinx'],
                [0xAB70, 0xABBF, 'Cher'], [0xABC0, 0xABFF, 'Mtei'], [0xAC00, 0xAFFF, 'Hang']
            ],
            [[0xB000, 0xBFFF, 'Hang']],
            [[0xC000, 0xCFFF, 'Hang']],
            [[0xD000, 0xD7A3, 'Hang']],
            [
                // no data for 0xE000-0xEFFF
            ],
            [ // 0xF000-0xFFFF
                [0xFB13, 0xFB17, 'Armn'], [0xFB1D, 0xFB4F, 'Hebr'], [0xFB50, 0xFDFD, 'Arab'],
                [0xFE70, 0xFEFC, 'Arab']
            ],
            [ // 0x10000-0x10FFF
                [0x10000, 0x100FA, 'Linb'], [0x10280, 0x1029C, 'Lyci'], [0x102A0, 0x102D0, 'Cari'],
                [0x102E1, 0x102FB, 'Copt'], [0x10300, 0x10323, 'Ital'], [0x10330, 0x1034A, 'Goth'],
                [0x10350, 0x1037A, 'Perm'], [0x10380, 0x1039F, 'Ugar'], [0x103A0, 0x103D5, 'Xpeo'],
                [0x10400, 0x1044F, 'Dsrt'], [0x10450, 0x1047F, 'Shaw'], [0x10480, 0x104A9, 'Osma'],
                [0x104B0, 0x104FB, 'Osge'], [0x10500, 0x10527, 'Elba'], [0x10530, 0x10563, 'Aghb'],
                [0x10600, 0x10767, 'Lina'], [0x10800, 0x1083F, 'Cprt'], [0x10840, 0x1085F, 'Armi'],
                [0x10860, 0x1087F, 'Palm'], [0x10880, 0x108AF, 'Nbat'], [0x108E0, 0x108FF, 'Hatr'],
                [0x10900, 0x1091F, 'Phnx'], [0x10920, 0x1093F, 'Lydi'], [0x10980, 0x1099F, 'Mero'],
                [0x109A0, 0x109BF, 'Merc'], [0x10A00, 0x10A58, 'Khar'], [0x10A60, 0x10A7F, 'Sarb'],
                [0x10A80, 0x10A9F, 'Narb'], [0x10AC0, 0x10AF6, 'Mani'], [0x10B00, 0x10B3F, 'Avst'],
                [0x10B40, 0x10B5F, 'Prti'], [0x10B60, 0x10B7F, 'Phli'], [0x10B80, 0x10BAF, 'Phlp'],
                [0x10C00, 0x10C48, 'Orkh'], [0x10C80, 0x10CB2, 'Hung'],
                [0x10E60, 0x10E7E, 'Ruminumerals']
            ],
            [ // 0x11000-0x11FFF
                [0x11000, 0x1106F, 'Brah'], [0x11080, 0x110C1, 'Kthi'], [0x110D0, 0x110F9, 'Sora'],
                [0x11100, 0x11143, 'Cakm'],
                // The bounds of this range were previously swapped
                // (0x11176, 0x11150), so it could never match.
                [0x11150, 0x11176, 'Mahj'],
                [0x11180, 0x111D9, 'Shrd'], [0x11200, 0x1123D, 'Khoj'], [0x11280, 0x112A9, 'Mult'],
                [0x112B0, 0x112F9, 'Sind'], [0x11301, 0x11374, 'Gran'], [0x11400, 0x1145D, 'Newa'],
                [0x11480, 0x114D9, 'Tirh'], [0x11580, 0x115DD, 'Sidd'], [0x11600, 0x11659, 'Modi'],
                [0x11680, 0x116C9, 'Takr'], [0x11700, 0x1173F, 'Ahom'], [0x118A0, 0x118FF, 'Wara'],
                [0x11A00, 0x11A47, 'Zanb'], [0x11A50, 0x11AA2, 'Soyo'], [0x11AC0, 0x11AF8, 'Pauc'],
                [0x11C00, 0x11C6C, 'Bhks'], [0x11C70, 0x11CB6, 'Marc'], [0x11D00, 0x11D59, 'Gonm']
            ],
            [[0x12000, 0x1236E, 'Xsux'], [0x12400, 0x12473, 'Xsux']],
            [[0x13000, 0x1342E, 'Egyp']],
            [[0x14400, 0x14646, 'Hluw']],
            [
                // no data for 0x15000-0x15FFF
            ],
            [ // 0x16000-0x16FFF
                [0x16800, 0x16A38, 'Bamu'], [0x16A40, 0x16A6F, 'Mroo'], [0x16AD0, 0x16AF5, 'Bass'],
                [0x16B00, 0x16B8F, 'Hmng'], [0x16F00, 0x16F9F, 'Plrd']
            ],
            [[0x17000, 0x17FFF, 'Tang']],
            [[0x18000, 0x187EC, 'Tang'], [0x18800, 0x18AF2, 'Tang']],
            [
                // no data for 0x19000-0x19FFF
            ],
            [
                // no data for 0x1A000-0x1AFFF
            ],
            [ // 0x1B000-0x1BFFF
                [0x1B002, 0x1B11E, 'Hira'], // no unique code for hentaigana on Wiktionary
                [0x1B170, 0x1B2FB, 'Nshu'], [0x1BC00, 0x1BC9F, 'Dupl']
            ],
            [
                // no data for 0x1C000-0x1CFFF
            ],
            [[0x1D100, 0x1D1DD, 'musical'], [0x1D400, 0x1D7FF, 'Zmth'], [0x1D800, 0x1DAAF, 'Sgnw']],
            [[0x1E000, 0x1E02A, 'Glag'], [0x1E800, 0x1E8D6, 'Mend'], [0x1E900, 0x1E95F, 'Adlm']],
            [[0x1F300, 0x1F6C5, 'Zsym']],
            [[0x20000, 0x20FFF, 'Hani']],
            [[0x21000, 0x21FFF, 'Hani']],
            [[0x22000, 0x22FFF, 'Hani']],
            [[0x23000, 0x23FFF, 'Hani']],
            [[0x24000, 0x24FFF, 'Hani']],
            [[0x25000, 0x25FFF, 'Hani']],
            [[0x26000, 0x26FFF, 'Hani']],
            [[0x27000, 0x27FFF, 'Hani']],
            [[0x28000, 0x28FFF, 'Hani']],
            [[0x29000, 0x29FFF, 'Hani']],
            [[0x2A000, 0x2AFFF, 'Hani']],
            [[0x2B000, 0x2BFFF, 'Hani']],
            [[0x2C000, 0x2CFFF, 'Hani']],
            [[0x2D000, 0x2DFFF, 'Hani']],
            [[0x2E000, 0x2EBE0, 'Hani']]
        ];

        // Individual codepoints; consulted before any range lookup.
        // NOTE(review): only 0x2190 and 0x21FF are listed as Zsym. They look like
        // the two ends of a range that was flattened into single entries -
        // confirm against [[Module:Unicode data/scripts]].
        const charToScript = {
            0x460: 'Cyrs', 0x461: 'Cyrs', 0x462: 'Cyrl', 0x463: 'Cyrl',
            0x2135: 'Zmth', 0x2190: 'Zsym', 0x21FF: 'Zsym',
            0xFA0E: 'Hani', 0xFA0F: 'Hani', 0xFA11: 'Hani', 0xFA13: 'Hani',
            0xFA14: 'Hani', 0xFA1F: 'Hani', 0xFA21: 'Hani', 0xFA23: 'Hani',
            0xFA24: 'Hani', 0xFA27: 'Hani', 0xFA28: 'Hani', 0xFA29: 'Hani',
            0x1056F: 'Aghb', 0x16FE0: 'Tang', 0x1B000: 'Kana', 0x1B001: 'Hira'
        };

        // Groups of 4096 (0x1000) codepoints.
        // Each entry is [firstGroup, lastGroup, scriptCode]. codePointToScript
        // checks this table before scriptRanges, so every codepoint in these
        // groups gets the listed script, including codepoints that fall in the
        // gaps of the corresponding scriptRanges entries.
        const groupToScript = [
            [4, 9, 'Hani'],
            [11, 13, 'Hang'],
            [32, 46, 'Hani']
        ];

        // Used to decide which script "wins" when text contains characters from two
        // script categories.
        // For example, text containing both Grek and polytonic characters should be
        // tagged as polytonic.
        const scriptOverrulings = {
            'Grek': 'polytonic',
            'Cyrl': 'Cyrs',
            'Latinx': 'Latn',
        };

        /*
        Handles scripts that contain two or more basic scripts.
        The first item is the code of the compound script; the other items are
        the component scripts, or in the case of Hani, the other scripts that
        can be used alongside it.
        Hani is treated as a compound script for convenience, because it
        sometimes uses Latn characters.
        */
        const compoundScripts = [
            ["Hani", "Latn"],
            ["Jpan", "Hani", "Hira", "Kana", "Latn"],
            ["Kore", "Hang", "Hani", "Latn"]
        ];

        // Logging is enabled only if `scriptRecognition.log` is truthy on the
        // global object at the time this script runs.
        const log = root.scriptRecognition && root.scriptRecognition.log
            ? console.log.bind(console)
            : () => {};

        /**
         * Scans sorted [lower, upper, result] triples from the start.
         *
         * @param {number} value - Number to look up.
         * @param {Array} ranges - Triples sorted by lower bound.
         * @returns {?string} The result of the first triple containing `value`,
         *     or null. Always null (never undefined) on a miss, so callers can
         *     compare against null.
         */
        function linearSearch(value, ranges) {
            for (const [lower, higher, result] of ranges) {
                // If ranges are greater than value, no match will be found.
                // Short-circuit the loop.
                if (value < lower) return null;
                else if (value <= higher) return result;
            }
            return null;
        }

        /**
         * Binary search over sorted, non-overlapping [lower, upper, result]
         * triples.
         *
         * @param {number} codePoint - Codepoint to look up.
         * @param {Array} [ranges] - Triples sorted by lower bound.
         * @returns {?string} The matching result, or null if there is none or
         *     `ranges` is missing.
         */
        function binarySearch(codePoint, ranges) {
            if (!ranges) return null;

            let bottom = 0;
            let top = ranges.length - 1;
            while (bottom <= top) {
                const middle = (bottom + top) >> 1;
                const [lower, higher, result] = ranges[middle];
                if (codePoint < lower) top = middle - 1;
                else if (codePoint <= higher) return result;
                else bottom = middle + 1;
            }
            return null;
        }

        /**
         * Maps a codepoint to a Wiktionary script code.
         *
         * Lookup order: charToScript, then groupToScript, then the
         * scriptRanges entry for the codepoint's group of 4096.
         *
         * @param {number} codePoint
         * @returns {?string} A Wiktionary script code, or null.
         */
        function codePointToScript(codePoint) {
            const individual = charToScript[codePoint];
            if (individual) return individual;

            const group = codePoint >> 12;
            const groupScript = linearSearch(group, groupToScript);
            if (groupScript) return groupScript;

            const ranges = scriptRanges[group];
            if (ranges === undefined) return null;
            // Short lists are scanned linearly; longer ones are bisected.
            return ranges.length > 5
                ? binarySearch(codePoint, ranges)
                : linearSearch(codePoint, ranges);
        }

        /**
         * Lists the distinct script codes found in a string, in order of first
         * appearance. Characters without a script code are ignored.
         *
         * @param {string} string
         * @returns {string[]}
         */
        function getScripts(string) {
            const scripts = [];
            // for...of iterates by codepoint, not by UTF-16 code unit.
            for (const character of string) {
                const script = codePointToScript(character.codePointAt(0));
                if (script !== null && !scripts.includes(script))
                    scripts.push(script);
            }
            return scripts;
        }

        /**
         * Chooses one script out of an array of scripts, or finds an
         * appropriate compound script.
         *
         * @param {string[]} scripts - Script codes found in the text.
         * @param {string} string - The text itself; used only in log output.
         * @returns {string|undefined} The chosen script code, or undefined if
         *     two of the scripts can be neither overruled nor combined.
         */
        function resolveScriptConflicts(scripts, string) {
            let winningScript = scripts[0];
            for (const script of scripts.slice(1)) {
                if (winningScript === script
                        || scriptOverrulings[script] === winningScript)
                    continue;

                if (scriptOverrulings[winningScript] === script) {
                    log(`${script} won out over ${winningScript}`);
                    winningScript = script;
                    continue;
                }

                const current = winningScript;
                const compoundScript = compoundScripts.find(
                    componentScripts => componentScripts.includes(current)
                        && componentScripts.includes(script));
                if (!compoundScript) {
                    // Nothing later in the list can recover from this, so stop.
                    log(`No script chosen out of ${scripts.join(", ")} in this string: ${string}.`);
                    return undefined;
                }
                log(`${script} and ${winningScript} were replaced with ${compoundScript[0]}`);
                winningScript = compoundScript[0];
            }
            return winningScript;
        }

        /**
         * Determines a single script code for a piece of text.
         *
         * @param {string} text
         * @returns {string|undefined} A script code, or undefined if no
         *     character has a script or the scripts cannot be reconciled.
         */
        function getScript(text) {
            const scripts = getScripts(text);
            return scripts.length > 0
                ? resolveScriptConflicts(scripts, text)
                : undefined;
        }

        /**
         * Tests whether any character of `text` belongs to `scriptToFind`.
         *
         * @param {string} text
         * @param {string} scriptToFind - A Wiktionary script code.
         * @returns {boolean}
         */
        function containsScript(text, scriptToFind) {
            for (const character of text)
                if (codePointToScript(character.codePointAt(0)) === scriptToFind)
                    return true;
            return false;
        }

        root.codePointToScript = codePointToScript;
        root.getScripts = getScripts;
        root.getScript = getScript;
        root.containsScript = containsScript;
    })();
    Retrieved from "https://en.wiktionary.org/w/index.php?title=User:Erutuon/scripts/scriptRecognition.js&oldid=51354560"

    [8]ページ先頭

    ©2009-2025 Movatter.jp