Instantly share code, notes, and snippets.
Last activeFebruary 21, 2019 05:43
Save mikesamuel/20710f94a53e440691f04bf79bc3d756 to your computer and use it in GitHub Desktop.
A canonicalizing function to make it easy to hash JSON
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.Learn more about bidirectional Unicode characters
| "use strict"; | |
| // Prompted by https://esdiscuss.org/topic/json-canonicalize | |
| // Given a string of JSON produces a string of JSON without unnecessary | |
| // degrees of freedom like whitespace, optional escape sequences, and | |
| // unnecessary variance in number representation. | |
| functionhashable(json){ | |
| conststrs=[]// Side table to collect string bodies | |
| returnreorderProperties( | |
| strs, | |
| json | |
| // Store string content in a side-table. | |
| // This makes the content is still valid JSON but without | |
| // any strings that contain whitespace, commas, colons or brackets. | |
| .replace(/"(?:[^\\"]|\\.)*"/g,(x)=>{strs.push(x);return`"@${strs.length-1}"`}) | |
| // Eliminate all whitespace. | |
| .replace(/[\x00-\x20]+/g,'') | |
| // Canonicalize all numbers. | |
| .replace(/([+\-]?)(?=[\d.])(\d*)(?:[.](\d*))?(?:[eE]([+\-]?\d+))?(?=[,\]\}]|$)/g,canonNumber)) | |
| // Restore the numbers from the side table | |
| .replace(/"@(\d+)"/g,canonStr.bind(null,strs)) | |
| } | |
| // RegExp replacer function that canonicalizes a number even if it | |
| // is not precisely representable in JavaScript. | |
| functioncanonNumber(_,sign,integer,fraction,mantissa){ | |
| if(sign!=='-'){sign=''} | |
| integer=integer||'0' | |
| fraction=fraction||'' | |
| mantissa=(+mantissa||0)+integer.length-1 | |
| letdigits=(integer+fraction) | |
| // Strip zeroes from front | |
| .replace(/^(?:0(?!$))+/,s=>(mantissa-=s.length,''))||'0' | |
| if(digits==='0'){mantissa=0} | |
| // Exponential notation. | |
| if(-26>=mantissa||mantissa>=26){ | |
| constafterdot=digits.substring(1).replace(/0+$/,'') | |
| returnsign+digits[0]+(afterdot ?`.${afterdot}` :'')+'e'+mantissa | |
| } | |
| return(mantissa<0) | |
| // Pad zeroes left | |
| // digits=123456, mantissa=-1 -> 0.123456 | |
| ?sign+'0.'+'0'.repeat(-mantissa-1)+digits | |
| :(mantissa+1<digits.length) | |
| // Dot in the middle | |
| ?sign+digits.substring(0,mantissa+1)+'.'+digits.substring(mantissa+1) | |
| // Pad zeroes right | |
| // digits=123456, mantissa=6 -> 1234560 | |
| :sign+digits+'0'.repeat(mantissa-digits.length+1) | |
| } | |
| // Pulls a string out of a side table. "@1" -> strs[1] but with escape sequences normalized. | |
| constcanonStr=(strs,s)=>JSON.stringify(JSON.parse(strs[s.substring(2,s.length-1)])) | |
| // Recursively reorder properties on JSON {}-style records | |
| functionreorderProperties(strs,s){ | |
| // Pull out tokens and their indices as cues to structure | |
| consttokens=[] | |
| consttokenPattern=/[{}\[\],]/g | |
| for(letmatch;(match=tokenPattern.exec(s));){ | |
| tokens.push(match)// Match has form ['{'] but with index propert. | |
| } | |
| if(!tokens.length){returns} | |
| constmemoTable=[] | |
| functioncanonKey(strIndex){ | |
| if(memoTable[strIndex]===undefined){ | |
| memoTable[strIndex]=JSON.parse(strs[strIndex]) | |
| } | |
| returnmemoTable[strIndex] | |
| } | |
| // Walk the structure and rebuild the output but ordered. | |
| returnreorder(0,tokens.length-1) | |
| functionreorder(left,right){ | |
| const{"0":token, index}=tokens[left] | |
| letrangeStart=left,valueStart=null | |
| letdepth,cursor | |
| letranges=[] | |
| for(depth=1,cursor=left+1;cursor<right;++cursor){ | |
| switch(tokens[cursor][0]){ | |
| case'{':case'[': | |
| if(depth===1){valueStart=cursor} | |
| ++depth | |
| break | |
| case']':case'}':--depth;break | |
| case',': | |
| if(depth===1){ | |
| ranges.push([rangeStart,valueStart,cursor]) | |
| rangeStart=cursor | |
| valueStart=null | |
| } | |
| } | |
| } | |
| ranges.push([rangeStart,valueStart,right]) | |
| ranges=ranges.map( | |
| ([start,valueStart,end])=>(valueStart===null | |
| // No nested object or array | |
| ?s.substring(tokens[start].index+1,tokens[end].index) | |
| // Recurse to nested object or array | |
| :s.substring(tokens[start].index+1,tokens[valueStart].index)+reorder(valueStart,end-1))) | |
| if(token==='{'){ | |
| ranges.sort((a,b)=>{ | |
| constaKey=canonKey(/^"@(\d+)":/.exec(a)[1]) | |
| constbKey=canonKey(/^"@(\d+)":/.exec(b)[1]) | |
| returnaKey<bKey ?-1 :aKey===bKey ?0 :1 | |
| }) | |
| } | |
| returntoken+ranges.join(',')+tokens[right][0] | |
| } | |
| } | |
| if(typeofmodule!=='undefined'){module.exports={ hashable}} | |
| // Tests for canonNumber | |
| void([ | |
| {input:['','123','456','0'],want:'123.456'}, | |
| {input:['+','123','456','0'],want:'123.456'}, | |
| {input:['-','123','456','0'],want:'-123.456'}, | |
| {input:['','123','456','30'],want:'1.23456e32'}, | |
| {input:['','123','456','-30'],want:'1.23456e-28'}, | |
| {input:['-','123','456','-30'],want:'-1.23456e-28'}, | |
| {input:['','123','456','-7'],want:'0.0000123456'}, | |
| {input:['','123','456','-6'],want:'0.000123456'}, | |
| {input:['','123','456','-5'],want:'0.00123456'}, | |
| {input:['','123','456','-4'],want:'0.0123456'}, | |
| {input:['','123','456','-3'],want:'0.123456'}, | |
| {input:['','123','456','-2'],want:'1.23456'}, | |
| {input:['','123','456','-1'],want:'12.3456'}, | |
| {input:['','123','456','-0'],want:'123.456'}, | |
| {input:['','123','456','0'],want:'123.456'}, | |
| {input:['','123','456','1'],want:'1234.56'}, | |
| {input:['','123','456','2'],want:'12345.6'}, | |
| {input:['','123','456','3'],want:'123456'}, | |
| {input:['','123','456','4'],want:'1234560'}, | |
| {input:['','123','456','5'],want:'12345600'}, | |
| {input:['','123','456','6'],want:'123456000'}, | |
| {input:['','123','456','7'],want:'1234560000'}, | |
| {input:['','0123','456','2'],want:'12345.6'}, | |
| {input:['','0','05','-2'],want:'0.0005'}, | |
| {input:['','0','05','2'],want:'5'}, | |
| ].forEach(({ input, want},i)=>{ | |
| console.group(`Test #${i}:${JSON.stringify(input)}`) | |
| constgot=canonNumber(0, ...input) | |
| console.log(got) | |
| if(got!==want){ | |
| thrownewError('expected '+want) | |
| } | |
| console.groupEnd() | |
| })) | |
| // Tests for hashable | |
| void([ | |
| {input:String.raw`0`,want:String.raw`0`}, | |
| {input:String.raw`0.0`,want:String.raw`0`}, | |
| {input:String.raw`00`,want:String.raw`0`}, | |
| {input:String.raw`1e100`,want:String.raw`1e100`}, | |
| {input:String.raw`"foo"`,want:String.raw`"foo"`}, | |
| {input:String.raw`{}`,want:String.raw`{}`}, | |
| {input:String.raw`{ }`,want:String.raw`{}`}, | |
| {input:String.raw`{ "c": 1, "a": 2, "b": 3 }`,want:String.raw`{"a":2,"b":3,"c":1}`}, | |
| {input:String.raw`{ "c": 1, "a": { "~": 2, "b": 3 }}`,want:String.raw`{"a":{"b":3,"~":2},"c":1}`}, | |
| {input:String.raw`{ "": [] }`,want:String.raw`{"":[]}`}, | |
| {input:String.raw`[]`,want:String.raw`[]`}, | |
| {input:String.raw`[ ]`,want:String.raw`[]`}, | |
| {input:String.raw`"\u0022"`,want:String.raw`"\""`}, | |
| {input:String.raw`"\\\/"`,want:String.raw`"\\/"`}, | |
| { | |
| input:String.raw`{ | |
| "foo" : { | |
| "boo": null, | |
| "123.456e1" : [ 123.456e1, "foo\/bar\u0022"], | |
| "bar":"baz" | |
| } | |
| } `, | |
| want:String.raw`{"foo":{"123.456e1":[1234.56,"foo/bar\""],"bar":"baz","boo":null}}` | |
| }, | |
| ].forEach(({ input, want},i)=>{ | |
| console.group(`Test #${i}:${JSON.stringify(input)}`) | |
| constgot=hashable(input) | |
| if(got!==want){ | |
| thrownewError(`got${got}, expected${want}`) | |
| } | |
| console.groupEnd() | |
| })) |
cyberphone commentedFeb 21, 2019
Hi Mike,
Although the JSON canonicalization concept I'm working on is very different to the one above I have adopted a term I got from you "Hashable" JSON:
https://tools.ietf.org/html/draft-rundgren-json-canonicalization-scheme-05
One reviewer claimed it was a very confused term but I think it is perfect : )
Thanx!
Sign up for freeto join this conversation on GitHub. Already have an account?Sign in to comment