Movatterモバイル変換


[0]ホーム

URL:


Skip to content
Search Gists
Sign in Sign up

Instantly share code, notes, and snippets.

@mikesamuel
Last activeFebruary 21, 2019 05:43
    • Star(1)You must be signed in to star a gist
    • Fork(0)You must be signed in to fork a gist

    Select an option

    Save mikesamuel/20710f94a53e440691f04bf79bc3d756 to your computer and use it in GitHub Desktop.
    A canonicalizing function to make it easy to hash JSON
    "use strict";
    // Prompted by https://esdiscuss.org/topic/json-canonicalize
    // Given a string of JSON produces a string of JSON without unnecessary
    // degrees of freedom like whitespace, optional escape sequences, and
    // unnecessary variance in number representation.
    functionhashable(json){
    conststrs=[]// Side table to collect string bodies
    returnreorderProperties(
    strs,
    json
    // Store string content in a side-table.
    // This makes the content is still valid JSON but without
    // any strings that contain whitespace, commas, colons or brackets.
    .replace(/"(?:[^\\"]|\\.)*"/g,(x)=>{strs.push(x);return`"@${strs.length-1}"`})
    // Eliminate all whitespace.
    .replace(/[\x00-\x20]+/g,'')
    // Canonicalize all numbers.
    .replace(/([+\-]?)(?=[\d.])(\d*)(?:[.](\d*))?(?:[eE]([+\-]?\d+))?(?=[,\]\}]|$)/g,canonNumber))
    // Restore the numbers from the side table
    .replace(/"@(\d+)"/g,canonStr.bind(null,strs))
    }
    // RegExp replacer function that canonicalizes a number even if it
    // is not precisely representable in JavaScript.
    functioncanonNumber(_,sign,integer,fraction,mantissa){
    if(sign!=='-'){sign=''}
    integer=integer||'0'
    fraction=fraction||''
    mantissa=(+mantissa||0)+integer.length-1
    letdigits=(integer+fraction)
    // Strip zeroes from front
    .replace(/^(?:0(?!$))+/,s=>(mantissa-=s.length,''))||'0'
    if(digits==='0'){mantissa=0}
    // Exponential notation.
    if(-26>=mantissa||mantissa>=26){
    constafterdot=digits.substring(1).replace(/0+$/,'')
    returnsign+digits[0]+(afterdot ?`.${afterdot}` :'')+'e'+mantissa
    }
    return(mantissa<0)
    // Pad zeroes left
    // digits=123456, mantissa=-1 -> 0.123456
    ?sign+'0.'+'0'.repeat(-mantissa-1)+digits
    :(mantissa+1<digits.length)
    // Dot in the middle
    ?sign+digits.substring(0,mantissa+1)+'.'+digits.substring(mantissa+1)
    // Pad zeroes right
    // digits=123456, mantissa=6 -> 1234560
    :sign+digits+'0'.repeat(mantissa-digits.length+1)
    }
    // Pulls a string out of a side table. "@1" -> strs[1] but with escape sequences normalized.
    constcanonStr=(strs,s)=>JSON.stringify(JSON.parse(strs[s.substring(2,s.length-1)]))
    // Recursively reorder properties on JSON {}-style records
    functionreorderProperties(strs,s){
    // Pull out tokens and their indices as cues to structure
    consttokens=[]
    consttokenPattern=/[{}\[\],]/g
    for(letmatch;(match=tokenPattern.exec(s));){
    tokens.push(match)// Match has form ['{'] but with index propert.
    }
    if(!tokens.length){returns}
    constmemoTable=[]
    functioncanonKey(strIndex){
    if(memoTable[strIndex]===undefined){
    memoTable[strIndex]=JSON.parse(strs[strIndex])
    }
    returnmemoTable[strIndex]
    }
    // Walk the structure and rebuild the output but ordered.
    returnreorder(0,tokens.length-1)
    functionreorder(left,right){
    const{"0":token, index}=tokens[left]
    letrangeStart=left,valueStart=null
    letdepth,cursor
    letranges=[]
    for(depth=1,cursor=left+1;cursor<right;++cursor){
    switch(tokens[cursor][0]){
    case'{':case'[':
    if(depth===1){valueStart=cursor}
    ++depth
    break
    case']':case'}':--depth;break
    case',':
    if(depth===1){
    ranges.push([rangeStart,valueStart,cursor])
    rangeStart=cursor
    valueStart=null
    }
    }
    }
    ranges.push([rangeStart,valueStart,right])
    ranges=ranges.map(
    ([start,valueStart,end])=>(valueStart===null
    // No nested object or array
    ?s.substring(tokens[start].index+1,tokens[end].index)
    // Recurse to nested object or array
    :s.substring(tokens[start].index+1,tokens[valueStart].index)+reorder(valueStart,end-1)))
    if(token==='{'){
    ranges.sort((a,b)=>{
    constaKey=canonKey(/^"@(\d+)":/.exec(a)[1])
    constbKey=canonKey(/^"@(\d+)":/.exec(b)[1])
    returnaKey<bKey ?-1 :aKey===bKey ?0 :1
    })
    }
    returntoken+ranges.join(',')+tokens[right][0]
    }
    }
    if(typeofmodule!=='undefined'){module.exports={ hashable}}
    // Tests for canonNumber
    void([
    {input:['','123','456','0'],want:'123.456'},
    {input:['+','123','456','0'],want:'123.456'},
    {input:['-','123','456','0'],want:'-123.456'},
    {input:['','123','456','30'],want:'1.23456e32'},
    {input:['','123','456','-30'],want:'1.23456e-28'},
    {input:['-','123','456','-30'],want:'-1.23456e-28'},
    {input:['','123','456','-7'],want:'0.0000123456'},
    {input:['','123','456','-6'],want:'0.000123456'},
    {input:['','123','456','-5'],want:'0.00123456'},
    {input:['','123','456','-4'],want:'0.0123456'},
    {input:['','123','456','-3'],want:'0.123456'},
    {input:['','123','456','-2'],want:'1.23456'},
    {input:['','123','456','-1'],want:'12.3456'},
    {input:['','123','456','-0'],want:'123.456'},
    {input:['','123','456','0'],want:'123.456'},
    {input:['','123','456','1'],want:'1234.56'},
    {input:['','123','456','2'],want:'12345.6'},
    {input:['','123','456','3'],want:'123456'},
    {input:['','123','456','4'],want:'1234560'},
    {input:['','123','456','5'],want:'12345600'},
    {input:['','123','456','6'],want:'123456000'},
    {input:['','123','456','7'],want:'1234560000'},
    {input:['','0123','456','2'],want:'12345.6'},
    {input:['','0','05','-2'],want:'0.0005'},
    {input:['','0','05','2'],want:'5'},
    ].forEach(({ input, want},i)=>{
    console.group(`Test #${i}:${JSON.stringify(input)}`)
    constgot=canonNumber(0, ...input)
    console.log(got)
    if(got!==want){
    thrownewError('expected '+want)
    }
    console.groupEnd()
    }))
    // Tests for hashable
    void([
    {input:String.raw`0`,want:String.raw`0`},
    {input:String.raw`0.0`,want:String.raw`0`},
    {input:String.raw`00`,want:String.raw`0`},
    {input:String.raw`1e100`,want:String.raw`1e100`},
    {input:String.raw`"foo"`,want:String.raw`"foo"`},
    {input:String.raw`{}`,want:String.raw`{}`},
    {input:String.raw`{ }`,want:String.raw`{}`},
    {input:String.raw`{ "c": 1, "a": 2, "b": 3 }`,want:String.raw`{"a":2,"b":3,"c":1}`},
    {input:String.raw`{ "c": 1, "a": { "~": 2, "b": 3 }}`,want:String.raw`{"a":{"b":3,"~":2},"c":1}`},
    {input:String.raw`{ "": [] }`,want:String.raw`{"":[]}`},
    {input:String.raw`[]`,want:String.raw`[]`},
    {input:String.raw`[ ]`,want:String.raw`[]`},
    {input:String.raw`"\u0022"`,want:String.raw`"\""`},
    {input:String.raw`"\\\/"`,want:String.raw`"\\/"`},
    {
    input:String.raw`{
    "foo" : {
    "boo": null,
    "123.456e1" : [ 123.456e1, "foo\/bar\u0022"],
    "bar":"baz"
    }
    } `,
    want:String.raw`{"foo":{"123.456e1":[1234.56,"foo/bar\""],"bar":"baz","boo":null}}`
    },
    ].forEach(({ input, want},i)=>{
    console.group(`Test #${i}:${JSON.stringify(input)}`)
    constgot=hashable(input)
    if(got!==want){
    thrownewError(`got${got}, expected${want}`)
    }
    console.groupEnd()
    }))
    @cyberphone
    Copy link

    Hi Mike,
    Although the JSON canonicalization concept I'm working on is very different to the one above I have adopted a term I got from you "Hashable" JSON:
    https://tools.ietf.org/html/draft-rundgren-json-canonicalization-scheme-05
    One reviewer claimed it was a very confused term but I think it is perfect : )
    Thanx!

    Sign up for freeto join this conversation on GitHub. Already have an account?Sign in to comment

    [8]ページ先頭

    ©2009-2026 Movatter.jp