Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commita13f5dc

Browse files
committed
Optimize fastXOR with math/bits
Seegolang/go#31586 (comment)Thanks@renthrayskbenchmark old MB/s new MB/s speedupBenchmarkXOR/2/fast-8 470.88 492.61 1.05xBenchmarkXOR/3/fast-8 602.24 719.25 1.19xBenchmarkXOR/4/fast-8 718.82 1186.64 1.65xBenchmarkXOR/8/fast-8 1027.60 1718.71 1.67xBenchmarkXOR/16/fast-8 1413.31 3430.46 2.43xBenchmarkXOR/32/fast-8 2701.81 5585.42 2.07xBenchmarkXOR/128/fast-8 7757.97 13432.37 1.73xBenchmarkXOR/512/fast-8 15155.03 18797.79 1.24xBenchmarkXOR/4096/fast-8 20689.95 20334.61 0.98xBenchmarkXOR/16384/fast-8 21687.87 21613.94 1.00xNow its faster than basic XOR at every byte size greater than 2 onlittle endian amd64 machines.
1 parent2f8f69c commita13f5dc

File tree

4 files changed

+94
-81
lines changed

4 files changed

+94
-81
lines changed

‎conn.go‎

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"bufio"
77
"context"
88
"crypto/rand"
9+
"encoding/binary"
910
"errors"
1011
"fmt"
1112
"io"
@@ -81,7 +82,7 @@ type Conn struct {
8182
readerMsgCtx context.Context
8283
readerMsgHeaderheader
8384
readerFrameEOFbool
84-
readerMaskPosint
85+
readerMaskKeyuint32
8586

8687
setReadTimeoutchan context.Context
8788
setWriteTimeoutchan context.Context
@@ -324,7 +325,7 @@ func (c *Conn) handleControl(ctx context.Context, h header) error {
324325
}
325326

326327
ifh.masked {
327-
fastXOR(h.maskKey,0,b)
328+
fastXOR(h.maskKey,b)
328329
}
329330

330331
switchh.opcode {
@@ -445,8 +446,8 @@ func (c *Conn) reader(ctx context.Context, lock bool) (MessageType, io.Reader, e
445446

446447
c.readerMsgCtx=ctx
447448
c.readerMsgHeader=h
449+
c.readerMaskKey=h.maskKey
448450
c.readerFrameEOF=false
449-
c.readerMaskPos=0
450451
c.readMsgLeft=c.msgReadLimit.Load()
451452

452453
r:=&messageReader{
@@ -532,7 +533,7 @@ func (r *messageReader) read(p []byte, lock bool) (int, error) {
532533

533534
r.c.readerMsgHeader=h
534535
r.c.readerFrameEOF=false
535-
r.c.readerMaskPos=0
536+
r.c.readerMaskKey=h.maskKey
536537
}
537538

538539
h:=r.c.readerMsgHeader
@@ -545,7 +546,7 @@ func (r *messageReader) read(p []byte, lock bool) (int, error) {
545546
h.payloadLength-=int64(n)
546547
r.c.readMsgLeft-=int64(n)
547548
ifh.masked {
548-
r.c.readerMaskPos=fastXOR(h.maskKey,r.c.readerMaskPos,p)
549+
r.c.readerMaskKey=fastXOR(r.c.readerMaskKey,p)
549550
}
550551
r.c.readerMsgHeader=h
551552

@@ -761,7 +762,7 @@ func (c *Conn) writeFrame(ctx context.Context, fin bool, opcode opcode, p []byte
761762
c.writeHeader.payloadLength=int64(len(p))
762763

763764
ifc.client {
764-
_,err:=io.ReadFull(rand.Reader,c.writeHeader.maskKey[:])
765+
err=binary.Read(rand.Reader,binary.BigEndian,&c.writeHeader.maskKey)
765766
iferr!=nil {
766767
return0,fmt.Errorf("failed to generate masking key: %w",err)
767768
}
@@ -809,7 +810,7 @@ func (c *Conn) realWriteFrame(ctx context.Context, h header, p []byte) (n int, e
809810
}
810811

811812
ifc.client {
812-
varkeyposint
813+
maskKey:=h.maskKey
813814
forlen(p)>0 {
814815
ifc.bw.Available()==0 {
815816
err=c.bw.Flush()
@@ -831,7 +832,7 @@ func (c *Conn) realWriteFrame(ctx context.Context, h header, p []byte) (n int, e
831832
returnn,err
832833
}
833834

834-
keypos=fastXOR(h.maskKey,keypos,c.writeBuf[i:i+n2])
835+
maskKey=fastXOR(maskKey,c.writeBuf[i:i+n2])
835836

836837
p=p[n2:]
837838
n+=n2

‎conn_export_test.go‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ func (c *Conn) ReadFrame(ctx context.Context) (OpCode, []byte, error) {
3737
return0,nil,err
3838
}
3939
ifh.masked {
40-
fastXOR(h.maskKey,0,b)
40+
fastXOR(h.maskKey,b)
4141
}
4242
returnOpCode(h.opcode),b,nil
4343
}

‎frame.go‎

Lines changed: 56 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"fmt"
77
"io"
88
"math"
9+
"math/bits"
910
)
1011

1112
//go:generate stringer -type=opcode,MessageType,StatusCode -output=frame_stringer.go
@@ -69,7 +70,7 @@ type header struct {
6970
payloadLengthint64
7071

7172
maskedbool
72-
maskKey[4]byte
73+
maskKeyuint32
7374
}
7475

7576
funcmakeWriteHeaderBuf() []byte {
@@ -119,7 +120,7 @@ func writeHeader(b []byte, h header) []byte {
119120
ifh.masked {
120121
b[1]|=1<<7
121122
b=b[:len(b)+4]
122-
copy(b[len(b)-4:],h.maskKey[:])
123+
binary.LittleEndian.PutUint32(b[len(b)-4:],h.maskKey)
123124
}
124125

125126
returnb
@@ -192,7 +193,7 @@ func readHeader(b []byte, r io.Reader) (header, error) {
192193
}
193194

194195
ifh.masked {
195-
copy(h.maskKey[:],b)
196+
h.maskKey=binary.LittleEndian.Uint32(b)
196197
}
197198

198199
returnh,nil
@@ -321,122 +322,122 @@ func (ce CloseError) bytes() ([]byte, error) {
321322
returnbuf,nil
322323
}
323324

324-
// xor applies the WebSocket masking algorithm to p
325-
// with the given key where the first 3 bits of pos
326-
// are the starting position in the key.
325+
// fastXOR applies the WebSocket masking algorithm to p
326+
// with the given key.
327327
// See https://tools.ietf.org/html/rfc6455#section-5.3
328328
//
329-
// The returned value is the position of the next byte
330-
// to be used for masking in the key. This is so that
331-
// unmasking can be performed without the entire frame.
332-
funcfastXOR(key [4]byte,keyPosint,b []byte)int {
333-
// If the payload is greater than or equal to 16 bytes, then it's worth
334-
// masking 8 bytes at a time.
335-
// Optimization from https://github.com/golang/go/issues/31586#issuecomment-485530859
336-
iflen(b)>=16 {
337-
// We first create a key that is 8 bytes long
338-
// and is aligned on the position correctly.
339-
varalignedKey [8]byte
340-
fori:=rangealignedKey {
341-
alignedKey[i]=key[(i+keyPos)&3]
342-
}
343-
k:=binary.LittleEndian.Uint64(alignedKey[:])
329+
// The returned value is the correctly rotated key to
330+
// to continue to mask/unmask the message.
331+
//
332+
// It is optimized for LittleEndian and expects the key
333+
// to be in little endian.
334+
funcfastXOR(keyuint32,b []byte)uint32 {
335+
iflen(b)>=8 {
336+
key64:=uint64(key)<<32|uint64(key)
344337

345338
// At some point in the future we can clean these unrolled loops up.
346339
// See https://github.com/golang/go/issues/31586#issuecomment-487436401
347340

348341
// Then we xor until b is less than 128 bytes.
349342
forlen(b)>=128 {
350343
v:=binary.LittleEndian.Uint64(b)
351-
binary.LittleEndian.PutUint64(b,v^k)
344+
binary.LittleEndian.PutUint64(b,v^key64)
352345
v=binary.LittleEndian.Uint64(b[8:])
353-
binary.LittleEndian.PutUint64(b[8:],v^k)
346+
binary.LittleEndian.PutUint64(b[8:],v^key64)
354347
v=binary.LittleEndian.Uint64(b[16:])
355-
binary.LittleEndian.PutUint64(b[16:],v^k)
348+
binary.LittleEndian.PutUint64(b[16:],v^key64)
356349
v=binary.LittleEndian.Uint64(b[24:])
357-
binary.LittleEndian.PutUint64(b[24:],v^k)
350+
binary.LittleEndian.PutUint64(b[24:],v^key64)
358351
v=binary.LittleEndian.Uint64(b[32:])
359-
binary.LittleEndian.PutUint64(b[32:],v^k)
352+
binary.LittleEndian.PutUint64(b[32:],v^key64)
360353
v=binary.LittleEndian.Uint64(b[40:])
361-
binary.LittleEndian.PutUint64(b[40:],v^k)
354+
binary.LittleEndian.PutUint64(b[40:],v^key64)
362355
v=binary.LittleEndian.Uint64(b[48:])
363-
binary.LittleEndian.PutUint64(b[48:],v^k)
356+
binary.LittleEndian.PutUint64(b[48:],v^key64)
364357
v=binary.LittleEndian.Uint64(b[56:])
365-
binary.LittleEndian.PutUint64(b[56:],v^k)
358+
binary.LittleEndian.PutUint64(b[56:],v^key64)
366359
v=binary.LittleEndian.Uint64(b[64:])
367-
binary.LittleEndian.PutUint64(b[64:],v^k)
360+
binary.LittleEndian.PutUint64(b[64:],v^key64)
368361
v=binary.LittleEndian.Uint64(b[72:])
369-
binary.LittleEndian.PutUint64(b[72:],v^k)
362+
binary.LittleEndian.PutUint64(b[72:],v^key64)
370363
v=binary.LittleEndian.Uint64(b[80:])
371-
binary.LittleEndian.PutUint64(b[80:],v^k)
364+
binary.LittleEndian.PutUint64(b[80:],v^key64)
372365
v=binary.LittleEndian.Uint64(b[88:])
373-
binary.LittleEndian.PutUint64(b[88:],v^k)
366+
binary.LittleEndian.PutUint64(b[88:],v^key64)
374367
v=binary.LittleEndian.Uint64(b[96:])
375-
binary.LittleEndian.PutUint64(b[96:],v^k)
368+
binary.LittleEndian.PutUint64(b[96:],v^key64)
376369
v=binary.LittleEndian.Uint64(b[104:])
377-
binary.LittleEndian.PutUint64(b[104:],v^k)
370+
binary.LittleEndian.PutUint64(b[104:],v^key64)
378371
v=binary.LittleEndian.Uint64(b[112:])
379-
binary.LittleEndian.PutUint64(b[112:],v^k)
372+
binary.LittleEndian.PutUint64(b[112:],v^key64)
380373
v=binary.LittleEndian.Uint64(b[120:])
381-
binary.LittleEndian.PutUint64(b[120:],v^k)
374+
binary.LittleEndian.PutUint64(b[120:],v^key64)
382375
b=b[128:]
383376
}
384377

385378
// Then we xor until b is less than 64 bytes.
386379
forlen(b)>=64 {
387380
v:=binary.LittleEndian.Uint64(b)
388-
binary.LittleEndian.PutUint64(b,v^k)
381+
binary.LittleEndian.PutUint64(b,v^key64)
389382
v=binary.LittleEndian.Uint64(b[8:])
390-
binary.LittleEndian.PutUint64(b[8:],v^k)
383+
binary.LittleEndian.PutUint64(b[8:],v^key64)
391384
v=binary.LittleEndian.Uint64(b[16:])
392-
binary.LittleEndian.PutUint64(b[16:],v^k)
385+
binary.LittleEndian.PutUint64(b[16:],v^key64)
393386
v=binary.LittleEndian.Uint64(b[24:])
394-
binary.LittleEndian.PutUint64(b[24:],v^k)
387+
binary.LittleEndian.PutUint64(b[24:],v^key64)
395388
v=binary.LittleEndian.Uint64(b[32:])
396-
binary.LittleEndian.PutUint64(b[32:],v^k)
389+
binary.LittleEndian.PutUint64(b[32:],v^key64)
397390
v=binary.LittleEndian.Uint64(b[40:])
398-
binary.LittleEndian.PutUint64(b[40:],v^k)
391+
binary.LittleEndian.PutUint64(b[40:],v^key64)
399392
v=binary.LittleEndian.Uint64(b[48:])
400-
binary.LittleEndian.PutUint64(b[48:],v^k)
393+
binary.LittleEndian.PutUint64(b[48:],v^key64)
401394
v=binary.LittleEndian.Uint64(b[56:])
402-
binary.LittleEndian.PutUint64(b[56:],v^k)
395+
binary.LittleEndian.PutUint64(b[56:],v^key64)
403396
b=b[64:]
404397
}
405398

406399
// Then we xor until b is less than 32 bytes.
407400
forlen(b)>=32 {
408401
v:=binary.LittleEndian.Uint64(b)
409-
binary.LittleEndian.PutUint64(b,v^k)
402+
binary.LittleEndian.PutUint64(b,v^key64)
410403
v=binary.LittleEndian.Uint64(b[8:])
411-
binary.LittleEndian.PutUint64(b[8:],v^k)
404+
binary.LittleEndian.PutUint64(b[8:],v^key64)
412405
v=binary.LittleEndian.Uint64(b[16:])
413-
binary.LittleEndian.PutUint64(b[16:],v^k)
406+
binary.LittleEndian.PutUint64(b[16:],v^key64)
414407
v=binary.LittleEndian.Uint64(b[24:])
415-
binary.LittleEndian.PutUint64(b[24:],v^k)
408+
binary.LittleEndian.PutUint64(b[24:],v^key64)
416409
b=b[32:]
417410
}
418411

419412
// Then we xor until b is less than 16 bytes.
420413
forlen(b)>=16 {
421414
v:=binary.LittleEndian.Uint64(b)
422-
binary.LittleEndian.PutUint64(b,v^k)
415+
binary.LittleEndian.PutUint64(b,v^key64)
423416
v=binary.LittleEndian.Uint64(b[8:])
424-
binary.LittleEndian.PutUint64(b[8:],v^k)
417+
binary.LittleEndian.PutUint64(b[8:],v^key64)
425418
b=b[16:]
426419
}
427420

428421
// Then we xor until b is less than 8 bytes.
429422
forlen(b)>=8 {
430423
v:=binary.LittleEndian.Uint64(b)
431-
binary.LittleEndian.PutUint64(b,v^k)
424+
binary.LittleEndian.PutUint64(b,v^key64)
432425
b=b[8:]
433426
}
434427
}
435428

429+
// Then we xor until b is less than 4 bytes.
430+
forlen(b)>=4 {
431+
v:=binary.LittleEndian.Uint32(b)
432+
binary.LittleEndian.PutUint32(b,v^key)
433+
b=b[4:]
434+
}
435+
436436
// xor remaining bytes.
437437
fori:=rangeb {
438-
b[i]^=key[keyPos&3]
439-
keyPos++
438+
b[i]^=byte(key)
439+
key=bits.RotateLeft32(key,-8)
440440
}
441-
returnkeyPos&3
441+
442+
returnkey
442443
}

‎frame_test.go‎

Lines changed: 28 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@ package websocket
44

55
import (
66
"bytes"
7+
"encoding/binary"
78
"io"
89
"math"
10+
"math/bits"
911
"math/rand"
1012
"strconv"
1113
"strings"
@@ -133,7 +135,7 @@ func TestHeader(t *testing.T) {
133135
}
134136

135137
ifh.masked {
136-
rand.Read(h.maskKey[:])
138+
h.maskKey=rand.Uint32()
137139
}
138140

139141
testHeader(t,h)
@@ -309,17 +311,17 @@ func Test_validWireCloseCode(t *testing.T) {
309311
funcTest_xor(t*testing.T) {
310312
t.Parallel()
311313

312-
key:= [4]byte{0xa,0xb,0xc,0xff}
314+
key:= []byte{0xa,0xb,0xc,0xff}
315+
key32:=binary.LittleEndian.Uint32(key)
313316
p:= []byte{0xa,0xb,0xc,0xf2,0xc}
314-
pos:=0
315-
pos=fastXOR(key,pos,p)
317+
gotKey32:=fastXOR(key32,p)
316318

317319
ifexp:= []byte{0,0,0,0x0d,0x6};!cmp.Equal(exp,p) {
318320
t.Fatalf("unexpected mask: %v",cmp.Diff(exp,p))
319321
}
320322

321-
ifexp:=1;!cmp.Equal(exp,pos) {
322-
t.Fatalf("unexpected maskpos: %v",cmp.Diff(exp,pos))
323+
ifexp:=bits.RotateLeft32(key32,-8);!cmp.Equal(exp,gotKey32) {
324+
t.Fatalf("unexpected maskkey: %v",cmp.Diff(exp,gotKey32))
323325
}
324326
}
325327

@@ -347,36 +349,45 @@ func BenchmarkXOR(b *testing.B) {
347349

348350
fns:= []struct {
349351
namestring
350-
fnfunc([4]byte,int, []byte)int
352+
fnfunc(b*testing.B,key[4]byte,p []byte)
351353
}{
352354
{
353-
"basic",
354-
basixXOR,
355+
name:"basic",
356+
fn:func(b*testing.B,key [4]byte,p []byte) {
357+
fori:=0;i<b.N;i++ {
358+
basixXOR(key,0,p)
359+
}
360+
},
355361
},
356362
{
357-
"fast",
358-
fastXOR,
363+
name:"fast",
364+
fn:func(b*testing.B,key [4]byte,p []byte) {
365+
key32:=binary.BigEndian.Uint32(key[:])
366+
b.ResetTimer()
367+
368+
fori:=0;i<b.N;i++ {
369+
fastXOR(key32,p)
370+
}
371+
},
359372
},
360373
}
361374

362-
varmaskKey [4]byte
363-
_,err:=rand.Read(maskKey[:])
375+
varkey [4]byte
376+
_,err:=rand.Read(key[:])
364377
iferr!=nil {
365378
b.Fatalf("failed to populate mask key: %v",err)
366379
}
367380

368381
for_,size:=rangesizes {
369-
data:=make([]byte,size)
382+
p:=make([]byte,size)
370383

371384
b.Run(strconv.Itoa(size),func(b*testing.B) {
372385
for_,fn:=rangefns {
373386
b.Run(fn.name,func(b*testing.B) {
374387
b.ReportAllocs()
375388
b.SetBytes(int64(size))
376389

377-
fori:=0;i<b.N;i++ {
378-
fn.fn(maskKey,0,data)
379-
}
390+
fn.fn(b,key,p)
380391
})
381392
}
382393
})

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp