Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit e766bf7

Browse files
authored
fse: Optimize compression (#745)
* fse: Optimize table building

Skipping the loop body when v == 0 helps endzerobits and normcount2. Not writing to s.symbolLen in every iteration helps the other benchmarks.

name old speed new speed delta
Compress/gettysburg-8 181MB/s ± 1% 183MB/s ± 0% +1.15% (p=0.002 n=10+8)
Compress/digits-8 241MB/s ± 0% 241MB/s ± 1% ~ (p=0.434 n=9+10)
Compress/twain-8 218MB/s ± 0% 218MB/s ± 0% ~ (p=0.755 n=10+10)
Compress/low-ent-8 239MB/s ± 0% 239MB/s ± 1% ~ (p=0.853 n=10+10)
Compress/superlow-ent-8 208MB/s ± 1% 208MB/s ± 0% ~ (p=0.408 n=9+7)
Compress/endzerobits-8 11.5MB/s ± 1% 13.3MB/s ± 1% +16.35% (p=0.000 n=10+9)
Compress/pngdata.001-8 224MB/s ± 0% 224MB/s ± 1% +0.38% (p=0.004 n=8+10)
Compress/normcount2-8 35.7MB/s ± 1% 36.6MB/s ± 1% +2.66% (p=0.000 n=10+9)

* fse: Skip bounds checks

Each occurrence of

    v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1]

now incurs three bounds checks instead of four. I haven't found a way to eliminate the remaining three.

name old speed new speed delta
Compress/gettysburg-8 183MB/s ± 0% 189MB/s ± 0% +3.32% (p=0.000 n=8+9)
Compress/digits-8 241MB/s ± 1% 251MB/s ± 1% +4.14% (p=0.000 n=10+9)
Compress/twain-8 218MB/s ± 0% 228MB/s ± 0% +4.36% (p=0.000 n=10+10)
Compress/low-ent-8 239MB/s ± 1% 244MB/s ± 1% +1.90% (p=0.000 n=10+10)
Compress/superlow-ent-8 208MB/s ± 0% 210MB/s ± 0% +0.89% (p=0.000 n=7+8)
Compress/endzerobits-8 13.3MB/s ± 1% 13.4MB/s ± 1% +0.40% (p=0.019 n=9+10)
Compress/pngdata.001-8 224MB/s ± 1% 225MB/s ± 1% +0.41% (p=0.006 n=10+9)
Compress/normcount2-8 36.6MB/s ± 1% 36.4MB/s ± 1% -0.62% (p=0.012 n=9+10)
1 parent 5f40643, commit e766bf7

File tree

1 file changed

+15
-16
lines changed

1 file changed

+15
-16
lines changed

‎fse/compress.go‎

Lines changed: 15 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -146,54 +146,51 @@ func (s *Scratch) compress(src []byte) error {
146146
c1.encodeZero(tt[src[ip-2]])
147147
ip-=2
148148
}
149+
src=src[:ip]
149150

150151
// Main compression loop.
151152
switch {
152153
case!s.zeroBits&&s.actualTableLog<=8:
153154
// We can encode 4 symbols without requiring a flush.
154155
// We do not need to check if any output is 0 bits.
155-
forip>=4 {
156+
for;len(src)>=4;src=src[:len(src)-4] {
156157
s.bw.flush32()
157-
v3,v2,v1,v0:=src[ip-4],src[ip-3],src[ip-2],src[ip-1]
158+
v3,v2,v1,v0:=src[len(src)-4],src[len(src)-3],src[len(src)-2],src[len(src)-1]
158159
c2.encode(tt[v0])
159160
c1.encode(tt[v1])
160161
c2.encode(tt[v2])
161162
c1.encode(tt[v3])
162-
ip-=4
163163
}
164164
case!s.zeroBits:
165165
// We do not need to check if any output is 0 bits.
166-
forip>=4 {
166+
for;len(src)>=4;src=src[:len(src)-4] {
167167
s.bw.flush32()
168-
v3,v2,v1,v0:=src[ip-4],src[ip-3],src[ip-2],src[ip-1]
168+
v3,v2,v1,v0:=src[len(src)-4],src[len(src)-3],src[len(src)-2],src[len(src)-1]
169169
c2.encode(tt[v0])
170170
c1.encode(tt[v1])
171171
s.bw.flush32()
172172
c2.encode(tt[v2])
173173
c1.encode(tt[v3])
174-
ip-=4
175174
}
176175
cases.actualTableLog<=8:
177176
// We can encode 4 symbols without requiring a flush
178-
forip>=4 {
177+
for;len(src)>=4;src=src[:len(src)-4] {
179178
s.bw.flush32()
180-
v3,v2,v1,v0:=src[ip-4],src[ip-3],src[ip-2],src[ip-1]
179+
v3,v2,v1,v0:=src[len(src)-4],src[len(src)-3],src[len(src)-2],src[len(src)-1]
181180
c2.encodeZero(tt[v0])
182181
c1.encodeZero(tt[v1])
183182
c2.encodeZero(tt[v2])
184183
c1.encodeZero(tt[v3])
185-
ip-=4
186184
}
187185
default:
188-
forip>=4 {
186+
for;len(src)>=4;src=src[:len(src)-4] {
189187
s.bw.flush32()
190-
v3,v2,v1,v0:=src[ip-4],src[ip-3],src[ip-2],src[ip-1]
188+
v3,v2,v1,v0:=src[len(src)-4],src[len(src)-3],src[len(src)-2],src[len(src)-1]
191189
c2.encodeZero(tt[v0])
192190
c1.encodeZero(tt[v1])
193191
s.bw.flush32()
194192
c2.encodeZero(tt[v2])
195193
c1.encodeZero(tt[v3])
196-
ip-=4
197194
}
198195
}
199196

@@ -459,15 +456,17 @@ func (s *Scratch) countSimple(in []byte) (max int) {
459456
for_,v:=rangein {
460457
s.count[v]++
461458
}
462-
m:=uint32(0)
459+
m,symlen:=uint32(0),s.symbolLen
463460
fori,v:=ranges.count[:] {
461+
ifv==0 {
462+
continue
463+
}
464464
ifv>m {
465465
m=v
466466
}
467-
ifv>0 {
468-
s.symbolLen=uint16(i)+1
469-
}
467+
symlen=uint16(i)+1
470468
}
469+
s.symbolLen=symlen
471470
returnint(m)
472471
}
473472

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp