Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit ec8719c

Browse files
nathan-bossart, chiranmoyf, SusmithaDevanga17
committed
Optimize hex_encode() and hex_decode() using SIMD.
The hex_encode() and hex_decode() functions serve as the workhorses for hexadecimal data for bytea's text format conversion functions, and some workloads are sensitive to their performance. This commit adds new implementations that use routines from port/simd.h, which testing indicates are much faster for larger inputs. For small or invalid inputs, we fall back on the existing scalar versions.

Since we are using port/simd.h, these optimizations apply to both x86-64 and AArch64.

Author: Nathan Bossart <nathandbossart@gmail.com>
Co-authored-by: Chiranmoy Bhattacharya <chiranmoy.bhattacharya@fujitsu.com>
Co-authored-by: Susmitha Devanga <devanga.susmitha@fujitsu.com>
Reviewed-by: John Naylor <johncnaylorls@gmail.com>
Discussion: https://postgr.es/m/aLhVWTRy0QPbW2tl%40nathan
1 parent 5b5e8a2 commit ec8719c

File tree

4 files changed

+418
-4
lines changed

4 files changed

+418
-4
lines changed

‎src/backend/utils/adt/encode.c‎

Lines changed: 133 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include<ctype.h>
1717

1818
#include"mb/pg_wchar.h"
19+
#include"port/simd.h"
1920
#include"utils/builtins.h"
2021
#include"utils/memutils.h"
2122
#include"varatt.h"
@@ -177,8 +178,8 @@ static const int8 hexlookup[128] = {
177178
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
178179
};
179180

180-
uint64
181-
hex_encode(constchar*src,size_tlen,char*dst)
181+
staticinlineuint64
182+
hex_encode_scalar(constchar*src,size_tlen,char*dst)
182183
{
183184
constchar*end=src+len;
184185

@@ -193,6 +194,55 @@ hex_encode(const char *src, size_t len, char *dst)
193194
return (uint64)len*2;
194195
}
195196

197+
uint64
198+
hex_encode(constchar*src,size_tlen,char*dst)
199+
{
200+
#ifdefUSE_NO_SIMD
201+
returnhex_encode_scalar(src,len,dst);
202+
#else
203+
constuint64tail_idx=len& ~(sizeof(Vector8)-1);
204+
uint64i;
205+
206+
/*
207+
* This splits the high and low nibbles of each byte into separate
208+
* vectors, adds the vectors to a mask that converts the nibbles to their
209+
* equivalent ASCII bytes, and interleaves those bytes back together to
210+
* form the final hex-encoded string.
211+
*/
212+
for (i=0;i<tail_idx;i+=sizeof(Vector8))
213+
{
214+
Vector8srcv;
215+
Vector8lo;
216+
Vector8hi;
217+
Vector8mask;
218+
219+
vector8_load(&srcv, (constuint8*)&src[i]);
220+
221+
lo=vector8_and(srcv,vector8_broadcast(0x0f));
222+
mask=vector8_gt(lo,vector8_broadcast(0x9));
223+
mask=vector8_and(mask,vector8_broadcast('a'-'0'-10));
224+
mask=vector8_add(mask,vector8_broadcast('0'));
225+
lo=vector8_add(lo,mask);
226+
227+
hi=vector8_and(srcv,vector8_broadcast(0xf0));
228+
hi=vector8_shift_right(hi,4);
229+
mask=vector8_gt(hi,vector8_broadcast(0x9));
230+
mask=vector8_and(mask,vector8_broadcast('a'-'0'-10));
231+
mask=vector8_add(mask,vector8_broadcast('0'));
232+
hi=vector8_add(hi,mask);
233+
234+
vector8_store((uint8*)&dst[i*2],
235+
vector8_interleave_low(hi,lo));
236+
vector8_store((uint8*)&dst[i*2+sizeof(Vector8)],
237+
vector8_interleave_high(hi,lo));
238+
}
239+
240+
(void)hex_encode_scalar(src+i,len-i,dst+i*2);
241+
242+
return (uint64)len*2;
243+
#endif
244+
}
245+
196246
staticinlinebool
197247
get_hex(constchar*cp,char*out)
198248
{
@@ -213,8 +263,8 @@ hex_decode(const char *src, size_t len, char *dst)
213263
returnhex_decode_safe(src,len,dst,NULL);
214264
}
215265

216-
uint64
217-
hex_decode_safe(constchar*src,size_tlen,char*dst,Node*escontext)
266+
staticinlineuint64
267+
hex_decode_safe_scalar(constchar*src,size_tlen,char*dst,Node*escontext)
218268
{
219269
constchar*s,
220270
*srcend;
@@ -254,6 +304,85 @@ hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext)
254304
returnp-dst;
255305
}
256306

307+
/*
308+
* This helper converts each byte to its binary-equivalent nibble by
309+
* subtraction and combines them to form the return bytes (separated by zero
310+
* bytes). Returns false if any input bytes are outside the expected ranges of
311+
* ASCII values. Otherwise, returns true.
312+
*/
313+
#ifndefUSE_NO_SIMD
314+
staticinlinebool
315+
hex_decode_simd_helper(constVector8src,Vector8*dst)
316+
{
317+
Vector8sub;
318+
Vector8mask_hi=vector8_interleave_low(vector8_broadcast(0),vector8_broadcast(0x0f));
319+
Vector8mask_lo=vector8_interleave_low(vector8_broadcast(0x0f),vector8_broadcast(0));
320+
Vector8tmp;
321+
boolret;
322+
323+
tmp=vector8_gt(vector8_broadcast('9'+1),src);
324+
sub=vector8_and(tmp,vector8_broadcast('0'));
325+
326+
tmp=vector8_gt(src,vector8_broadcast('A'-1));
327+
tmp=vector8_and(tmp,vector8_broadcast('A'-10));
328+
sub=vector8_add(sub,tmp);
329+
330+
tmp=vector8_gt(src,vector8_broadcast('a'-1));
331+
tmp=vector8_and(tmp,vector8_broadcast('a'-'A'));
332+
sub=vector8_add(sub,tmp);
333+
334+
*dst=vector8_issub(src,sub);
335+
ret= !vector8_has_ge(*dst,0x10);
336+
337+
tmp=vector8_and(*dst,mask_hi);
338+
tmp=vector8_shift_right(tmp,8);
339+
*dst=vector8_and(*dst,mask_lo);
340+
*dst=vector8_shift_left(*dst,4);
341+
*dst=vector8_or(*dst,tmp);
342+
returnret;
343+
}
344+
#endif/* ! USE_NO_SIMD */
345+
346+
uint64
347+
hex_decode_safe(constchar*src,size_tlen,char*dst,Node*escontext)
348+
{
349+
#ifdefUSE_NO_SIMD
350+
returnhex_decode_safe_scalar(src,len,dst,escontext);
351+
#else
352+
constuint64tail_idx=len& ~(sizeof(Vector8)*2-1);
353+
uint64i;
354+
boolsuccess= true;
355+
356+
/*
357+
* We must process 2 vectors at a time since the output will be half the
358+
* length of the input.
359+
*/
360+
for (i=0;i<tail_idx;i+=sizeof(Vector8)*2)
361+
{
362+
Vector8srcv;
363+
Vector8dstv1;
364+
Vector8dstv2;
365+
366+
vector8_load(&srcv, (constuint8*)&src[i]);
367+
success &=hex_decode_simd_helper(srcv,&dstv1);
368+
369+
vector8_load(&srcv, (constuint8*)&src[i+sizeof(Vector8)]);
370+
success &=hex_decode_simd_helper(srcv,&dstv2);
371+
372+
vector8_store((uint8*)&dst[i /2],vector8_pack_16(dstv1,dstv2));
373+
}
374+
375+
/*
376+
* If something didn't look right in the vector path, try again in the
377+
* scalar path so that we can handle it correctly.
378+
*/
379+
if (!success)
380+
i=0;
381+
382+
returni /2+hex_decode_safe_scalar(src+i,len-i,dst+i /2,escontext);
383+
#endif
384+
}
385+
257386
staticuint64
258387
hex_enc_len(constchar*src,size_tsrclen)
259388
{

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp