16
16
#include <ctype.h>
17
17
18
18
#include "mb/pg_wchar.h"
19
+ #include "port/simd.h"
19
20
#include "utils/builtins.h"
20
21
#include "utils/memutils.h"
21
22
#include "varatt.h"
@@ -177,8 +178,8 @@ static const int8 hexlookup[128] = {
177
178
-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,-1 ,
178
179
};
179
180
180
- uint64
181
- hex_encode (const char * src ,size_t len ,char * dst )
181
+ static inline uint64
182
+ hex_encode_scalar (const char * src ,size_t len ,char * dst )
182
183
{
183
184
const char * end = src + len ;
184
185
@@ -193,6 +194,55 @@ hex_encode(const char *src, size_t len, char *dst)
193
194
return (uint64 )len * 2 ;
194
195
}
195
196
197
+ uint64
198
+ hex_encode (const char * src ,size_t len ,char * dst )
199
+ {
200
+ #ifdef USE_NO_SIMD
201
+ return hex_encode_scalar (src ,len ,dst );
202
+ #else
203
+ const uint64 tail_idx = len & ~(sizeof (Vector8 )- 1 );
204
+ uint64 i ;
205
+
206
+ /*
207
+ * This splits the high and low nibbles of each byte into separate
208
+ * vectors, adds the vectors to a mask that converts the nibbles to their
209
+ * equivalent ASCII bytes, and interleaves those bytes back together to
210
+ * form the final hex-encoded string.
211
+ */
212
+ for (i = 0 ;i < tail_idx ;i += sizeof (Vector8 ))
213
+ {
214
+ Vector8 srcv ;
215
+ Vector8 lo ;
216
+ Vector8 hi ;
217
+ Vector8 mask ;
218
+
219
+ vector8_load (& srcv , (const uint8 * )& src [i ]);
220
+
221
+ lo = vector8_and (srcv ,vector8_broadcast (0x0f ));
222
+ mask = vector8_gt (lo ,vector8_broadcast (0x9 ));
223
+ mask = vector8_and (mask ,vector8_broadcast ('a' - '0' - 10 ));
224
+ mask = vector8_add (mask ,vector8_broadcast ('0' ));
225
+ lo = vector8_add (lo ,mask );
226
+
227
+ hi = vector8_and (srcv ,vector8_broadcast (0xf0 ));
228
+ hi = vector8_shift_right (hi ,4 );
229
+ mask = vector8_gt (hi ,vector8_broadcast (0x9 ));
230
+ mask = vector8_and (mask ,vector8_broadcast ('a' - '0' - 10 ));
231
+ mask = vector8_add (mask ,vector8_broadcast ('0' ));
232
+ hi = vector8_add (hi ,mask );
233
+
234
+ vector8_store ((uint8 * )& dst [i * 2 ],
235
+ vector8_interleave_low (hi ,lo ));
236
+ vector8_store ((uint8 * )& dst [i * 2 + sizeof (Vector8 )],
237
+ vector8_interleave_high (hi ,lo ));
238
+ }
239
+
240
+ (void )hex_encode_scalar (src + i ,len - i ,dst + i * 2 );
241
+
242
+ return (uint64 )len * 2 ;
243
+ #endif
244
+ }
245
+
196
246
static inline bool
197
247
get_hex (const char * cp ,char * out )
198
248
{
@@ -213,8 +263,8 @@ hex_decode(const char *src, size_t len, char *dst)
213
263
return hex_decode_safe (src ,len ,dst ,NULL );
214
264
}
215
265
216
- uint64
217
- hex_decode_safe (const char * src ,size_t len ,char * dst ,Node * escontext )
266
+ static inline uint64
267
+ hex_decode_safe_scalar (const char * src ,size_t len ,char * dst ,Node * escontext )
218
268
{
219
269
const char * s ,
220
270
* srcend ;
@@ -254,6 +304,85 @@ hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext)
254
304
return p - dst ;
255
305
}
256
306
307
+ /*
308
+ * This helper converts each byte to its binary-equivalent nibble by
309
+ * subtraction and combines them to form the return bytes (separated by zero
310
+ * bytes). Returns false if any input bytes are outside the expected ranges of
311
+ * ASCII values. Otherwise, returns true.
312
+ */
313
+ #ifndef USE_NO_SIMD
314
+ static inline bool
315
+ hex_decode_simd_helper (const Vector8 src ,Vector8 * dst )
316
+ {
317
+ Vector8 sub ;
318
+ Vector8 mask_hi = vector8_interleave_low (vector8_broadcast (0 ),vector8_broadcast (0x0f ));
319
+ Vector8 mask_lo = vector8_interleave_low (vector8_broadcast (0x0f ),vector8_broadcast (0 ));
320
+ Vector8 tmp ;
321
+ bool ret ;
322
+
323
+ tmp = vector8_gt (vector8_broadcast ('9' + 1 ),src );
324
+ sub = vector8_and (tmp ,vector8_broadcast ('0' ));
325
+
326
+ tmp = vector8_gt (src ,vector8_broadcast ('A' - 1 ));
327
+ tmp = vector8_and (tmp ,vector8_broadcast ('A' - 10 ));
328
+ sub = vector8_add (sub ,tmp );
329
+
330
+ tmp = vector8_gt (src ,vector8_broadcast ('a' - 1 ));
331
+ tmp = vector8_and (tmp ,vector8_broadcast ('a' - 'A' ));
332
+ sub = vector8_add (sub ,tmp );
333
+
334
+ * dst = vector8_issub (src ,sub );
335
+ ret = !vector8_has_ge (* dst ,0x10 );
336
+
337
+ tmp = vector8_and (* dst ,mask_hi );
338
+ tmp = vector8_shift_right (tmp ,8 );
339
+ * dst = vector8_and (* dst ,mask_lo );
340
+ * dst = vector8_shift_left (* dst ,4 );
341
+ * dst = vector8_or (* dst ,tmp );
342
+ return ret ;
343
+ }
344
+ #endif /* ! USE_NO_SIMD */
345
+
346
+ uint64
347
+ hex_decode_safe (const char * src ,size_t len ,char * dst ,Node * escontext )
348
+ {
349
+ #ifdef USE_NO_SIMD
350
+ return hex_decode_safe_scalar (src ,len ,dst ,escontext );
351
+ #else
352
+ const uint64 tail_idx = len & ~(sizeof (Vector8 )* 2 - 1 );
353
+ uint64 i ;
354
+ bool success = true;
355
+
356
+ /*
357
+ * We must process 2 vectors at a time since the output will be half the
358
+ * length of the input.
359
+ */
360
+ for (i = 0 ;i < tail_idx ;i += sizeof (Vector8 )* 2 )
361
+ {
362
+ Vector8 srcv ;
363
+ Vector8 dstv1 ;
364
+ Vector8 dstv2 ;
365
+
366
+ vector8_load (& srcv , (const uint8 * )& src [i ]);
367
+ success &=hex_decode_simd_helper (srcv ,& dstv1 );
368
+
369
+ vector8_load (& srcv , (const uint8 * )& src [i + sizeof (Vector8 )]);
370
+ success &=hex_decode_simd_helper (srcv ,& dstv2 );
371
+
372
+ vector8_store ((uint8 * )& dst [i /2 ],vector8_pack_16 (dstv1 ,dstv2 ));
373
+ }
374
+
375
+ /*
376
+ * If something didn't look right in the vector path, try again in the
377
+ * scalar path so that we can handle it correctly.
378
+ */
379
+ if (!success )
380
+ i = 0 ;
381
+
382
+ return i /2 + hex_decode_safe_scalar (src + i ,len - i ,dst + i /2 ,escontext );
383
+ #endif
384
+ }
385
+
257
386
static uint64
258
387
hex_enc_len (const char * src ,size_t srclen )
259
388
{