Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 3683af6

Browse files
committed
Speed up byteain by not parsing traditional-style input twice.
Instead of laboriously computing the exact output length, use strlen to get an upper bound cheaply. (This is still O(N) of course, but the constant factor is a lot less.) This will typically result in overallocating the output datum, but that's of little concern since it's a short-lived allocation in just about all use-cases.
A simple microbenchmark showed about 40% speedup for long input strings.
While here, make some cosmetic cleanups and add a test case that covers the double-backslash code path in byteain and byteaout.
Author: Steven Niu <niushiji@gmail.com>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Reviewed-by: Stepan Neretin <slpmcf@gmail.com>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/ca315729-140b-426e-81a6-6cd5cfe7ecc5@gmail.com
1 parent 84409ed commit 3683af6

File tree

3 files changed

+30
-45
lines changed

3 files changed

+30
-45
lines changed

‎src/backend/utils/adt/bytea.c

Lines changed: 16 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -182,27 +182,21 @@ bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
182182
*
183183
*Non-printable characters must be passed as '\nnn' (octal) and are
184184
*converted to internal form. '\' must be passed as '\\'.
185-
*ereport(ERROR, ...) if bad form.
186-
*
187-
*BUGS:
188-
*The input is scanned twice.
189-
*The error checking of input is minimal.
190185
*/
191186
Datum
192187
byteain(PG_FUNCTION_ARGS)
193188
{
194189
char*inputText=PG_GETARG_CSTRING(0);
195190
Node*escontext=fcinfo->context;
191+
size_tlen=strlen(inputText);
192+
size_tbc;
196193
char*tp;
197194
char*rp;
198-
intbc;
199195
bytea*result;
200196

201197
/* Recognize hex input */
202198
if (inputText[0]=='\\'&&inputText[1]=='x')
203199
{
204-
size_tlen=strlen(inputText);
205-
206200
bc= (len-2) /2+VARHDRSZ;/* maximum possible length */
207201
result=palloc(bc);
208202
bc=hex_decode_safe(inputText+2,len-2,VARDATA(result),
@@ -213,70 +207,47 @@ byteain(PG_FUNCTION_ARGS)
213207
}
214208

215209
/* Else, it's the traditional escaped style */
216-
for (bc=0,tp=inputText;*tp!='\0';bc++)
217-
{
218-
if (tp[0]!='\\')
219-
tp++;
220-
elseif ((tp[0]=='\\')&&
221-
(tp[1] >='0'&&tp[1] <='3')&&
222-
(tp[2] >='0'&&tp[2] <='7')&&
223-
(tp[3] >='0'&&tp[3] <='7'))
224-
tp+=4;
225-
elseif ((tp[0]=='\\')&&
226-
(tp[1]=='\\'))
227-
tp+=2;
228-
else
229-
{
230-
/*
231-
* one backslash, not followed by another or ### valid octal
232-
*/
233-
ereturn(escontext, (Datum)0,
234-
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
235-
errmsg("invalid input syntax for type %s","bytea")));
236-
}
237-
}
238-
239-
bc+=VARHDRSZ;
240-
241-
result= (bytea*)palloc(bc);
242-
SET_VARSIZE(result,bc);
210+
result= (bytea*)palloc(len+VARHDRSZ);/* maximum possible length */
243211

244212
tp=inputText;
245213
rp=VARDATA(result);
246214
while (*tp!='\0')
247215
{
248216
if (tp[0]!='\\')
249217
*rp++=*tp++;
250-
elseif ((tp[0]=='\\')&&
251-
(tp[1] >='0'&&tp[1] <='3')&&
218+
elseif ((tp[1] >='0'&&tp[1] <='3')&&
252219
(tp[2] >='0'&&tp[2] <='7')&&
253220
(tp[3] >='0'&&tp[3] <='7'))
254221
{
255-
bc=VAL(tp[1]);
256-
bc <<=3;
257-
bc+=VAL(tp[2]);
258-
bc <<=3;
259-
*rp++=bc+VAL(tp[3]);
222+
intv;
223+
224+
v=VAL(tp[1]);
225+
v <<=3;
226+
v+=VAL(tp[2]);
227+
v <<=3;
228+
*rp++=v+VAL(tp[3]);
260229

261230
tp+=4;
262231
}
263-
elseif ((tp[0]=='\\')&&
264-
(tp[1]=='\\'))
232+
elseif (tp[1]=='\\')
265233
{
266234
*rp++='\\';
267235
tp+=2;
268236
}
269237
else
270238
{
271239
/*
272-
*We should never get here. The first pass should not allow it.
240+
*one backslash, not followed by another or ### valid octal
273241
*/
274242
ereturn(escontext, (Datum)0,
275243
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
276244
errmsg("invalid input syntax for type %s","bytea")));
277245
}
278246
}
279247

248+
bc=rp-VARDATA(result);/* actual length */
249+
SET_VARSIZE(result,bc+VARHDRSZ);
250+
280251
PG_RETURN_BYTEA_P(result);
281252
}
282253

‎src/test/regress/expected/strings.out

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -236,6 +236,12 @@ SELECT E'De\\678dBeEf'::bytea;
236236
ERROR: invalid input syntax for type bytea
237237
LINE 1: SELECT E'De\\678dBeEf'::bytea;
238238
^
239+
SELECT E'DeAd\\\\BeEf'::bytea;
240+
bytea
241+
----------------------
242+
\x446541645c42654566
243+
(1 row)
244+
239245
SELECT reverse(''::bytea);
240246
reverse
241247
---------
@@ -291,6 +297,12 @@ SELECT E'De\\123dBeEf'::bytea;
291297
DeSdBeEf
292298
(1 row)
293299

300+
SELECT E'DeAd\\\\BeEf'::bytea;
301+
bytea
302+
------------
303+
DeAd\\BeEf
304+
(1 row)
305+
294306
-- Test non-error-throwing API too
295307
SELECT pg_input_is_valid(E'\\xDeAdBeE', 'bytea');
296308
pg_input_is_valid

‎src/test/regress/sql/strings.sql

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,7 @@ SELECT E'De\\000dBeEf'::bytea;
7676
SELECT E'De\123dBeEf'::bytea;
7777
SELECT E'De\\123dBeEf'::bytea;
7878
SELECT E'De\\678dBeEf'::bytea;
79+
SELECT E'DeAd\\\\BeEf'::bytea;
7980
8081
SELECT reverse(''::bytea);
8182
SELECT reverse('\xaa'::bytea);
@@ -88,6 +89,7 @@ SELECT E'\\xDe00BeEf'::bytea;
8889
SELECT E'DeAdBeEf'::bytea;
8990
SELECT E'De\\000dBeEf'::bytea;
9091
SELECT E'De\\123dBeEf'::bytea;
92+
SELECT E'DeAd\\\\BeEf'::bytea;
9193
9294
-- Test non-error-throwing API too
9395
SELECT pg_input_is_valid(E'\\xDeAdBeE','bytea');

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp