Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 26a944c

Browse files
committed
Adjust bytea get_bit/set_bit to use int8 not int4 for bit numbering.
Since the existing bit number argument can't exceed INT32_MAX, it's not possible for these functions to manipulate bits beyond the first 256MB of a bytea value. Lift that restriction by redeclaring the bit number arguments as int8 (which requires a catversion bump, hence is not back-patchable).

The similarly-named functions for bit/varbit don't really have a problem because we restrict those types to at most VARBITMAXLEN bits; hence leave them alone.

While here, extend the encode/decode functions in utils/adt/encode.c to allow dealing with values wider than 1GB. This is not a live bug or restriction in current usage, because no input could be more than 1GB, and since none of the encoders can expand a string more than 4X, the result size couldn't overflow uint32. But it might be desirable to support more in future, so make the input length values size_t and the potential-output-length values uint64.

Also add some test cases to improve the miserable code coverage of these functions.

Movead Li, editorialized some by me; also reviewed by Ashutosh Bapat

Discussion: https://postgr.es/m/20200312115135445367128@highgo.ca
1 parent 9c74ceb commit 26a944c

File tree

8 files changed

+217
-71
lines changed

8 files changed

+217
-71
lines changed

‎doc/src/sgml/func.sgml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2905,7 +2905,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
29052905
<indexterm>
29062906
<primary>get_bit</primary>
29072907
</indexterm>
2908-
<literal><function>get_bit(<parameter>bytes</parameter> <type>bytea</type>, <parameter>offset</parameter> <type>int</type>)</function></literal>
2908+
<literal><function>get_bit(<parameter>bytes</parameter> <type>bytea</type>, <parameter>offset</parameter> <type>bigint</type>)</function></literal>
29092909
</entry>
29102910
<entry><type>int</type></entry>
29112911
<entry>
@@ -2990,7 +2990,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
29902990
<primary>set_bit</primary>
29912991
</indexterm>
29922992
<literal><function>set_bit(<parameter>bytes</parameter> <type>bytea</type>,
2993-
<parameter>offset</parameter> <type>int</type>,
2993+
<parameter>offset</parameter> <type>bigint</type>,
29942994
<parameter>newvalue</parameter> <type>int</type>)</function></literal>
29952995
</entry>
29962996
<entry><type>bytea</type></entry>

‎src/backend/utils/adt/encode.c

Lines changed: 85 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,24 @@
1616
#include<ctype.h>
1717

1818
#include"utils/builtins.h"
19+
#include"utils/memutils.h"
1920

2021

22+
/*
23+
* Encoding conversion API.
24+
* encode_len() and decode_len() compute the amount of space needed, while
25+
* encode() and decode() perform the actual conversions. It is okay for
26+
* the _len functions to return an overestimate, but not an underestimate.
27+
* (Having said that, large overestimates could cause unnecessary errors,
28+
* so it's better to get it right.) The conversion routines write to the
29+
* buffer at *res and return the true length of their output.
30+
*/
2131
structpg_encoding
2232
{
23-
unsigned(*encode_len) (constchar*data,unsigneddlen);
24-
unsigned(*decode_len) (constchar*data,unsigneddlen);
25-
unsigned(*encode) (constchar*data,unsigneddlen,char*res);
26-
unsigned(*decode) (constchar*data,unsigneddlen,char*res);
33+
uint64(*encode_len) (constchar*data,size_tdlen);
34+
uint64(*decode_len) (constchar*data,size_tdlen);
35+
uint64(*encode) (constchar*data,size_tdlen,char*res);
36+
uint64(*decode) (constchar*data,size_tdlen,char*res);
2737
};
2838

2939
staticconststructpg_encoding*pg_find_encoding(constchar*name);
@@ -39,13 +49,12 @@ binary_encode(PG_FUNCTION_ARGS)
3949
Datumname=PG_GETARG_DATUM(1);
4050
text*result;
4151
char*namebuf;
42-
intdatalen,
43-
resultlen,
44-
res;
52+
char*dataptr;
53+
size_tdatalen;
54+
uint64resultlen;
55+
uint64res;
4556
conststructpg_encoding*enc;
4657

47-
datalen=VARSIZE_ANY_EXHDR(data);
48-
4958
namebuf=TextDatumGetCString(name);
5059

5160
enc=pg_find_encoding(namebuf);
@@ -54,10 +63,23 @@ binary_encode(PG_FUNCTION_ARGS)
5463
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5564
errmsg("unrecognized encoding: \"%s\"",namebuf)));
5665

57-
resultlen=enc->encode_len(VARDATA_ANY(data),datalen);
66+
dataptr=VARDATA_ANY(data);
67+
datalen=VARSIZE_ANY_EXHDR(data);
68+
69+
resultlen=enc->encode_len(dataptr,datalen);
70+
71+
/*
72+
* resultlen possibly overflows uint32, therefore on 32-bit machines it's
73+
* unsafe to rely on palloc's internal check.
74+
*/
75+
if (resultlen>MaxAllocSize-VARHDRSZ)
76+
ereport(ERROR,
77+
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
78+
errmsg("result of encoding conversion is too large")));
79+
5880
result=palloc(VARHDRSZ+resultlen);
5981

60-
res=enc->encode(VARDATA_ANY(data),datalen,VARDATA(result));
82+
res=enc->encode(dataptr,datalen,VARDATA(result));
6183

6284
/* Make this FATAL 'cause we've trodden on memory ... */
6385
if (res>resultlen)
@@ -75,13 +97,12 @@ binary_decode(PG_FUNCTION_ARGS)
7597
Datumname=PG_GETARG_DATUM(1);
7698
bytea*result;
7799
char*namebuf;
78-
intdatalen,
79-
resultlen,
80-
res;
100+
char*dataptr;
101+
size_tdatalen;
102+
uint64resultlen;
103+
uint64res;
81104
conststructpg_encoding*enc;
82105

83-
datalen=VARSIZE_ANY_EXHDR(data);
84-
85106
namebuf=TextDatumGetCString(name);
86107

87108
enc=pg_find_encoding(namebuf);
@@ -90,10 +111,23 @@ binary_decode(PG_FUNCTION_ARGS)
90111
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
91112
errmsg("unrecognized encoding: \"%s\"",namebuf)));
92113

93-
resultlen=enc->decode_len(VARDATA_ANY(data),datalen);
114+
dataptr=VARDATA_ANY(data);
115+
datalen=VARSIZE_ANY_EXHDR(data);
116+
117+
resultlen=enc->decode_len(dataptr,datalen);
118+
119+
/*
120+
* resultlen possibly overflows uint32, therefore on 32-bit machines it's
121+
* unsafe to rely on palloc's internal check.
122+
*/
123+
if (resultlen>MaxAllocSize-VARHDRSZ)
124+
ereport(ERROR,
125+
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
126+
errmsg("result of decoding conversion is too large")));
127+
94128
result=palloc(VARHDRSZ+resultlen);
95129

96-
res=enc->decode(VARDATA_ANY(data),datalen,VARDATA(result));
130+
res=enc->decode(dataptr,datalen,VARDATA(result));
97131

98132
/* Make this FATAL 'cause we've trodden on memory ... */
99133
if (res>resultlen)
@@ -122,8 +156,8 @@ static const int8 hexlookup[128] = {
122156
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
123157
};
124158

125-
unsigned
126-
hex_encode(constchar*src,unsignedlen,char*dst)
159+
uint64
160+
hex_encode(constchar*src,size_tlen,char*dst)
127161
{
128162
constchar*end=src+len;
129163

@@ -133,7 +167,7 @@ hex_encode(const char *src, unsigned len, char *dst)
133167
*dst++=hextbl[*src&0xF];
134168
src++;
135169
}
136-
returnlen*2;
170+
return(uint64)len*2;
137171
}
138172

139173
staticinlinechar
@@ -152,8 +186,8 @@ get_hex(char c)
152186
return (char)res;
153187
}
154188

155-
unsigned
156-
hex_decode(constchar*src,unsignedlen,char*dst)
189+
uint64
190+
hex_decode(constchar*src,size_tlen,char*dst)
157191
{
158192
constchar*s,
159193
*srcend;
@@ -184,16 +218,16 @@ hex_decode(const char *src, unsigned len, char *dst)
184218
returnp-dst;
185219
}
186220

187-
staticunsigned
188-
hex_enc_len(constchar*src,unsignedsrclen)
221+
staticuint64
222+
hex_enc_len(constchar*src,size_tsrclen)
189223
{
190-
returnsrclen <<1;
224+
return(uint64)srclen <<1;
191225
}
192226

193-
staticunsigned
194-
hex_dec_len(constchar*src,unsignedsrclen)
227+
staticuint64
228+
hex_dec_len(constchar*src,size_tsrclen)
195229
{
196-
returnsrclen >>1;
230+
return(uint64)srclen >>1;
197231
}
198232

199233
/*
@@ -214,8 +248,8 @@ static const int8 b64lookup[128] = {
214248
41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1,
215249
};
216250

217-
staticunsigned
218-
pg_base64_encode(constchar*src,unsignedlen,char*dst)
251+
staticuint64
252+
pg_base64_encode(constchar*src,size_tlen,char*dst)
219253
{
220254
char*p,
221255
*lend=dst+76;
@@ -261,8 +295,8 @@ pg_base64_encode(const char *src, unsigned len, char *dst)
261295
returnp-dst;
262296
}
263297

264-
staticunsigned
265-
pg_base64_decode(constchar*src,unsignedlen,char*dst)
298+
staticuint64
299+
pg_base64_decode(constchar*src,size_tlen,char*dst)
266300
{
267301
constchar*srcend=src+len,
268302
*s=src;
@@ -331,17 +365,17 @@ pg_base64_decode(const char *src, unsigned len, char *dst)
331365
}
332366

333367

334-
staticunsigned
335-
pg_base64_enc_len(constchar*src,unsignedsrclen)
368+
staticuint64
369+
pg_base64_enc_len(constchar*src,size_tsrclen)
336370
{
337371
/* 3 bytes will be converted to 4, linefeed after 76 chars */
338-
return (srclen+2)*4 /3+srclen / (76*3 /4);
372+
return ((uint64)srclen+2)*4 /3+ (uint64)srclen / (76*3 /4);
339373
}
340374

341-
staticunsigned
342-
pg_base64_dec_len(constchar*src,unsignedsrclen)
375+
staticuint64
376+
pg_base64_dec_len(constchar*src,size_tsrclen)
343377
{
344-
return (srclen*3) >>2;
378+
return ((uint64)srclen*3) >>2;
345379
}
346380

347381
/*
@@ -361,12 +395,12 @@ pg_base64_dec_len(const char *src, unsigned srclen)
361395
#defineVAL(CH)((CH) - '0')
362396
#defineDIG(VAL)((VAL) + '0')
363397

364-
staticunsigned
365-
esc_encode(constchar*src,unsignedsrclen,char*dst)
398+
staticuint64
399+
esc_encode(constchar*src,size_tsrclen,char*dst)
366400
{
367401
constchar*end=src+srclen;
368402
char*rp=dst;
369-
intlen=0;
403+
uint64len=0;
370404

371405
while (src<end)
372406
{
@@ -400,12 +434,12 @@ esc_encode(const char *src, unsigned srclen, char *dst)
400434
returnlen;
401435
}
402436

403-
staticunsigned
404-
esc_decode(constchar*src,unsignedsrclen,char*dst)
437+
staticuint64
438+
esc_decode(constchar*src,size_tsrclen,char*dst)
405439
{
406440
constchar*end=src+srclen;
407441
char*rp=dst;
408-
intlen=0;
442+
uint64len=0;
409443

410444
while (src<end)
411445
{
@@ -448,11 +482,11 @@ esc_decode(const char *src, unsigned srclen, char *dst)
448482
returnlen;
449483
}
450484

451-
staticunsigned
452-
esc_enc_len(constchar*src,unsignedsrclen)
485+
staticuint64
486+
esc_enc_len(constchar*src,size_tsrclen)
453487
{
454488
constchar*end=src+srclen;
455-
intlen=0;
489+
uint64len=0;
456490

457491
while (src<end)
458492
{
@@ -469,11 +503,11 @@ esc_enc_len(const char *src, unsigned srclen)
469503
returnlen;
470504
}
471505

472-
staticunsigned
473-
esc_dec_len(constchar*src,unsignedsrclen)
506+
staticuint64
507+
esc_dec_len(constchar*src,size_tsrclen)
474508
{
475509
constchar*end=src+srclen;
476-
intlen=0;
510+
uint64len=0;
477511

478512
while (src<end)
479513
{

‎src/backend/utils/adt/varlena.c

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,7 @@ byteaout(PG_FUNCTION_ARGS)
389389
{
390390
/* Print traditional escaped format */
391391
char*vp;
392-
intlen;
392+
uint64len;
393393
inti;
394394

395395
len=1;/* empty string has 1 char */
@@ -403,7 +403,18 @@ byteaout(PG_FUNCTION_ARGS)
403403
else
404404
len++;
405405
}
406+
407+
/*
408+
* In principle len can't overflow uint32 if the input fit in 1GB, but
409+
* for safety let's check rather than relying on palloc's internal
410+
* check.
411+
*/
412+
if (len>MaxAllocSize)
413+
ereport(ERROR,
414+
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
415+
errmsg_internal("result of bytea output conversion is too large")));
406416
rp=result= (char*)palloc(len);
417+
407418
vp=VARDATA_ANY(vlena);
408419
for (i=VARSIZE_ANY_EXHDR(vlena);i!=0;i--,vp++)
409420
{
@@ -3456,22 +3467,23 @@ Datum
34563467
byteaGetBit(PG_FUNCTION_ARGS)
34573468
{
34583469
bytea*v=PG_GETARG_BYTEA_PP(0);
3459-
int32n=PG_GETARG_INT32(1);
3470+
int64n=PG_GETARG_INT64(1);
34603471
intbyteNo,
34613472
bitNo;
34623473
intlen;
34633474
intbyte;
34643475

34653476
len=VARSIZE_ANY_EXHDR(v);
34663477

3467-
if (n<0||n >=len*8)
3478+
if (n<0||n >=(int64)len*8)
34683479
ereport(ERROR,
34693480
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3470-
errmsg("index %d out of valid range, 0..%d",
3471-
n,len*8-1)));
3481+
errmsg("index %lld out of valid range, 0..%lld",
3482+
(long long)n, (long long)len*8-1)));
34723483

3473-
byteNo=n /8;
3474-
bitNo=n %8;
3484+
/* n/8 is now known < len, so safe to cast to int */
3485+
byteNo= (int) (n /8);
3486+
bitNo= (int) (n %8);
34753487

34763488
byte= ((unsignedchar*)VARDATA_ANY(v))[byteNo];
34773489

@@ -3525,7 +3537,7 @@ Datum
35253537
byteaSetBit(PG_FUNCTION_ARGS)
35263538
{
35273539
bytea*res=PG_GETARG_BYTEA_P_COPY(0);
3528-
int32n=PG_GETARG_INT32(1);
3540+
int64n=PG_GETARG_INT64(1);
35293541
int32newBit=PG_GETARG_INT32(2);
35303542
intlen;
35313543
intoldByte,
@@ -3535,14 +3547,15 @@ byteaSetBit(PG_FUNCTION_ARGS)
35353547

35363548
len=VARSIZE(res)-VARHDRSZ;
35373549

3538-
if (n<0||n >=len*8)
3550+
if (n<0||n >=(int64)len*8)
35393551
ereport(ERROR,
35403552
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3541-
errmsg("index %d out of valid range, 0..%d",
3542-
n,len*8-1)));
3553+
errmsg("index %lld out of valid range, 0..%lld",
3554+
(long long)n, (long long)len*8-1)));
35433555

3544-
byteNo=n /8;
3545-
bitNo=n %8;
3556+
/* n/8 is now known < len, so safe to cast to int */
3557+
byteNo= (int) (n /8);
3558+
bitNo= (int) (n %8);
35463559

35473560
/*
35483561
* sanity check!

‎src/include/catalog/catversion.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,6 @@
5353
*/
5454

5555
/*yyyymmddN */
56-
#defineCATALOG_VERSION_NO202004062
56+
#defineCATALOG_VERSION_NO202004071
5757

5858
#endif

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp