Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit e3f9dca

Browse files
committed
Ensure we allocate NAMEDATALEN bytes for names in Index Only Scans
As an optimization, we store "name" columns as cstrings in btree indexes. Here we modify it so that Index Only Scans convert these cstrings back to names with NAMEDATALEN bytes rather than storing the cstring in the tuple slot, as was happening previously.
Bug: #17855
Reported-by: Alexander Lakhin
Reviewed-by: Alexander Lakhin, Tom Lane
Discussion: https://postgr.es/m/17855-5f523e0f9769a566@postgresql.org
Backpatch-through: 12, all supported versions
1 parent 56d30fb, commit e3f9dca

File tree

5 files changed

+141
-9
lines changed

5 files changed

+141
-9
lines changed

‎src/backend/executor/nodeIndexonlyscan.c

Lines changed: 88 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,19 +35,21 @@
3535
#include"access/tableam.h"
3636
#include"access/tupdesc.h"
3737
#include"access/visibilitymap.h"
38+
#include"catalog/pg_type.h"
3839
#include"executor/execdebug.h"
3940
#include"executor/nodeIndexonlyscan.h"
4041
#include"executor/nodeIndexscan.h"
4142
#include"miscadmin.h"
4243
#include"storage/bufmgr.h"
4344
#include"storage/predicate.h"
45+
#include"utils/builtins.h"
4446
#include"utils/memutils.h"
4547
#include"utils/rel.h"
4648

4749

4850
staticTupleTableSlot*IndexOnlyNext(IndexOnlyScanState*node);
49-
staticvoidStoreIndexTuple(TupleTableSlot*slot,IndexTupleitup,
50-
TupleDescitupdesc);
51+
staticvoidStoreIndexTuple(IndexOnlyScanState*node,TupleTableSlot*slot,
52+
IndexTupleitup,TupleDescitupdesc);
5153

5254

5355
/* ----------------------------------------------------------------
@@ -208,7 +210,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
208210
ExecForceStoreHeapTuple(scandesc->xs_hitup,slot, false);
209211
}
210212
elseif (scandesc->xs_itup)
211-
StoreIndexTuple(slot,scandesc->xs_itup,scandesc->xs_itupdesc);
213+
StoreIndexTuple(node,slot,scandesc->xs_itup,scandesc->xs_itupdesc);
212214
else
213215
elog(ERROR,"no data returned for index-only scan");
214216

@@ -266,7 +268,8 @@ IndexOnlyNext(IndexOnlyScanState *node)
266268
* right now we don't need it elsewhere.
267269
*/
268270
staticvoid
269-
StoreIndexTuple(TupleTableSlot*slot,IndexTupleitup,TupleDescitupdesc)
271+
StoreIndexTuple(IndexOnlyScanState*node,TupleTableSlot*slot,
272+
IndexTupleitup,TupleDescitupdesc)
270273
{
271274
/*
272275
* Note: we must use the tupdesc supplied by the AM in index_deform_tuple,
@@ -279,6 +282,37 @@ StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, TupleDesc itupdesc)
279282

280283
ExecClearTuple(slot);
281284
index_deform_tuple(itup,itupdesc,slot->tts_values,slot->tts_isnull);
285+
286+
/*
287+
* Copy all name columns stored as cstrings back into a NAMEDATALEN byte
288+
* sized allocation. We mark this branch as unlikely as generally "name"
289+
* is used only for the system catalogs and this would have to be a user
290+
* query running on those or some other user table with an index on a name
291+
* column.
292+
*/
293+
if (unlikely(node->ioss_NameCStringAttNums!=NULL))
294+
{
295+
intattcount=node->ioss_NameCStringCount;
296+
297+
for (intidx=0;idx<attcount;idx++)
298+
{
299+
intattnum=node->ioss_NameCStringAttNums[idx];
300+
Namename;
301+
302+
/* skip null Datums */
303+
if (slot->tts_isnull[attnum])
304+
continue;
305+
306+
/* allocate the NAMEDATALEN and copy the datum into that memory */
307+
name= (Name)MemoryContextAlloc(node->ss.ps.ps_ExprContext->ecxt_per_tuple_memory,
308+
NAMEDATALEN);
309+
310+
/* use namestrcpy to zero-pad all trailing bytes */
311+
namestrcpy(name,DatumGetCString(slot->tts_values[attnum]));
312+
slot->tts_values[attnum]=NameGetDatum(name);
313+
}
314+
}
315+
282316
ExecStoreVirtualTuple(slot);
283317
}
284318

@@ -492,8 +526,11 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
492526
{
493527
IndexOnlyScanState*indexstate;
494528
RelationcurrentRelation;
529+
RelationindexRelation;
495530
LOCKMODElockmode;
496531
TupleDesctupDesc;
532+
intindnkeyatts;
533+
intnamecount;
497534

498535
/*
499536
* create state structure
@@ -566,7 +603,8 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
566603

567604
/* Open the index relation. */
568605
lockmode=exec_rt_fetch(node->scan.scanrelid,estate)->rellockmode;
569-
indexstate->ioss_RelationDesc=index_open(node->indexid,lockmode);
606+
indexRelation=index_open(node->indexid,lockmode);
607+
indexstate->ioss_RelationDesc=indexRelation;
570608

571609
/*
572610
* Initialize index-specific scan state
@@ -579,7 +617,7 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
579617
* build the index scan keys from the index qualification
580618
*/
581619
ExecIndexBuildScanKeys((PlanState*)indexstate,
582-
indexstate->ioss_RelationDesc,
620+
indexRelation,
583621
node->indexqual,
584622
false,
585623
&indexstate->ioss_ScanKeys,
@@ -593,7 +631,7 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
593631
* any ORDER BY exprs have to be turned into scankeys in the same way
594632
*/
595633
ExecIndexBuildScanKeys((PlanState*)indexstate,
596-
indexstate->ioss_RelationDesc,
634+
indexRelation,
597635
node->indexorderby,
598636
true,
599637
&indexstate->ioss_OrderByKeys,
@@ -622,6 +660,49 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
622660
indexstate->ioss_RuntimeContext=NULL;
623661
}
624662

663+
indexstate->ioss_NameCStringAttNums=NULL;
664+
indnkeyatts=indexRelation->rd_index->indnkeyatts;
665+
namecount=0;
666+
667+
/*
668+
* The "name" type for btree uses text_ops which results in storing
669+
* cstrings in the indexed keys rather than names. Here we detect that in
670+
* a generic way in case other index AMs want to do the same optimization.
671+
* Check for opclasses with an opcintype of NAMEOID and an index tuple
672+
* descriptor with CSTRINGOID. If any of these are found, create an array
673+
* marking the index attribute number of each of them. StoreIndexTuple()
674+
* handles copying the name Datums into a NAMEDATALEN-byte allocation.
675+
*/
676+
677+
/* First, count the number of such index keys */
678+
for (intattnum=0;attnum<indnkeyatts;attnum++)
679+
{
680+
if (indexRelation->rd_att->attrs[attnum].atttypid==CSTRINGOID&&
681+
indexRelation->rd_opcintype[attnum]==NAMEOID)
682+
namecount++;
683+
}
684+
685+
if (namecount>0)
686+
{
687+
intidx=0;
688+
689+
/*
690+
* Now create an array to mark the attribute numbers of the keys that
691+
* need to be converted from cstring to name.
692+
*/
693+
indexstate->ioss_NameCStringAttNums= (AttrNumber*)
694+
palloc(sizeof(AttrNumber)*namecount);
695+
696+
for (intattnum=0;attnum<indnkeyatts;attnum++)
697+
{
698+
if (indexRelation->rd_att->attrs[attnum].atttypid==CSTRINGOID&&
699+
indexRelation->rd_opcintype[attnum]==NAMEOID)
700+
indexstate->ioss_NameCStringAttNums[idx++]= (AttrNumber)attnum;
701+
}
702+
}
703+
704+
indexstate->ioss_NameCStringCount=namecount;
705+
625706
/*
626707
* all done.
627708
*/

‎src/include/catalog/pg_opclass.dat

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,11 @@
9191
# Here's an ugly little hack to save space in the system catalog indexes.
9292
# btree doesn't ordinarily allow a storage type different from input type;
9393
# but cstring and name are the same thing except for trailing padding,
94-
# and we can safely omit that within an index entry. So we declare the
95-
# btree opclass for name as using cstring storage type.
94+
# so we choose to omit that within an index entry. Here we declare the
95+
# btree opclass for name as using cstring storage type. This does require
96+
# that we pad the cstring out with the full NAMEDATALEN bytes when performing
97+
# index-only scans. See corresponding hacks in ExecInitIndexOnlyScan() and
98+
# StoreIndexTuple().
9699
{ opcmethod => 'btree', opcname => 'name_ops', opcfamily => 'btree/text_ops',
97100
opcintype => 'name', opckeytype => 'cstring' },
98101

‎src/include/nodes/execnodes.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1467,6 +1467,8 @@ typedef struct IndexScanState
14671467
*TableSlot slot for holding tuples fetched from the table
14681468
*VMBuffer buffer in use for visibility map testing, if any
14691469
*PscanLen size of parallel index-only scan descriptor
1470+
*NameCStringAttNums attnums of name typed columns to pad to NAMEDATALEN
1471+
*NameCStringCount number of elements in the NameCStringAttNums array
14701472
* ----------------
14711473
*/
14721474
typedefstructIndexOnlyScanState
@@ -1486,6 +1488,8 @@ typedef struct IndexOnlyScanState
14861488
TupleTableSlot*ioss_TableSlot;
14871489
Bufferioss_VMBuffer;
14881490
Sizeioss_PscanLen;
1491+
AttrNumber*ioss_NameCStringAttNums;
1492+
intioss_NameCStringCount;
14891493
}IndexOnlyScanState;
14901494

14911495
/* ----------------

‎src/test/regress/expected/index_including.out

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,3 +399,28 @@ Indexes:
399399
"tbl_c1_c2_c3_c4_key" UNIQUE CONSTRAINT, btree (c1, c2) INCLUDE (c3, c4)
400400

401401
DROP TABLE tbl;
402+
/*
403+
* 10. Test coverage for names stored as cstrings in indexes
404+
*/
405+
CREATE TABLE nametbl (c1 int, c2 name, c3 float);
406+
CREATE INDEX nametbl_c1_c2_idx ON nametbl (c2, c1) INCLUDE (c3);
407+
INSERT INTO nametbl VALUES(1, 'two', 3.0);
408+
VACUUM nametbl;
409+
SET enable_seqscan = 0;
410+
-- Ensure we get an index only scan plan
411+
EXPLAIN (COSTS OFF) SELECT c2, c1, c3 FROM nametbl WHERE c2 = 'two' AND c1 = 1;
412+
QUERY PLAN
413+
----------------------------------------------------
414+
Index Only Scan using nametbl_c1_c2_idx on nametbl
415+
Index Cond: ((c2 = 'two'::name) AND (c1 = 1))
416+
(2 rows)
417+
418+
-- Validate the results look sane
419+
SELECT c2, c1, c3 FROM nametbl WHERE c2 = 'two' AND c1 = 1;
420+
c2 | c1 | c3
421+
-----+----+----
422+
two | 1 | 3
423+
(1 row)
424+
425+
RESET enable_seqscan;
426+
DROP TABLE nametbl;

‎src/test/regress/sql/index_including.sql

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,3 +217,22 @@ ALTER TABLE tbl ALTER c1 TYPE bigint;
217217
ALTERTABLE tbl ALTER c3 TYPEbigint;
218218
\d tbl
219219
DROPTABLE tbl;
220+
221+
/*
222+
* 10. Test coverage for names stored as cstrings in indexes
223+
*/
224+
CREATETABLEnametbl (c1int, c2 name, c3 float);
225+
CREATEINDEXnametbl_c1_c2_idxON nametbl (c2, c1) INCLUDE (c3);
226+
INSERT INTO nametblVALUES(1,'two',3.0);
227+
VACUUM nametbl;
228+
SET enable_seqscan=0;
229+
230+
-- Ensure we get an index only scan plan
231+
EXPLAIN (COSTS OFF)SELECT c2, c1, c3FROM nametblWHERE c2='two'AND c1=1;
232+
233+
-- Validate the results look sane
234+
SELECT c2, c1, c3FROM nametblWHERE c2='two'AND c1=1;
235+
236+
RESET enable_seqscan;
237+
238+
DROPTABLE nametbl;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp