NotificationsYou must be signed in to change notification settings
Fork 6
Star 31

Commit a63224b

committed

Ensure we allocate NAMEDATALEN bytes for names in Index Only Scans

As an optimization, we store "name" columns as cstrings in btree indexes. Here we modify it so that Index Only Scans convert these cstrings back to names with NAMEDATALEN bytes rather than storing the cstring in the tuple slot, as was happening previously.

Bug: #17855
Reported-by: Alexander Lakhin
Reviewed-by: Alexander Lakhin, Tom Lane
Discussion: https://postgr.es/m/17855-5f523e0f9769a566@postgresql.org
Backpatch-through: 12, all supported versions

1 parent 7562a9b, commit a63224b (Copy full SHA for a63224b)

File tree

5 files changed

+141

-9

lines changed

src
- backend/executor
  - nodeIndexonlyscan.c
- include
  - catalog
    - pg_opclass.dat
  - nodes
    - execnodes.h
- test/regress
  - expected
    - index_including.out
  - sql
    - index_including.sql

5 files changed

+141

-9

lines changed

`‎src/backend/executor/nodeIndexonlyscan.c‎`

Lines changed: 88 additions & 7 deletions

Original file line number	Diff line number	Diff line change
`@@ -35,18 +35,20 @@`
`35`	`35`	`#include"access/tableam.h"`
`36`	`36`	`#include"access/tupdesc.h"`
`37`	`37`	`#include"access/visibilitymap.h"`
	`38`	`+#include"catalog/pg_type.h"`
`38`	`39`	`#include"executor/executor.h"`
`39`	`40`	`#include"executor/nodeIndexonlyscan.h"`
`40`	`41`	`#include"executor/nodeIndexscan.h"`
`41`	`42`	`#include"miscadmin.h"`
`42`	`43`	`#include"storage/bufmgr.h"`
`43`	`44`	`#include"storage/predicate.h"`
	`45`	`+#include"utils/builtins.h"`
`44`	`46`	`#include"utils/rel.h"`
`45`	`47`
`46`	`48`
`47`	`49`	`staticTupleTableSlotIndexOnlyNext(IndexOnlyScanStatenode);`
`48`		`-staticvoidStoreIndexTuple(TupleTableSlot*slot,IndexTupleitup,`
`49`		`-TupleDescitupdesc);`
	`50`	`+staticvoidStoreIndexTuple(IndexOnlyScanStatenode,TupleTableSlotslot,`
	`51`	`+IndexTupleitup,TupleDescitupdesc);`
`50`	`52`
`51`	`53`
`52`	`54`	`/* ----------------------------------------------------------------`
`@@ -205,7 +207,7 @@ IndexOnlyNext(IndexOnlyScanState *node)`
`205`	`207`	`ExecForceStoreHeapTuple(scandesc->xs_hitup,slot, false);`
`206`	`208`	`}`
`207`	`209`	`elseif (scandesc->xs_itup)`
`208`		`-StoreIndexTuple(slot,scandesc->xs_itup,scandesc->xs_itupdesc);`
	`210`	`+StoreIndexTuple(node,slot,scandesc->xs_itup,scandesc->xs_itupdesc);`
`209`	`211`	`else`
`210`	`212`	`elog(ERROR,"no data returned for index-only scan");`
`211`	`213`
`@@ -263,7 +265,8 @@ IndexOnlyNext(IndexOnlyScanState *node)`
`263`	`265`	`* right now we don't need it elsewhere.`
`264`	`266`	`*/`
`265`	`267`	`staticvoid`
`266`		`-StoreIndexTuple(TupleTableSlot*slot,IndexTupleitup,TupleDescitupdesc)`
	`268`	`+StoreIndexTuple(IndexOnlyScanStatenode,TupleTableSlotslot,`
	`269`	`+IndexTupleitup,TupleDescitupdesc)`
`267`	`270`	`{`
`268`	`271`	`/*`
`269`	`272`	`* Note: we must use the tupdesc supplied by the AM in index_deform_tuple,`
`@@ -276,6 +279,37 @@ StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, TupleDesc itupdesc)`
`276`	`279`
`277`	`280`	`ExecClearTuple(slot);`
`278`	`281`	`index_deform_tuple(itup,itupdesc,slot->tts_values,slot->tts_isnull);`
	`282`	`+`
	`283`	`+/*`
	`284`	`+ * Copy all name columns stored as cstrings back into a NAMEDATALEN byte`
	`285`	`+ * sized allocation. We mark this branch as unlikely as generally "name"`
	`286`	`+ * is used only for the system catalogs and this would have to be a user`
	`287`	`+ * query running on those or some other user table with an index on a name`
	`288`	`+ * column.`
	`289`	`+ */`
	`290`	`+if (unlikely(node->ioss_NameCStringAttNums!=NULL))`
	`291`	`+{`
	`292`	`+intattcount=node->ioss_NameCStringCount;`
	`293`	`+`
	`294`	`+for (intidx=0;idx<attcount;idx++)`
	`295`	`+{`
	`296`	`+intattnum=node->ioss_NameCStringAttNums[idx];`
	`297`	`+Namename;`
	`298`	`+`
	`299`	`+/* skip null Datums */`
	`300`	`+if (slot->tts_isnull[attnum])`
	`301`	`+continue;`
	`302`	`+`
	`303`	`+/* allocate the NAMEDATALEN and copy the datum into that memory */`
	`304`	`+name= (Name)MemoryContextAlloc(node->ss.ps.ps_ExprContext->ecxt_per_tuple_memory,`
	`305`	`+NAMEDATALEN);`
	`306`	`+`
	`307`	`+/* use namestrcpy to zero-pad all trailing bytes */`
	`308`	`+namestrcpy(name,DatumGetCString(slot->tts_values[attnum]));`
	`309`	`+slot->tts_values[attnum]=NameGetDatum(name);`
	`310`	`+}`
	`311`	`+}`
	`312`	`+`
`279`	`313`	`ExecStoreVirtualTuple(slot);`
`280`	`314`	`}`
`281`	`315`
`@@ -473,8 +507,11 @@ ExecInitIndexOnlyScan(IndexOnlyScan node, EState estate, int eflags)`
`473`	`507`	`{`
`474`	`508`	`IndexOnlyScanState*indexstate;`
`475`	`509`	`RelationcurrentRelation;`
	`510`	`+RelationindexRelation;`
`476`	`511`	`LOCKMODElockmode;`
`477`	`512`	`TupleDesctupDesc;`
	`513`	`+intindnkeyatts;`
	`514`	`+intnamecount;`
`478`	`515`
`479`	`516`	`/*`
`480`	`517`	`* create state structure`
`@@ -547,7 +584,8 @@ ExecInitIndexOnlyScan(IndexOnlyScan node, EState estate, int eflags)`
`547`	`584`
`548`	`585`	`/* Open the index relation. */`
`549`	`586`	`lockmode=exec_rt_fetch(node->scan.scanrelid,estate)->rellockmode;`
`550`		`-indexstate->ioss_RelationDesc=index_open(node->indexid,lockmode);`
	`587`	`+indexRelation=index_open(node->indexid,lockmode);`
	`588`	`+indexstate->ioss_RelationDesc=indexRelation;`
`551`	`589`
`552`	`590`	`/*`
`553`	`591`	`* Initialize index-specific scan state`
`@@ -560,7 +598,7 @@ ExecInitIndexOnlyScan(IndexOnlyScan node, EState estate, int eflags)`
`560`	`598`	`* build the index scan keys from the index qualification`
`561`	`599`	`*/`
`562`	`600`	`ExecIndexBuildScanKeys((PlanState*)indexstate,`
`563`		`-indexstate->ioss_RelationDesc,`
	`601`	`+indexRelation,`
`564`	`602`	`node->indexqual,`
`565`	`603`	`false,`
`566`	`604`	`&indexstate->ioss_ScanKeys,`
`@@ -574,7 +612,7 @@ ExecInitIndexOnlyScan(IndexOnlyScan node, EState estate, int eflags)`
`574`	`612`	`* any ORDER BY exprs have to be turned into scankeys in the same way`
`575`	`613`	`*/`
`576`	`614`	`ExecIndexBuildScanKeys((PlanState*)indexstate,`
`577`		`-indexstate->ioss_RelationDesc,`
	`615`	`+indexRelation,`
`578`	`616`	`node->indexorderby,`
`579`	`617`	`true,`
`580`	`618`	`&indexstate->ioss_OrderByKeys,`
`@@ -603,6 +641,49 @@ ExecInitIndexOnlyScan(IndexOnlyScan node, EState estate, int eflags)`
`603`	`641`	`indexstate->ioss_RuntimeContext=NULL;`
`604`	`642`	`}`
`605`	`643`
	`644`	`+indexstate->ioss_NameCStringAttNums=NULL;`
	`645`	`+indnkeyatts=indexRelation->rd_index->indnkeyatts;`
	`646`	`+namecount=0;`
	`647`	`+`
	`648`	`+/*`
	`649`	`+ * The "name" type for btree uses text_ops which results in storing`
	`650`	`+ * cstrings in the indexed keys rather than names. Here we detect that in`
	`651`	`+ * a generic way in case other index AMs want to do the same optimization.`
	`652`	`+ * Check for opclasses with an opcintype of NAMEOID and an index tuple`
	`653`	`+ * descriptor with CSTRINGOID. If any of these are found, create an array`
	`654`	`+ * marking the index attribute number of each of them. StoreIndexTuple()`
	`655`	`+ * handles copying the name Datums into a NAMEDATALEN-byte allocation.`
	`656`	`+ */`
	`657`	`+`
	`658`	`+/* First, count the number of such index keys */`
	`659`	`+for (intattnum=0;attnum<indnkeyatts;attnum++)`
	`660`	`+{`
	`661`	`+if (indexRelation->rd_att->attrs[attnum].atttypid==CSTRINGOID&&`
	`662`	`+indexRelation->rd_opcintype[attnum]==NAMEOID)`
	`663`	`+namecount++;`
	`664`	`+}`
	`665`	`+`
	`666`	`+if (namecount>0)`
	`667`	`+{`
	`668`	`+intidx=0;`
	`669`	`+`
	`670`	`+/*`
	`671`	`+ * Now create an array to mark the attribute numbers of the keys that`
	`672`	`+ * need to be converted from cstring to name.`
	`673`	`+ */`
	`674`	`+indexstate->ioss_NameCStringAttNums= (AttrNumber*)`
	`675`	`+palloc(sizeof(AttrNumber)*namecount);`
	`676`	`+`
	`677`	`+for (intattnum=0;attnum<indnkeyatts;attnum++)`
	`678`	`+{`
	`679`	`+if (indexRelation->rd_att->attrs[attnum].atttypid==CSTRINGOID&&`
	`680`	`+indexRelation->rd_opcintype[attnum]==NAMEOID)`
	`681`	`+indexstate->ioss_NameCStringAttNums[idx++]= (AttrNumber)attnum;`
	`682`	`+}`
	`683`	`+}`
	`684`	`+`
	`685`	`+indexstate->ioss_NameCStringCount=namecount;`
	`686`	`+`
`606`	`687`	`/*`
`607`	`688`	`* all done.`
`608`	`689`	`*/`

`‎src/include/catalog/pg_opclass.dat‎`

Lines changed: 5 additions & 2 deletions

Original file line number	Diff line number	Diff line change
`@@ -91,8 +91,11 @@`
`91`	`91`	`# Here's an ugly little hack to save space in the system catalog indexes.`
`92`	`92`	`# btree doesn't ordinarily allow a storage type different from input type;`
`93`	`93`	`# but cstring and name are the same thing except for trailing padding,`
`94`		`-# and we can safely omit that within an index entry. So we declare the`
`95`		`-# btree opclass for name as using cstring storage type.`
	`94`	`+# so we choose to omit that within an index entry. Here we declare the`
	`95`	`+# btree opclass for name as using cstring storage type. This does require`
	`96`	`+# that we pad the cstring out with the full NAMEDATALEN bytes when performing`
	`97`	`+# index-only scans. See corresponding hacks in ExecInitIndexOnlyScan() and`
	`98`	`+# StoreIndexTuple().`
`96`	`99`	`{ opcmethod => 'btree', opcname => 'name_ops', opcfamily => 'btree/text_ops',`
`97`	`100`	`opcintype => 'name', opckeytype => 'cstring' },`
`98`	`101`

`‎src/include/nodes/execnodes.h‎`

Lines changed: 4 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -1690,6 +1690,8 @@ typedef struct IndexScanState`
`1690`	`1690`	`*TableSlot slot for holding tuples fetched from the table`
`1691`	`1691`	`*VMBuffer buffer in use for visibility map testing, if any`
`1692`	`1692`	`*PscanLen size of parallel index-only scan descriptor`
	`1693`	`+ *NameCStringAttNums attnums of name typed columns to pad to NAMEDATALEN`
	`1694`	`+ *NameCStringCount number of elements in the NameCStringAttNums array`
`1693`	`1695`	`* ----------------`
`1694`	`1696`	`*/`
`1695`	`1697`	`typedefstructIndexOnlyScanState`
`@@ -1709,6 +1711,8 @@ typedef struct IndexOnlyScanState`
`1709`	`1711`	`TupleTableSlot*ioss_TableSlot;`
`1710`	`1712`	`Bufferioss_VMBuffer;`
`1711`	`1713`	`Sizeioss_PscanLen;`
	`1714`	`+AttrNumber*ioss_NameCStringAttNums;`
	`1715`	`+intioss_NameCStringCount;`
`1712`	`1716`	`}IndexOnlyScanState;`
`1713`	`1717`
`1714`	`1718`	`/* ----------------`

`‎src/test/regress/expected/index_including.out‎`

Lines changed: 25 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -398,3 +398,28 @@ Indexes:`
`398`	`398`	`"tbl_c1_c2_c3_c4_key" UNIQUE CONSTRAINT, btree (c1, c2) INCLUDE (c3, c4)`
`399`	`399`
`400`	`400`	`DROP TABLE tbl;`
	`401`	`+/*`
	`402`	`+ * 10. Test coverage for names stored as cstrings in indexes`
	`403`	`+ */`
	`404`	`+CREATE TABLE nametbl (c1 int, c2 name, c3 float);`
	`405`	`+CREATE INDEX nametbl_c1_c2_idx ON nametbl (c2, c1) INCLUDE (c3);`
	`406`	`+INSERT INTO nametbl VALUES(1, 'two', 3.0);`
	`407`	`+VACUUM nametbl;`
	`408`	`+SET enable_seqscan = 0;`
	`409`	`+-- Ensure we get an index only scan plan`
	`410`	`+EXPLAIN (COSTS OFF) SELECT c2, c1, c3 FROM nametbl WHERE c2 = 'two' AND c1 = 1;`
	`411`	`+ QUERY PLAN`
	`412`	`+----------------------------------------------------`
	`413`	`+ Index Only Scan using nametbl_c1_c2_idx on nametbl`
	`414`	`+ Index Cond: ((c2 = 'two'::name) AND (c1 = 1))`
	`415`	`+(2 rows)`
	`416`	`+`
	`417`	`+-- Validate the results look sane`
	`418`	`+SELECT c2, c1, c3 FROM nametbl WHERE c2 = 'two' AND c1 = 1;`
	`419`	`+ c2 \| c1 \| c3`
	`420`	`+-----+----+----`
	`421`	`+ two \| 1 \| 3`
	`422`	`+(1 row)`
	`423`	`+`
	`424`	`+RESET enable_seqscan;`
	`425`	`+DROP TABLE nametbl;`

`‎src/test/regress/sql/index_including.sql‎`

Lines changed: 19 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -217,3 +217,22 @@ ALTER TABLE tbl ALTER c1 TYPE bigint;`
`217`	`217`	`ALTERTABLE tbl ALTER c3 TYPEbigint;`
`218`	`218`	`\d tbl`
`219`	`219`	`DROPTABLE tbl;`
	`220`	`+`
	`221`	`+/*`
	`222`	`+ * 10. Test coverage for names stored as cstrings in indexes`
	`223`	`+*/`
	`224`	`+CREATETABLEnametbl (c1int, c2 name, c3 float);`
	`225`	`+CREATEINDEXnametbl_c1_c2_idxON nametbl (c2, c1) INCLUDE (c3);`
	`226`	`+INSERT INTO nametblVALUES(1,'two',3.0);`
	`227`	`+VACUUM nametbl;`
	`228`	`+SET enable_seqscan=0;`
	`229`	`+`
	`230`	`+-- Ensure we get an index only scan plan`
	`231`	`+EXPLAIN (COSTS OFF)SELECT c2, c1, c3FROM nametblWHERE c2='two'AND c1=1;`
	`232`	`+`
	`233`	`+-- Validate the results look sane`
	`234`	`+SELECT c2, c1, c3FROM nametblWHERE c2='two'AND c1=1;`
	`235`	`+`
	`236`	`+RESET enable_seqscan;`
	`237`	`+`
	`238`	`+DROPTABLE nametbl;`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit a63224b

File tree

5 files changed

5 files changed

`‎src/backend/executor/nodeIndexonlyscan.c‎`

`‎src/include/catalog/pg_opclass.dat‎`

`‎src/include/nodes/execnodes.h‎`

`‎src/test/regress/expected/index_including.out‎`

`‎src/test/regress/sql/index_including.sql‎`

0 commit comments