Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit0b2a45a

Browse files
committed
Compress TID lists when writing GIN tuples to disk
When serializing GIN tuples to tuplesorts during parallel index builds,we can significantly reduce the amount of data by compressing the TIDlists. The GIN opclasses may produce a lot of data (depending on howmany keys are extracted from each row), and the TID compression is veryefficient and effective.If the number of distinct keys is high, the first worker pass (readingdata from the table and writing them into a private tuplesort) may notbenefit from the compression very much. It is likely to spill data todisk before the TID lists get long enough for the compression to help.The second pass (writing the merged data into the shared tuplesort) ismore likely to benefit from compression.The compression can be seen as a way to reduce the amount of disk spaceneeded by the parallel builds, because the data is written twice. Firstinto the per-worker tuplesorts, then into the shared tuplesort.Author: Tomas VondraReviewed-by: Matthias van de Meent, Andy Fan, Kirill ReshkeDiscussion:https://postgr.es/m/6ab4003f-a8b8-4d75-a67f-f25ad98582dc%40enterprisedb.com
1 parent9b4bdf8 commit0b2a45a

File tree

2 files changed

+95
-22
lines changed

2 files changed

+95
-22
lines changed

‎src/backend/access/gin/gininsert.c

Lines changed: 94 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,9 @@ static void _gin_parallel_scan_and_build(GinBuildState *buildstate,
190190
Relationheap,Relationindex,
191191
intsortmem,boolprogress);
192192

193-
staticDatum_gin_parse_tuple(GinTuple*a,ItemPointerData**items);
193+
staticItemPointer_gin_parse_tuple_items(GinTuple*a);
194+
staticDatum_gin_parse_tuple_key(GinTuple*a);
195+
194196
staticGinTuple*_gin_build_tuple(OffsetNumberattrnum,unsignedcharcategory,
195197
Datumkey,int16typlen,booltypbyval,
196198
ItemPointerData*items,uint32nitems,
@@ -1365,7 +1367,8 @@ GinBufferStoreTuple(GinBuffer *buffer, GinTuple *tup)
13651367

13661368
AssertCheckGinBuffer(buffer);
13671369

1368-
key=_gin_parse_tuple(tup,&items);
1370+
key=_gin_parse_tuple_key(tup);
1371+
items=_gin_parse_tuple_items(tup);
13691372

13701373
/* if the buffer is empty, set the fields (and copy the key) */
13711374
if (GinBufferIsEmpty(buffer))
@@ -1401,6 +1404,9 @@ GinBufferStoreTuple(GinBuffer *buffer, GinTuple *tup)
14011404

14021405
AssertCheckItemPointers(buffer);
14031406
}
1407+
1408+
/* free the decompressed TID list */
1409+
pfree(items);
14041410
}
14051411

14061412
/*
@@ -1955,6 +1961,15 @@ _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
19551961
table_close(heapRel,heapLockmode);
19561962
}
19571963

1964+
/*
1965+
* Used to keep track of compressed TID lists when building a GIN tuple.
1966+
*/
1967+
typedefstruct
1968+
{
1969+
dlist_nodenode;/* linked list pointers */
1970+
GinPostingList*seg;
1971+
}GinSegmentInfo;
1972+
19581973
/*
19591974
* _gin_build_tuple
19601975
*Serialize the state for an index key into a tuple for tuplesort.
@@ -1967,6 +1982,11 @@ _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
19671982
* like endianess etc. We could make it a little bit smaller, but it's not
19681983
* worth it - it's a tiny fraction of the data, and we need to MAXALIGN the
19691984
* start of the TID list anyway. So we wouldn't save anything.
1985+
*
1986+
* The TID list is serialized as compressed - it's highly compressible, and
1987+
* we already have ginCompressPostingList for this purpose. The list may be
1988+
* pretty long, so we compress it into multiple segments and then copy all
1989+
* of that into the GIN tuple.
19701990
*/
19711991
staticGinTuple*
19721992
_gin_build_tuple(OffsetNumberattrnum,unsignedcharcategory,
@@ -1980,6 +2000,11 @@ _gin_build_tuple(OffsetNumber attrnum, unsigned char category,
19802000
Sizetuplen;
19812001
intkeylen;
19822002

2003+
dlist_mutable_iteriter;
2004+
dlist_headsegments;
2005+
intncompressed;
2006+
Sizecompresslen;
2007+
19832008
/*
19842009
* Calculate how long is the key value. Only keys with GIN_CAT_NORM_KEY
19852010
* have actual non-empty key. We include varlena headers and \0 bytes for
@@ -2006,12 +2031,34 @@ _gin_build_tuple(OffsetNumber attrnum, unsigned char category,
20062031
else
20072032
elog(ERROR,"unexpected typlen value (%d)",typlen);
20082033

2034+
/* compress the item pointers */
2035+
ncompressed=0;
2036+
compresslen=0;
2037+
dlist_init(&segments);
2038+
2039+
/* generate compressed segments of TID list chunks */
2040+
while (ncompressed<nitems)
2041+
{
2042+
intcnt;
2043+
GinSegmentInfo*seginfo=palloc(sizeof(GinSegmentInfo));
2044+
2045+
seginfo->seg=ginCompressPostingList(&items[ncompressed],
2046+
(nitems-ncompressed),
2047+
UINT16_MAX,
2048+
&cnt);
2049+
2050+
ncompressed+=cnt;
2051+
compresslen+=SizeOfGinPostingList(seginfo->seg);
2052+
2053+
dlist_push_tail(&segments,&seginfo->node);
2054+
}
2055+
20092056
/*
20102057
* Determine GIN tuple length with all the data included. Be careful about
2011-
* alignment, to allow direct access to item pointers.
2058+
* alignment, to allow direct access to compressed segments (those require
2059+
* only SHORTALIGN).
20122060
*/
2013-
tuplen=SHORTALIGN(offsetof(GinTuple,data)+keylen)+
2014-
(sizeof(ItemPointerData)*nitems);
2061+
tuplen=SHORTALIGN(offsetof(GinTuple,data)+keylen)+compresslen;
20152062

20162063
*len=tuplen;
20172064

@@ -2061,37 +2108,40 @@ _gin_build_tuple(OffsetNumber attrnum, unsigned char category,
20612108
/* finally, copy the TIDs into the array */
20622109
ptr= (char*)tuple+SHORTALIGN(offsetof(GinTuple,data)+keylen);
20632110

2064-
memcpy(ptr,items,sizeof(ItemPointerData)*nitems);
2111+
/* copy in the compressed data, and free the segments */
2112+
dlist_foreach_modify(iter,&segments)
2113+
{
2114+
GinSegmentInfo*seginfo=dlist_container(GinSegmentInfo,node,iter.cur);
2115+
2116+
memcpy(ptr,seginfo->seg,SizeOfGinPostingList(seginfo->seg));
2117+
2118+
ptr+=SizeOfGinPostingList(seginfo->seg);
2119+
2120+
dlist_delete(&seginfo->node);
2121+
2122+
pfree(seginfo->seg);
2123+
pfree(seginfo);
2124+
}
20652125

20662126
returntuple;
20672127
}
20682128

20692129
/*
2070-
*_gin_parse_tuple
2071-
*Deserialize the tuple from thetuplestore representation.
2130+
*_gin_parse_tuple_key
2131+
*Return a Datum representing thekey stored in the tuple.
20722132
*
2073-
* Most of the fields are actually directly accessible, the only thing that
2133+
* Most of thetuplefields are directly accessible, the only thing that
20742134
* needs more care is the key and the TID list.
20752135
*
20762136
* For the key, this returns a regular Datum representing it. It's either the
20772137
* actual key value, or a pointer to the beginning of the data array (which is
20782138
* where the data was copied by _gin_build_tuple).
2079-
*
2080-
* The pointer to the TID list is returned through 'items' (which is simply
2081-
* a pointer to the data array).
20822139
*/
20832140
staticDatum
2084-
_gin_parse_tuple(GinTuple*a,ItemPointerData**items)
2141+
_gin_parse_tuple_key(GinTuple*a)
20852142
{
20862143
Datumkey;
20872144

2088-
if (items)
2089-
{
2090-
char*ptr= (char*)a+SHORTALIGN(offsetof(GinTuple,data)+a->keylen);
2091-
2092-
*items= (ItemPointerData*)ptr;
2093-
}
2094-
20952145
if (a->category!=GIN_CAT_NORM_KEY)
20962146
return (Datum)0;
20972147

@@ -2104,6 +2154,28 @@ _gin_parse_tuple(GinTuple *a, ItemPointerData **items)
21042154
returnPointerGetDatum(a->data);
21052155
}
21062156

2157+
/*
2158+
* _gin_parse_tuple_items
2159+
*Return a pointer to a palloc'd array of decompressed TID array.
2160+
*/
2161+
staticItemPointer
2162+
_gin_parse_tuple_items(GinTuple*a)
2163+
{
2164+
intlen;
2165+
char*ptr;
2166+
intndecoded;
2167+
ItemPointeritems;
2168+
2169+
len=a->tuplen-SHORTALIGN(offsetof(GinTuple,data)+a->keylen);
2170+
ptr= (char*)a+SHORTALIGN(offsetof(GinTuple,data)+a->keylen);
2171+
2172+
items=ginPostingListDecodeAllSegments((GinPostingList*)ptr,len,&ndecoded);
2173+
2174+
Assert(ndecoded==a->nitems);
2175+
2176+
return (ItemPointer)items;
2177+
}
2178+
21072179
/*
21082180
* _gin_compare_tuples
21092181
*Compare GIN tuples, used by tuplesort during parallel index build.
@@ -2139,8 +2211,8 @@ _gin_compare_tuples(GinTuple *a, GinTuple *b, SortSupport ssup)
21392211

21402212
if (a->category==GIN_CAT_NORM_KEY)
21412213
{
2142-
keya=_gin_parse_tuple(a,NULL);
2143-
keyb=_gin_parse_tuple(b,NULL);
2214+
keya=_gin_parse_tuple_key(a);
2215+
keyb=_gin_parse_tuple_key(b);
21442216

21452217
r=ApplySortComparator(keya, false,
21462218
keyb, false,

‎src/tools/pgindent/typedefs.list

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1052,6 +1052,7 @@ GinScanEntry
10521052
GinScanKey
10531053
GinScanOpaque
10541054
GinScanOpaqueData
1055+
GinSegmentInfo
10551056
GinState
10561057
GinStatsData
10571058
GinTernaryValue

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp