Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit0a6ea40

Browse files
committed
Add a hash opclass for type "tid".
Up to now we've not worried much about joins where the join key is arelation's CTID column, reasoning that storing a table's CTIDs in someother table would be pretty useless. However, there are use-cases forthis sort of query involving self-joins, so that argument doesn't reallyhold water.With larger relations, a merge or hash join is desirable. We had a btreeopclass for type "tid", allowing merge joins on CTID, but no hash opclassso that hash joins weren't possible. Add the missing infrastructure.This also potentially enables hash aggregation on "tid", though theuse-cases for that aren't too clear.Discussion:https://postgr.es/m/1853.1545453106@sss.pgh.pa.us
1 parentb5415e3 commit0a6ea40

File tree

10 files changed

+111
-4
lines changed

10 files changed

+111
-4
lines changed

‎src/backend/utils/adt/tid.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include<math.h>
2121
#include<limits.h>
2222

23+
#include"access/hash.h"
2324
#include"access/heapam.h"
2425
#include"access/sysattr.h"
2526
#include"catalog/namespace.h"
@@ -239,6 +240,33 @@ tidsmaller(PG_FUNCTION_ARGS)
239240
PG_RETURN_ITEMPOINTER(ItemPointerCompare(arg1,arg2) <=0 ?arg1 :arg2);
240241
}
241242

243+
Datum
244+
hashtid(PG_FUNCTION_ARGS)
245+
{
246+
ItemPointerkey=PG_GETARG_ITEMPOINTER(0);
247+
248+
/*
249+
* While you'll probably have a lot of trouble with a compiler that
250+
* insists on appending pad space to struct ItemPointerData, we can at
251+
* least make this code work, by not using sizeof(ItemPointerData).
252+
* Instead rely on knowing the sizes of the component fields.
253+
*/
254+
returnhash_any((unsignedchar*)key,
255+
sizeof(BlockIdData)+sizeof(OffsetNumber));
256+
}
257+
258+
Datum
259+
hashtidextended(PG_FUNCTION_ARGS)
260+
{
261+
ItemPointerkey=PG_GETARG_ITEMPOINTER(0);
262+
uint64seed=PG_GETARG_INT64(1);
263+
264+
/* As above */
265+
returnhash_any_extended((unsignedchar*)key,
266+
sizeof(BlockIdData)+sizeof(OffsetNumber),
267+
seed);
268+
}
269+
242270

243271
/*
244272
*Functions to get latest tid of a specified tuple.

‎src/include/catalog/catversion.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,6 @@
5353
*/
5454

5555
/*yyyymmddN */
56-
#defineCATALOG_VERSION_NO201812202
56+
#defineCATALOG_VERSION_NO201812301
5757

5858
#endif

‎src/include/catalog/pg_amop.dat

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1013,6 +1013,10 @@
10131013
{ amopfamily => 'hash/cid_ops', amoplefttype => 'cid', amoprighttype => 'cid',
10141014
amopstrategy => '1', amopopr => '=(cid,cid)', amopmethod => 'hash' },
10151015

1016+
# tid_ops
1017+
{ amopfamily => 'hash/tid_ops', amoplefttype => 'tid', amoprighttype => 'tid',
1018+
amopstrategy => '1', amopopr => '=(tid,tid)', amopmethod => 'hash' },
1019+
10161020
# text_pattern_ops
10171021
{ amopfamily => 'hash/text_pattern_ops', amoplefttype => 'text',
10181022
amoprighttype => 'text', amopstrategy => '1', amopopr => '=(text,text)',

‎src/include/catalog/pg_amproc.dat

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,10 @@
340340
amprocrighttype => 'cid', amprocnum => '1', amproc => 'hashint4' },
341341
{ amprocfamily => 'hash/cid_ops', amproclefttype => 'cid',
342342
amprocrighttype => 'cid', amprocnum => '2', amproc => 'hashint4extended' },
343+
{ amprocfamily => 'hash/tid_ops', amproclefttype => 'tid',
344+
amprocrighttype => 'tid', amprocnum => '1', amproc => 'hashtid' },
345+
{ amprocfamily => 'hash/tid_ops', amproclefttype => 'tid',
346+
amprocrighttype => 'tid', amprocnum => '2', amproc => 'hashtidextended' },
343347
{ amprocfamily => 'hash/text_pattern_ops', amproclefttype => 'text',
344348
amprocrighttype => 'text', amprocnum => '1', amproc => 'hashtext' },
345349
{ amprocfamily => 'hash/text_pattern_ops', amproclefttype => 'text',

‎src/include/catalog/pg_opclass.dat

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,8 @@
167167
opcintype => 'xid' },
168168
{ opcmethod => 'hash', opcname => 'cid_ops', opcfamily => 'hash/cid_ops',
169169
opcintype => 'cid' },
170+
{ opcmethod => 'hash', opcname => 'tid_ops', opcfamily => 'hash/tid_ops',
171+
opcintype => 'tid' },
170172
{ opcmethod => 'hash', opcname => 'text_pattern_ops',
171173
opcfamily => 'hash/text_pattern_ops', opcintype => 'text',
172174
opcdefault => 'f' },

‎src/include/catalog/pg_operator.dat

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -204,9 +204,10 @@
204204
oprrest => 'eqsel', oprjoin => 'eqjoinsel' },
205205

206206
{ oid => '387', oid_symbol => 'TIDEqualOperator', descr => 'equal',
207-
oprname => '=', oprcanmerge => 't', oprleft => 'tid', oprright => 'tid',
208-
oprresult => 'bool', oprcom => '=(tid,tid)', oprnegate => '<>(tid,tid)',
209-
oprcode => 'tideq', oprrest => 'eqsel', oprjoin => 'eqjoinsel' },
207+
oprname => '=', oprcanmerge => 't', oprcanhash => 't', oprleft => 'tid',
208+
oprright => 'tid', oprresult => 'bool', oprcom => '=(tid,tid)',
209+
oprnegate => '<>(tid,tid)', oprcode => 'tideq', oprrest => 'eqsel',
210+
oprjoin => 'eqjoinsel' },
210211
{ oid => '402', descr => 'not equal',
211212
oprname => '<>', oprleft => 'tid', oprright => 'tid', oprresult => 'bool',
212213
oprcom => '<>(tid,tid)', oprnegate => '=(tid,tid)', oprcode => 'tidne',

‎src/include/catalog/pg_opfamily.dat

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,8 @@
112112
opfmethod => 'hash', opfname => 'xid_ops' },
113113
{ oid => '2226',
114114
opfmethod => 'hash', opfname => 'cid_ops' },
115+
{ oid => '2227',
116+
opfmethod => 'hash', opfname => 'tid_ops' },
115117
{ oid => '2229',
116118
opfmethod => 'hash', opfname => 'text_pattern_ops' },
117119
{ oid => '2231',

‎src/include/catalog/pg_proc.dat

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2484,6 +2484,12 @@
24842484
{ oid => '2796', descr => 'smaller of two',
24852485
proname => 'tidsmaller', prorettype => 'tid', proargtypes => 'tid tid',
24862486
prosrc => 'tidsmaller' },
2487+
{ oid => '2233', descr => 'hash',
2488+
proname => 'hashtid', prorettype => 'int4', proargtypes => 'tid',
2489+
prosrc => 'hashtid' },
2490+
{ oid => '2234', descr => 'hash',
2491+
proname => 'hashtidextended', prorettype => 'int8', proargtypes => 'tid int8',
2492+
prosrc => 'hashtidextended' },
24872493

24882494
{ oid => '1296',
24892495
proname => 'timedate_pl', prolang => '14', prorettype => 'timestamp',

‎src/test/regress/expected/tidscan.out

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ WHERE (id = 3 AND ctid IN ('(0,2)', '(0,3)')) OR (ctid = '(0,1)' AND id = 1);
109109
(2 rows)
110110

111111
-- nestloop-with-inner-tidscan joins on tid
112+
SET enable_hashjoin TO off; -- otherwise hash join might win
112113
EXPLAIN (COSTS OFF)
113114
SELECT t1.ctid, t1.*, t2.ctid, t2.*
114115
FROM tidscan t1 JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1;
@@ -147,6 +148,7 @@ FROM tidscan t1 LEFT JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1;
147148
(0,1) | 1 | (0,1) | 1
148149
(1 row)
149150

151+
RESET enable_hashjoin;
150152
-- exercise backward scan and rewind
151153
BEGIN;
152154
DECLARE c CURSOR FOR
@@ -231,4 +233,48 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
231233
UPDATE tidscan SET id = -id WHERE CURRENT OF c RETURNING *;
232234
ERROR: cursor "c" is not positioned on a row
233235
ROLLBACK;
236+
-- bulk joins on CTID
237+
-- (these plans don't use TID scans, but this still seems like an
238+
-- appropriate place for these tests)
239+
EXPLAIN (COSTS OFF)
240+
SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
241+
QUERY PLAN
242+
----------------------------------------
243+
Aggregate
244+
-> Hash Join
245+
Hash Cond: (t1.ctid = t2.ctid)
246+
-> Seq Scan on tenk1 t1
247+
-> Hash
248+
-> Seq Scan on tenk1 t2
249+
(6 rows)
250+
251+
SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
252+
count
253+
-------
254+
10000
255+
(1 row)
256+
257+
SET enable_hashjoin TO off;
258+
EXPLAIN (COSTS OFF)
259+
SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
260+
QUERY PLAN
261+
-----------------------------------------
262+
Aggregate
263+
-> Merge Join
264+
Merge Cond: (t1.ctid = t2.ctid)
265+
-> Sort
266+
Sort Key: t1.ctid
267+
-> Seq Scan on tenk1 t1
268+
-> Sort
269+
Sort Key: t2.ctid
270+
-> Seq Scan on tenk1 t2
271+
(9 rows)
272+
273+
SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
274+
count
275+
-------
276+
10000
277+
(1 row)
278+
279+
RESET enable_hashjoin;
234280
DROP TABLE tidscan;

‎src/test/regress/sql/tidscan.sql

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ SELECT ctid, * FROM tidscan
4040
WHERE (id=3AND ctidIN ('(0,2)','(0,3)'))OR (ctid='(0,1)'AND id=1);
4141

4242
-- nestloop-with-inner-tidscan joins on tid
43+
SET enable_hashjoin TO off;-- otherwise hash join might win
4344
EXPLAIN (COSTS OFF)
4445
SELECTt1.ctid, t1.*,t2.ctid, t2.*
4546
FROM tidscan t1JOIN tidscan t2ONt1.ctid=t2.ctidWHEREt1.id=1;
@@ -50,6 +51,7 @@ SELECT t1.ctid, t1.*, t2.ctid, t2.*
5051
FROM tidscan t1LEFT JOIN tidscan t2ONt1.ctid=t2.ctidWHEREt1.id=1;
5152
SELECTt1.ctid, t1.*,t2.ctid, t2.*
5253
FROM tidscan t1LEFT JOIN tidscan t2ONt1.ctid=t2.ctidWHEREt1.id=1;
54+
RESET enable_hashjoin;
5355

5456
-- exercise backward scan and rewind
5557
BEGIN;
@@ -80,4 +82,16 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
8082
UPDATE tidscanSET id=-idWHERE CURRENT OF c RETURNING*;
8183
ROLLBACK;
8284

85+
-- bulk joins on CTID
86+
-- (these plans don't use TID scans, but this still seems like an
87+
-- appropriate place for these tests)
88+
EXPLAIN (COSTS OFF)
89+
SELECTcount(*)FROM tenk1 t1JOIN tenk1 t2ONt1.ctid=t2.ctid;
90+
SELECTcount(*)FROM tenk1 t1JOIN tenk1 t2ONt1.ctid=t2.ctid;
91+
SET enable_hashjoin TO off;
92+
EXPLAIN (COSTS OFF)
93+
SELECTcount(*)FROM tenk1 t1JOIN tenk1 t2ONt1.ctid=t2.ctid;
94+
SELECTcount(*)FROM tenk1 t1JOIN tenk1 t2ONt1.ctid=t2.ctid;
95+
RESET enable_hashjoin;
96+
8397
DROPTABLE tidscan;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp