Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitb6ef167

Browse files
committed
Introduce optimized routine for linear searches of arrays
Use SSE2 intrinsics to speed up the search, where available. Otherwise,use a simple 'for' loop. The motivation to add this now is to speed upXidInMVCCSnapshot(), which is the reason only unsigned 32-bit integerarrays are optimized. Other types are left for future work, as is theextension of this technique to non-x86 platforms.Nathan BossartReviewed by: Andres Freund, Bharath Rupireddy, Masahiko SawadaDiscussion:https://postgr.es/m/20220713170950.GA3116318%40nathanxps13
1 parent356dd2c commitb6ef167

File tree

9 files changed

+215
-0
lines changed

9 files changed

+215
-0
lines changed

‎src/include/port/pg_lfind.h

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/*-------------------------------------------------------------------------
2+
*
3+
* pg_lfind.h
4+
* Optimized linear search routines.
5+
*
6+
* Copyright (c) 2022, PostgreSQL Global Development Group
7+
*
8+
* IDENTIFICATION
9+
* src/include/port/pg_lfind.h
10+
*
11+
*-------------------------------------------------------------------------
12+
*/
13+
#ifndefPG_LFIND_H
14+
#definePG_LFIND_H
15+
16+
#include"port/simd.h"
17+
18+
/*
19+
* pg_lfind32
20+
*
21+
* Return true if there is an element in 'base' that equals 'key', otherwise
22+
* return false.
23+
*/
24+
staticinlinebool
25+
pg_lfind32(uint32key,uint32*base,uint32nelem)
26+
{
27+
uint32i=0;
28+
29+
/* Use SIMD intrinsics where available. */
30+
#ifdefUSE_SSE2
31+
32+
/*
33+
* A 16-byte register only has four 4-byte lanes. For better
34+
* instruction-level parallelism, each loop iteration operates on a block
35+
* of four registers. Testing has showed this is ~40% faster than using a
36+
* block of two registers.
37+
*/
38+
const__m128ikeys=_mm_set1_epi32(key);/* load 4 copies of key */
39+
uint32iterations=nelem& ~0xF;/* round down to multiple of 16 */
40+
41+
#if defined(USE_ASSERT_CHECKING)
42+
boolassert_result= false;
43+
44+
/* pre-compute the result for assert checking */
45+
for (i=0;i<nelem;i++)
46+
{
47+
if (key==base[i])
48+
{
49+
assert_result= true;
50+
break;
51+
}
52+
}
53+
#endif
54+
55+
for (i=0;i<iterations;i+=16)
56+
{
57+
/* load the next block into 4 registers holding 4 values each */
58+
const__m128ivals1=_mm_loadu_si128((__m128i*)&base[i]);
59+
const__m128ivals2=_mm_loadu_si128((__m128i*)&base[i+4]);
60+
const__m128ivals3=_mm_loadu_si128((__m128i*)&base[i+8]);
61+
const__m128ivals4=_mm_loadu_si128((__m128i*)&base[i+12]);
62+
63+
/* compare each value to the key */
64+
const__m128iresult1=_mm_cmpeq_epi32(keys,vals1);
65+
const__m128iresult2=_mm_cmpeq_epi32(keys,vals2);
66+
const__m128iresult3=_mm_cmpeq_epi32(keys,vals3);
67+
const__m128iresult4=_mm_cmpeq_epi32(keys,vals4);
68+
69+
/* combine the results into a single variable */
70+
const__m128itmp1=_mm_or_si128(result1,result2);
71+
const__m128itmp2=_mm_or_si128(result3,result4);
72+
const__m128iresult=_mm_or_si128(tmp1,tmp2);
73+
74+
/* see if there was a match */
75+
if (_mm_movemask_epi8(result)!=0)
76+
{
77+
#if defined(USE_ASSERT_CHECKING)
78+
Assert(assert_result== true);
79+
#endif
80+
return true;
81+
}
82+
}
83+
#endif/* USE_SSE2 */
84+
85+
/* Process the remaining elements one at a time. */
86+
for (;i<nelem;i++)
87+
{
88+
if (key==base[i])
89+
{
90+
#if defined(USE_SSE2)&& defined(USE_ASSERT_CHECKING)
91+
Assert(assert_result== true);
92+
#endif
93+
return true;
94+
}
95+
}
96+
97+
#if defined(USE_SSE2)&& defined(USE_ASSERT_CHECKING)
98+
Assert(assert_result== false);
99+
#endif
100+
return false;
101+
}
102+
103+
#endif/* PG_LFIND_H */

‎src/test/modules/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ SUBDIRS = \
1919
test_extensions\
2020
test_ginpostinglist\
2121
test_integerset\
22+
test_lfind\
2223
test_misc\
2324
test_oat_hooks\
2425
test_parser\
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Generated subdirectories
2+
/log/
3+
/results/
4+
/tmp_check/

‎src/test/modules/test_lfind/Makefile

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# src/test/modules/test_lfind/Makefile
2+
3+
MODULE_big = test_lfind
4+
OBJS =\
5+
$(WIN32RES)\
6+
test_lfind.o
7+
PGFILEDESC = "test_lfind - test code for optimized linear search functions"
8+
9+
EXTENSION = test_lfind
10+
DATA = test_lfind--1.0.sql
11+
12+
REGRESS = test_lfind
13+
14+
ifdefUSE_PGXS
15+
PG_CONFIG = pg_config
16+
PGXS :=$(shell$(PG_CONFIG) --pgxs)
17+
include$(PGXS)
18+
else
19+
subdir = src/test/modules/test_lfind
20+
top_builddir = ../../../..
21+
include$(top_builddir)/src/Makefile.global
22+
include$(top_srcdir)/contrib/contrib-global.mk
23+
endif
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
CREATE EXTENSION test_lfind;
2+
--
3+
-- These tests don't produce any interesting output. We're checking that
4+
-- the operations complete without crashing or hanging and that none of their
5+
-- internal sanity tests fail.
6+
--
7+
SELECT test_lfind();
8+
test_lfind
9+
------------
10+
11+
(1 row)
12+
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
CREATE EXTENSION test_lfind;
2+
3+
--
4+
-- These tests don't produce any interesting output. We're checking that
5+
-- the operations complete without crashing or hanging and that none of their
6+
-- internal sanity tests fail.
7+
--
8+
SELECT test_lfind();
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
/* src/test/modules/test_lfind/test_lfind--1.0.sql*/
2+
3+
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
4+
\echo Use"CREATE EXTENSION test_lfind" to load this file. \quit
5+
6+
CREATEFUNCTIONtest_lfind()
7+
RETURNSpg_catalog.void
8+
AS'MODULE_PATHNAME' LANGUAGE C;
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/*--------------------------------------------------------------------------
2+
*
3+
* test_lfind.c
4+
*Test correctness of optimized linear search functions.
5+
*
6+
* Copyright (c) 2022, PostgreSQL Global Development Group
7+
*
8+
* IDENTIFICATION
9+
*src/test/modules/test_lfind/test_lfind.c
10+
*
11+
* -------------------------------------------------------------------------
12+
*/
13+
14+
#include"postgres.h"
15+
16+
#include"fmgr.h"
17+
#include"port/pg_lfind.h"
18+
19+
PG_MODULE_MAGIC;
20+
21+
PG_FUNCTION_INFO_V1(test_lfind);
22+
23+
Datum
24+
test_lfind(PG_FUNCTION_ARGS)
25+
{
26+
#defineTEST_ARRAY_SIZE 135
27+
uint32test_array[TEST_ARRAY_SIZE]= {0};
28+
29+
test_array[8]=1;
30+
test_array[64]=2;
31+
test_array[TEST_ARRAY_SIZE-1]=3;
32+
33+
if (pg_lfind32(1,test_array,4))
34+
elog(ERROR,"pg_lfind32() found nonexistent element");
35+
if (!pg_lfind32(1,test_array,TEST_ARRAY_SIZE))
36+
elog(ERROR,"pg_lfind32() did not find existing element");
37+
38+
if (pg_lfind32(2,test_array,32))
39+
elog(ERROR,"pg_lfind32() found nonexistent element");
40+
if (!pg_lfind32(2,test_array,TEST_ARRAY_SIZE))
41+
elog(ERROR,"pg_lfind32() did not find existing element");
42+
43+
if (pg_lfind32(3,test_array,96))
44+
elog(ERROR,"pg_lfind32() found nonexistent element");
45+
if (!pg_lfind32(3,test_array,TEST_ARRAY_SIZE))
46+
elog(ERROR,"pg_lfind32() did not find existing element");
47+
48+
if (pg_lfind32(4,test_array,TEST_ARRAY_SIZE))
49+
elog(ERROR,"pg_lfind32() found nonexistent element");
50+
51+
PG_RETURN_VOID();
52+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
comment = 'Test code for optimized linear search functions'
2+
default_version = '1.0'
3+
module_pathname = '$libdir/test_lfind'
4+
relocatable = true

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp