Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 27f78d4

Browse files
author
Maksim Milyutin
committed
First incomplete version of lexeme hashing in index
1 parent 2f57c4b commit 27f78d4

File tree

4 files changed

+49
-19
lines changed

4 files changed

+49
-19
lines changed

‎rum--1.0.sql‎

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,17 @@ RETURNS bytea
8080
AS 'MODULE_PATHNAME'
8181
LANGUAGE C IMMUTABLE STRICT;
8282

83+
CREATE FUNCTION rum_cmp_tslexeme(bytea,bytea)
84+
RETURNS integer
85+
AS 'MODULE_PATHNAME'
86+
LANGUAGE C IMMUTABLE STRICT;
87+
8388
CREATE OPERATOR CLASS rum_tsvector_ops
8489
FOR TYPE tsvector USING rum
8590
AS
8691
OPERATOR 1 @@ (tsvector, tsquery),
8792
OPERATOR 2 <=> (tsvector, tsquery) FOR ORDER BY pg_catalog.float_ops,
88-
FUNCTION 1 gin_cmp_tslexeme(text,text),
93+
FUNCTION 1 rum_cmp_tslexeme(bytea,bytea),
8994
FUNCTION 2 rum_extract_tsvector(tsvector,internal,internal,internal,internal),
9095
FUNCTION 3 rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal),
9196
FUNCTION 4 rum_tsquery_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal),
@@ -94,7 +99,7 @@ AS
9499
FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal),
95100
FUNCTION 8 rum_tsquery_distance(internal,smallint,tsvector,int,internal,internal,internal,internal,internal),
96101
FUNCTION 10 rum_ts_join_pos(internal, internal),
97-
STORAGE text;
102+
STORAGE bytea;
98103
-- timestamp ops
99104

100105
CREATE FUNCTION timestamp_distance(timestamp,timestamp)

‎rum.h‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ typedef signed char RumNullCategory;
243243
*/
244244
#define RumGetDownlink(itup) RumItemPointerGetBlockNumber(&(itup)->t_tid)
245245
#define RumSetDownlink(itup,blkno) ItemPointerSet(&(itup)->t_tid, blkno, InvalidOffsetNumber)
246-
246+
CREATE INDEX rumidx ON test_rum USING rum (a rum_tsvector_ops);
247247

248248
/*
249249
* Data (posting tree) pages

‎rum_ts_utils.c‎

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@
1111

1212
#include "postgres.h"
1313

14+
#include "access/hash.h"
1415
#include "access/htup_details.h"
16+
#include "catalog/pg_collation.h"
1517
#include "catalog/pg_type.h"
1618
#include "funcapi.h"
1719
#include "miscadmin.h"
@@ -25,6 +27,7 @@
2527

2628
#include <math.h>
2729

30+
PG_FUNCTION_INFO_V1(rum_cmp_tslexeme);
2831
PG_FUNCTION_INFO_V1(rum_extract_tsvector);
2932
PG_FUNCTION_INFO_V1(rum_extract_tsquery);
3033
PG_FUNCTION_INFO_V1(rum_tsvector_config);
@@ -503,11 +506,15 @@ rum_extract_tsvector(PG_FUNCTION_ARGS)
503506
for (i=0;i<vector->size;i++)
504507
{
505508
text*txt;
509+
bytea*hash_value;
506510
bytea*posData;
507511
int posDataSize;
508512

509513
txt=cstring_to_text_with_len(STRPTR(vector)+we->pos,we->len);
510-
entries[i]=PointerGetDatum(txt);
514+
hash_value= (bytea*)palloc(VARHDRSZ+sizeof(int32));
515+
SET_VARSIZE(hash_value,VARHDRSZ+sizeof(int32));
516+
*VARDATA(hash_value)=DirectFunctionCall1(hashtext,PointerGetDatum(txt));
517+
entries[i]=PointerGetDatum(hash_value);
511518

512519
if (we->haspos)
513520
{
@@ -586,10 +593,14 @@ rum_extract_tsquery(PG_FUNCTION_ARGS)
586593
for (i=0;i< (*nentries);i++)
587594
{
588595
text*txt;
596+
bytea*hash_value;
589597

590598
txt=cstring_to_text_with_len(GETOPERAND(query)+operands[i]->distance,
591599
operands[i]->length);
592-
entries[i]=PointerGetDatum(txt);
600+
hash_value= (bytea*)palloc(VARHDRSZ+sizeof(int32));
601+
SET_VARSIZE(hash_value,VARHDRSZ+sizeof(int32));
602+
*VARDATA(hash_value)=DirectFunctionCall1(hashtext,PointerGetDatum(txt));
603+
entries[i]=PointerGetDatum(hash_value);
593604
partialmatch[i]=operands[i]->prefix;
594605
(*extra_data)[i]= (Pointer)map_item_operand;
595606
}
@@ -1389,3 +1400,17 @@ rum_ts_join_pos(PG_FUNCTION_ARGS)
13891400

13901401
PG_RETURN_BYTEA_P(result);
13911402
}
1403+
1404+
Datum
1405+
rum_cmp_tslexeme(PG_FUNCTION_ARGS)
1406+
{
1407+
bytea*arg1=PG_GETARG_BYTEA_P(0);
1408+
bytea*arg2=PG_GETARG_BYTEA_P(1);
1409+
int32 a=*VARDATA(arg1);
1410+
int32 b=*VARDATA(arg2);
1411+
int cmp;
1412+
1413+
cmp= (a>b) ?1 : ((a==b) ?0 :-1);
1414+
1415+
PG_RETURN_INT32(cmp);
1416+
}

‎sql/rum.sql‎

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -100,17 +100,17 @@ DELETE FROM tst WHERE i = 5;
100100
VACUUM tst;
101101
INSERT INTO tst SELECT i%10, to_tsvector('simple', substr(md5(i::text),1,1)) FROM generate_series(14001,15000) i;
102102

103-
set enable_bitmapscan=off;
104-
explain (costs off)
105-
SELECT a<=> to_tsquery('pg_catalog.english','w:*'),*
106-
FROM test_rum
107-
WHERE a @@ to_tsquery('pg_catalog.english','w:*')
108-
ORDER BY a<=> to_tsquery('pg_catalog.english','w:*');
109-
SELECT a<=> to_tsquery('pg_catalog.english','w:*'),*
110-
FROM test_rum
111-
WHERE a @@ to_tsquery('pg_catalog.english','w:*')
112-
ORDER BY a<=> to_tsquery('pg_catalog.english','w:*');
113-
SELECT a<=> to_tsquery('pg_catalog.english','b:*'),*
114-
FROM test_rum
115-
WHERE a @@ to_tsquery('pg_catalog.english','b:*')
116-
ORDER BY a<=> to_tsquery('pg_catalog.english','b:*');
103+
--set enable_bitmapscan=off;
104+
--explain (costs off)
105+
--SELECT a <=> to_tsquery('pg_catalog.english', 'w:*'), *
106+
--FROM test_rum
107+
--WHERE a @@ to_tsquery('pg_catalog.english', 'w:*')
108+
--ORDER BY a <=> to_tsquery('pg_catalog.english', 'w:*');
109+
--SELECT a <=> to_tsquery('pg_catalog.english', 'w:*'), *
110+
--FROM test_rum
111+
--WHERE a @@ to_tsquery('pg_catalog.english', 'w:*')
112+
--ORDER BY a <=> to_tsquery('pg_catalog.english', 'w:*');
113+
--SELECT a <=> to_tsquery('pg_catalog.english', 'b:*'), *
114+
--FROM test_rum
115+
--WHERE a @@ to_tsquery('pg_catalog.english', 'b:*')
116+
--ORDER BY a <=> to_tsquery('pg_catalog.english', 'b:*');

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp