Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit9b20e54

Browse files
committed
Merge branch 'master' into stable
2 parentsc98c15f +a13fdc4 commit9b20e54

File tree

5 files changed

+69
-126
lines changed

5 files changed

+69
-126
lines changed

‎LICENSE

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
pg_tsparser is released under the PostgreSQL License, a liberal Open Source license, similar to the BSD or MIT licenses.
2+
3+
Copyright (c) 2016-2018, Postgres Professional
4+
5+
Permission to use, copy, modify, and distribute this software and its documentation for any purpose, without fee, and without a written agreement is hereby granted, provided that the above copyright notice and this paragraph and the following two paragraphs appear in all copies.
6+
7+
IN NO EVENT SHALL POSTGRES PROFESSIONAL BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF POSTGRES PROFESSIONAL HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
8+
9+
POSTGRES PROFESSIONAL SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND POSTGRES PROFESSIONAL HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.

‎README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ SELECT to_tsvector('english', 'rel-3.2-A') as def_parser,
3434

3535
## License
3636

37-
This module available under the same license as
37+
This module is available under the [license](LICENSE) similar to
3838
[PostgreSQL](http://www.postgresql.org/about/licence/).
3939

4040
## Installation

‎expected/pg_tsparser.out

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,3 +236,31 @@ SELECT to_tsvector('english_ts', 'test2.com');
236236
'com':3 'test2':2 'test2.com':1
237237
(1 row)
238238

239+
-- Test non-ASCII symbols
240+
SELECT * from ts_parse('tsparser', 'аб_вгд 12_абв 12-абв абв.рф абв2.рф');
241+
tokid | token
242+
-------+--------
243+
17 | аб_вгд
244+
10 | аб
245+
12 | _
246+
10 | вгд
247+
12 |
248+
15 | 12_абв
249+
9 | 12
250+
12 | _
251+
10 | абв
252+
12 |
253+
15 | 12-абв
254+
9 | 12
255+
12 | -
256+
10 | абв
257+
12 |
258+
2 | абв
259+
12 | .
260+
2 | рф
261+
12 |
262+
3 | абв2
263+
12 | .
264+
2 | рф
265+
(22 rows)
266+

‎sql/pg_tsparser.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,6 @@ SELECT to_tsvector('english_ts', '12_abc');
2626
SELECT to_tsvector('english_ts','12-abc');
2727
SELECT to_tsvector('english_ts','test.com');
2828
SELECT to_tsvector('english_ts','test2.com');
29+
30+
-- Test non-ASCII symbols
31+
SELECT*from ts_parse('tsparser','аб_вгд 12_абв 12-абв абв.рф абв2.рф');

‎tsparser.c

Lines changed: 28 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -249,11 +249,9 @@ typedef struct TParser
249249
/* string and position information */
250250
char*str;/* multibyte string */
251251
intlenstr;/* length of mbstring */
252-
#ifdefUSE_WIDE_UPPER_LOWER
253252
wchar_t*wstr;/* wide character string */
254253
pg_wchar*pgwstr;/* wide character string for C-locale */
255254
boolusewide;
256-
#endif
257255

258256
/* State of parse */
259257
intcharmaxlen;
@@ -302,8 +300,6 @@ TParserInit(char *str, int len)
302300
prs->str=str;
303301
prs->lenstr=len;
304302

305-
#ifdefUSE_WIDE_UPPER_LOWER
306-
307303
/*
308304
* Use wide char code only when max encoding length > 1.
309305
*/
@@ -331,7 +327,6 @@ TParserInit(char *str, int len)
331327
}
332328
else
333329
prs->usewide= false;
334-
#endif
335330

336331
prs->state=newTParserPosition(NULL);
337332
prs->state->state=TPS_Base;
@@ -368,15 +363,12 @@ TParserCopyInit(const TParser *orig)
368363
prs->charmaxlen=orig->charmaxlen;
369364
prs->str=orig->str+orig->state->posbyte;
370365
prs->lenstr=orig->lenstr-orig->state->posbyte;
371-
372-
#ifdefUSE_WIDE_UPPER_LOWER
373366
prs->usewide=orig->usewide;
374367

375368
if (orig->pgwstr)
376369
prs->pgwstr=orig->pgwstr+orig->state->poschar;
377370
if (orig->wstr)
378371
prs->wstr=orig->wstr+orig->state->poschar;
379-
#endif
380372

381373
prs->state=newTParserPosition(NULL);
382374
prs->state->state=TPS_Base;
@@ -401,12 +393,10 @@ TParserClose(TParser *prs)
401393
prs->state=ptr;
402394
}
403395

404-
#ifdefUSE_WIDE_UPPER_LOWER
405396
if (prs->wstr)
406397
pfree(prs->wstr);
407398
if (prs->pgwstr)
408399
pfree(prs->pgwstr);
409-
#endif
410400

411401
#ifdefWPARSER_TRACE
412402
fprintf(stderr,"closing parser\n");
@@ -445,96 +435,45 @@ TParserCopyClose(TParser *prs)
445435
*- if locale is C then we use pgwstr instead of wstr.
446436
*/
447437

448-
#ifdefUSE_WIDE_UPPER_LOWER
449-
450-
#definep_iswhat(type)\
438+
#definep_iswhat(type,nonascii)\
439+
\
451440
static int\
452-
p_is##type(TParser *prs) {\
453-
Assert( prs->state );\
454-
if ( prs->usewide )\
441+
p_is##type(TParser *prs)\
442+
{\
443+
Assert(prs->state);\
444+
if (prs->usewide)\
455445
{\
456-
if (prs->pgwstr)\
446+
if (prs->pgwstr)\
457447
{\
458448
unsigned int c = *(prs->pgwstr + prs->state->poschar);\
459-
if (c > 0x7f)\
460-
return0;\
461-
return is##type( c );\
449+
if (c > 0x7f)\
450+
returnnonascii;\
451+
return is##type(c);\
462452
}\
463-
return isw##type( *(prs->wstr + prs->state->poschar ) );\
453+
return isw##type(*(prs->wstr + prs->state->poschar));\
464454
}\
465-
\
466-
return is##type( *(unsigned char*)( prs->str + prs->state->posbyte ) ); \
467-
}\
455+
return is##type(*(unsigned char *) (prs->str + prs->state->posbyte));\
456+
}\
468457
\
469458
static int\
470-
p_isnot##type(TParser *prs) {\
459+
p_isnot##type(TParser *prs)\
460+
{\
471461
return !p_is##type(prs);\
472462
}
473463

474-
staticint
475-
p_isalnum(TParser*prs)
476-
{
477-
Assert(prs->state);
478-
479-
if (prs->usewide)
480-
{
481-
if (prs->pgwstr)
482-
{
483-
unsignedintc=*(prs->pgwstr+prs->state->poschar);
484-
485-
/*
486-
* any non-ascii symbol with multibyte encoding with C-locale is
487-
* an alpha character
488-
*/
489-
if (c>0x7f)
490-
return1;
491-
492-
returnisalnum(c);
493-
}
494-
495-
returniswalnum(*(prs->wstr+prs->state->poschar));
496-
}
497-
498-
returnisalnum(*(unsignedchar*) (prs->str+prs->state->posbyte));
499-
}
500-
staticint
501-
p_isnotalnum(TParser*prs)
502-
{
503-
return !p_isalnum(prs);
504-
}
505-
506-
staticint
507-
p_isalpha(TParser*prs)
508-
{
509-
Assert(prs->state);
510-
511-
if (prs->usewide)
512-
{
513-
if (prs->pgwstr)
514-
{
515-
unsignedintc=*(prs->pgwstr+prs->state->poschar);
516-
517-
/*
518-
* any non-ascii symbol with multibyte encoding with C-locale is
519-
* an alpha character
520-
*/
521-
if (c>0x7f)
522-
return1;
523-
524-
returnisalpha(c);
525-
}
526-
527-
returniswalpha(*(prs->wstr+prs->state->poschar));
528-
}
529-
530-
returnisalpha(*(unsignedchar*) (prs->str+prs->state->posbyte));
531-
}
532-
533-
staticint
534-
p_isnotalpha(TParser*prs)
535-
{
536-
return !p_isalpha(prs);
537-
}
464+
/*
465+
* In C locale with a multibyte encoding, any non-ASCII symbol is considered
466+
* an alpha character, but not a member of other char classes.
467+
*/
468+
p_iswhat(alnum,1)
469+
p_iswhat(alpha,1)
470+
p_iswhat(digit,0)
471+
p_iswhat(lower,0)
472+
p_iswhat(print,0)
473+
p_iswhat(punct,0)
474+
p_iswhat(space,0)
475+
p_iswhat(upper,0)
476+
p_iswhat(xdigit,0)
538477

539478
/* p_iseq should be used only for ascii symbols */
540479

@@ -544,39 +483,6 @@ p_iseq(TParser *prs, char c)
544483
Assert(prs->state);
545484
return ((prs->state->charlen==1&&*(prs->str+prs->state->posbyte)==c)) ?1 :0;
546485
}
547-
#else/* USE_WIDE_UPPER_LOWER */
548-
549-
#definep_iswhat(type)\
550-
static int\
551-
p_is##type(TParser *prs) {\
552-
Assert( prs->state );\
553-
return is##type( (unsigned char)*( prs->str + prs->state->posbyte ) );\
554-
}\
555-
\
556-
static int\
557-
p_isnot##type(TParser *prs) {\
558-
return !p_is##type(prs);\
559-
}
560-
561-
562-
staticint
563-
p_iseq(TParser*prs,charc)
564-
{
565-
Assert(prs->state);
566-
return (*(prs->str+prs->state->posbyte)==c) ?1 :0;
567-
}
568-
569-
p_iswhat(alnum)
570-
p_iswhat(alpha)
571-
#endif/* USE_WIDE_UPPER_LOWER */
572-
573-
p_iswhat(digit)
574-
p_iswhat(lower)
575-
p_iswhat(print)
576-
p_iswhat(punct)
577-
p_iswhat(space)
578-
p_iswhat(upper)
579-
p_iswhat(xdigit)
580486

581487
staticint
582488
p_isEOF(TParser*prs)
@@ -793,8 +699,6 @@ p_isspecial(TParser *prs)
793699
if (pg_dsplen(prs->str+prs->state->posbyte)==0)
794700
return1;
795701

796-
#ifdefUSE_WIDE_UPPER_LOWER
797-
798702
/*
799703
* Unicode characters in the 'Mark, Spacing Combining' category. These
800704
* characters are not alphabetic, but they do not break words either.
@@ -1058,7 +962,6 @@ p_isspecial(TParser *prs)
1058962
StopHigh=StopMiddle;
1059963
}
1060964
}
1061-
#endif
1062965

1063966
return0;
1064967
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp