Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit83b6131

Browse files
committed
Improve behavior of tsearch_readline(), and remove t_readline().
Commitfbeb9da, which added the tsearch_readline APIs, leftt_readline() in place as a compatibility measure. But that functionhas been unused and deprecated for twelve years now, so that seemslike enough time to remove it. Doing so, and merging t_readline'scode into tsearch_readline, aids in making several usefulimprovements:* The hard-wired 4K limit on line length in tsearch data files isremoved, by using a StringInfo buffer instead of a fixed-size buffer.* We can buy back the per-line palloc/pfree added by3ea7e95in the common case where encoding conversion is not required.* We no longer need a separate pg_verify_mbstr call, as thatfunctionality was folded into encoding conversion some time ago.(We could have done some of this stuff while keeping t_readline as aseparate API, but there seems little point, since there's no reasonfor anyone to still be using t_readline directly.)Discussion:https://postgr.es/m/48A4FA71-524E-41B9-953A-FD04EF36E2E7@yesql.se
1 parentaca7484 commit83b6131

File tree

3 files changed

+32
-58
lines changed

3 files changed

+32
-58
lines changed

‎src/backend/tsearch/dict_thesaurus.c

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -286,11 +286,6 @@ thesaurusRead(const char *filename, DictThesaurus *d)
286286
(errcode(ERRCODE_CONFIG_FILE_ERROR),
287287
errmsg("unexpected end of line")));
288288

289-
/*
290-
* Note: currently, tsearch_readline can't return lines exceeding 4KB,
291-
* so overflow of the word counts is impossible. But that may not
292-
* always be true, so let's check.
293-
*/
294289
if (nwrd!= (uint16)nwrd||posinsubst!= (uint16)posinsubst)
295290
ereport(ERROR,
296291
(errcode(ERRCODE_CONFIG_FILE_ERROR),

‎src/backend/tsearch/ts_locale.c

Lines changed: 28 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include"postgres.h"
1515

1616
#include"catalog/pg_collation.h"
17+
#include"common/string.h"
1718
#include"storage/fd.h"
1819
#include"tsearch/ts_locale.h"
1920
#include"tsearch/ts_public.h"
@@ -128,6 +129,7 @@ tsearch_readline_begin(tsearch_readline_state *stp,
128129
return false;
129130
stp->filename=filename;
130131
stp->lineno=0;
132+
initStringInfo(&stp->buf);
131133
stp->curline=NULL;
132134
/* Setup error traceback support for ereport() */
133135
stp->cb.callback=tsearch_readline_callback;
@@ -145,31 +147,43 @@ tsearch_readline_begin(tsearch_readline_state *stp,
145147
char*
146148
tsearch_readline(tsearch_readline_state*stp)
147149
{
148-
char*result;
150+
char*recoded;
149151

150152
/* Advance line number to use in error reports */
151153
stp->lineno++;
152154

153155
/* Clear curline, it's no longer relevant */
154156
if (stp->curline)
155157
{
156-
pfree(stp->curline);
158+
if (stp->curline!=stp->buf.data)
159+
pfree(stp->curline);
157160
stp->curline=NULL;
158161
}
159162

160163
/* Collect next line, if there is one */
161-
result=t_readline(stp->fp);
162-
if (!result)
164+
if (!pg_get_line_buf(stp->fp,&stp->buf))
163165
returnNULL;
164166

167+
/* Validate the input as UTF-8, then convert to DB encoding if needed */
168+
recoded=pg_any_to_server(stp->buf.data,stp->buf.len,PG_UTF8);
169+
170+
/* Save the correctly-encoded string for possible error reports */
171+
stp->curline=recoded;/* might be equal to buf.data */
172+
165173
/*
166-
* Save a copy of the line for possible use in error reports. (We cannot
167-
* just save "result", since it's likely to get pfree'd at some point by
168-
* the caller; an error after that would try to access freed data.)
174+
* We always return a freshly pstrdup'd string. This is clearly necessary
175+
* if pg_any_to_server() returned buf.data, and we need a second copy even
176+
* if encoding conversion did occur. The caller is entitled to pfree the
177+
* returned string at any time, which would leave curline pointing to
178+
* recycled storage, causing problems if an error occurs after that point.
179+
* (It's preferable to return the result of pstrdup instead of the output
180+
* of pg_any_to_server, because the conversion result tends to be
181+
* over-allocated. Since callers might save the result string directly
182+
* into a long-lived dictionary structure, we don't want it to be a larger
183+
* palloc chunk than necessary. We'll reclaim the conversion result on
184+
* the next call.)
169185
*/
170-
stp->curline=pstrdup(result);
171-
172-
returnresult;
186+
returnpstrdup(recoded);
173187
}
174188

175189
/*
@@ -181,11 +195,13 @@ tsearch_readline_end(tsearch_readline_state *stp)
181195
/* Suppress use of curline in any error reported below */
182196
if (stp->curline)
183197
{
184-
pfree(stp->curline);
198+
if (stp->curline!=stp->buf.data)
199+
pfree(stp->curline);
185200
stp->curline=NULL;
186201
}
187202

188203
/* Release other resources */
204+
pfree(stp->buf.data);
189205
FreeFile(stp->fp);
190206

191207
/* Pop the error context stack */
@@ -203,8 +219,7 @@ tsearch_readline_callback(void *arg)
203219

204220
/*
205221
* We can't include the text of the config line for errors that occur
206-
* during t_readline() itself. This is only partly a consequence of our
207-
* arms-length use of that routine: the major cause of such errors is
222+
* during tsearch_readline() itself. The major cause of such errors is
208223
* encoding violations, and we daren't try to print error messages
209224
* containing badly-encoded data.
210225
*/
@@ -220,43 +235,6 @@ tsearch_readline_callback(void *arg)
220235
}
221236

222237

223-
/*
224-
* Read the next line from a tsearch data file (expected to be in UTF-8), and
225-
* convert it to database encoding if needed. The returned string is palloc'd.
226-
* NULL return means EOF.
227-
*
228-
* Note: direct use of this function is now deprecated. Go through
229-
* tsearch_readline() to provide better error reporting.
230-
*/
231-
char*
232-
t_readline(FILE*fp)
233-
{
234-
intlen;
235-
char*recoded;
236-
charbuf[4096];/* lines must not be longer than this */
237-
238-
if (fgets(buf,sizeof(buf),fp)==NULL)
239-
returnNULL;
240-
241-
len=strlen(buf);
242-
243-
/* Make sure the input is valid UTF-8 */
244-
(void)pg_verify_mbstr(PG_UTF8,buf,len, false);
245-
246-
/* And convert */
247-
recoded=pg_any_to_server(buf,len,PG_UTF8);
248-
if (recoded==buf)
249-
{
250-
/*
251-
* conversion didn't pstrdup, so we must. We can use the length of the
252-
* original string, because no conversion was done.
253-
*/
254-
recoded=pnstrdup(recoded,len);
255-
}
256-
257-
returnrecoded;
258-
}
259-
260238
/*
261239
* lowerstr --- fold null-terminated string to lower case
262240
*

‎src/include/tsearch/ts_locale.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include<ctype.h>
1616
#include<limits.h>
1717

18+
#include"lib/stringinfo.h"
1819
#include"mb/pg_wchar.h"
1920
#include"utils/pg_locale.h"
2021

@@ -33,7 +34,9 @@ typedef struct
3334
FILE*fp;
3435
constchar*filename;
3536
intlineno;
36-
char*curline;
37+
StringInfoDatabuf;/* current input line, in UTF-8 */
38+
char*curline;/* current input line, in DB's encoding */
39+
/* curline may be NULL, or equal to buf.data, or a palloc'd string */
3740
ErrorContextCallbackcb;
3841
}tsearch_readline_state;
3942

@@ -57,6 +60,4 @@ extern bool tsearch_readline_begin(tsearch_readline_state *stp,
5760
externchar*tsearch_readline(tsearch_readline_state*stp);
5861
externvoidtsearch_readline_end(tsearch_readline_state*stp);
5962

60-
externchar*t_readline(FILE*fp);
61-
6263
#endif/* __TSLOCALE_H__ */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp