#include "postgres.h"
#include "catalog/pg_collation.h"
+#include "common/string.h"
#include "storage/fd.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
return false;
stp->filename = filename;
stp->lineno = 0;
+ initStringInfo(&stp->buf);
stp->curline = NULL;
/* Setup error traceback support for ereport() */
stp->cb.callback = tsearch_readline_callback;
/*
 * tsearch_readline --- fetch the next line of the file being read via stp.
 *
 * The line is collected into stp->buf with pg_get_line_buf(), then passed
 * through pg_any_to_server() with PG_UTF8 as the source encoding, so the
 * input is treated as UTF-8 and converted to the server encoding if needed.
 *
 * Returns a pstrdup'd copy of the converted line, which the caller may pfree
 * whenever it likes, or NULL when pg_get_line_buf() yields no further line.
 *
 * Side effects: stp->lineno is incremented on every call (including the one
 * that returns NULL), and stp->curline is left pointing at the converted
 * text so that it is available for error reports.  curline may alias
 * stp->buf.data, which is why it is only pfree'd when the two differ.
 */
char *
tsearch_readline(tsearch_readline_state *stp)
{
- char *result;
+ char *recoded;
/* Advance line number to use in error reports */
stp->lineno++;
/* Clear curline, it's no longer relevant */
if (stp->curline)
{
- pfree(stp->curline);
+ if (stp->curline != stp->buf.data)
+ pfree(stp->curline);
stp->curline = NULL;
}
/* Collect next line, if there is one */
- result = t_readline(stp->fp);
- if (!result)
+ if (!pg_get_line_buf(stp->fp, &stp->buf))
return NULL;
+ /* Validate the input as UTF-8, then convert to DB encoding if needed */
+ recoded = pg_any_to_server(stp->buf.data, stp->buf.len, PG_UTF8);
+
+ /* Save the correctly-encoded string for possible error reports */
+ stp->curline = recoded; /* might be equal to buf.data */
+
/*
- * Save a copy of the line for possible use in error reports. (We cannot
- * just save "result", since it's likely to get pfree'd at some point by
- * the caller; an error after that would try to access freed data.)
+ * We always return a freshly pstrdup'd string. This is clearly necessary
+ * if pg_any_to_server() returned buf.data, and we need a second copy even
+ * if encoding conversion did occur. The caller is entitled to pfree the
+ * returned string at any time, which would leave curline pointing to
+ * recycled storage, causing problems if an error occurs after that point.
+ * (It's preferable to return the result of pstrdup instead of the output
+ * of pg_any_to_server, because the conversion result tends to be
+ * over-allocated. Since callers might save the result string directly
+ * into a long-lived dictionary structure, we don't want it to be a larger
+ * palloc chunk than necessary. We'll reclaim the conversion result on
+ * the next call.)
*/
- stp->curline = pstrdup(result);
-
- return result;
+ return pstrdup(recoded);
}
/*
/* Suppress use of curline in any error reported below */
if (stp->curline)
{
- pfree(stp->curline);
+ if (stp->curline != stp->buf.data)
+ pfree(stp->curline);
stp->curline = NULL;
}
/* Release other resources */
+ pfree(stp->buf.data);
FreeFile(stp->fp);
/* Pop the error context stack */
/*
* We can't include the text of the config line for errors that occur
- * during t_readline() itself. This is only partly a consequence of our
- * arms-length use of that routine: the major cause of such errors is
+ * during tsearch_readline() itself. The major cause of such errors is
* encoding violations, and we daren't try to print error messages
* containing badly-encoded data.
*/
}
-/*
- * Read the next line from a tsearch data file (expected to be in UTF-8), and
- * convert it to database encoding if needed. The returned string is palloc'd.
- * NULL return means EOF.
- *
- * Note: direct use of this function is now deprecated. Go through
- * tsearch_readline() to provide better error reporting.
- */
-char *
-t_readline(FILE *fp)
-{
- int len;
- char *recoded;
- char buf[4096]; /* lines must not be longer than this */
-
- if (fgets(buf, sizeof(buf), fp) == NULL)
- return NULL;
-
- len = strlen(buf);
-
- /* Make sure the input is valid UTF-8 */
- (void) pg_verify_mbstr(PG_UTF8, buf, len, false);
-
- /* And convert */
- recoded = pg_any_to_server(buf, len, PG_UTF8);
- if (recoded == buf)
- {
- /*
- * conversion didn't pstrdup, so we must. We can use the length of the
- * original string, because no conversion was done.
- */
- recoded = pnstrdup(recoded, len);
- }
-
- return recoded;
-}
-
/*
* lowerstr --- fold null-terminated string to lower case
*