Skip to content

Commit 7e413a0

Browse files
committed
pg_dump: allow multiple rows per insert
This is useful to speed up loading data in a different database engine. Authors: Surafel Temesgen and David Rowley. Lightly edited by Álvaro. Reviewed-by: Fabien Coelho Discussion: https://postgr.es/m/CALAY4q9kumSdnRBzvRJvSRf2+BH20YmSvzqOkvwpEmodD-xv6g@mail.gmail.com
1 parent 4221052 commit 7e413a0

File tree

5 files changed

+193
-77
lines changed

5 files changed

+193
-77
lines changed

doc/src/sgml/ref/pg_dump.sgml

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -661,9 +661,9 @@ PostgreSQL documentation
661661
...</literal>). This will make restoration very slow; it is mainly
662662
useful for making dumps that can be loaded into
663663
non-<productname>PostgreSQL</productname> databases.
664-
However, since this option generates a separate command for each row,
665-
an error in reloading a row causes only that row to be lost rather
666-
than the entire table contents.
664+
Any error during reloading will cause only rows that are part of the
665+
problematic <command>INSERT</command> to be lost, rather than the
666+
entire table contents.
667667
</para>
668668
</listitem>
669669
</varlistentry>
@@ -775,13 +775,12 @@ PostgreSQL documentation
775775
than <command>COPY</command>). This will make restoration very slow;
776776
it is mainly useful for making dumps that can be loaded into
777777
non-<productname>PostgreSQL</productname> databases.
778-
However, since this option generates a separate command for each row,
779-
an error in reloading a row causes only that row to be lost rather
780-
than the entire table contents.
781-
Note that
782-
the restore might fail altogether if you have rearranged column order.
783-
The <option>--column-inserts</option> option is safe against column
784-
order changes, though even slower.
778+
Any error during reloading will cause only rows that are part of the
779+
problematic <command>INSERT</command> to be lost, rather than the
780+
entire table contents. Note that the restore might fail altogether if
781+
you have rearranged column order. The
782+
<option>--column-inserts</option> option is safe against column order
783+
changes, though even slower.
785784
</para>
786785
</listitem>
787786
</varlistentry>
@@ -925,8 +924,9 @@ PostgreSQL documentation
925924
<para>
926925
Add <literal>ON CONFLICT DO NOTHING</literal> to
927926
<command>INSERT</command> commands.
928-
This option is not valid unless <option>--inserts</option> or
929-
<option>--column-inserts</option> is also specified.
927+
This option is not valid unless <option>--inserts</option>,
928+
<option>--column-inserts</option> or
929+
<option>--rows-per-insert</option> is also specified.
930930
</para>
931931
</listitem>
932932
</varlistentry>
@@ -949,6 +949,20 @@ PostgreSQL documentation
949949
</listitem>
950950
</varlistentry>
951951

952+
<varlistentry>
953+
<term><option>--rows-per-insert=<replaceable class="parameter">nrows</replaceable></option></term>
954+
<listitem>
955+
<para>
956+
Dump data as <command>INSERT</command> commands (rather than
957+
<command>COPY</command>). Controls the maximum number of rows per
958+
<command>INSERT</command> command. The value specified must be a
959+
number greater than zero. Any error during reloading will cause only
960+
rows that are part of the problematic <command>INSERT</command> to be
961+
lost, rather than the entire table contents.
962+
</para>
963+
</listitem>
964+
</varlistentry>
965+
952966
<varlistentry>
953967
<term><option>--section=<replaceable class="parameter">sectionname</replaceable></option></term>
954968
<listitem>

src/bin/pg_dump/pg_backup.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,10 +140,10 @@ typedef struct _dumpOptions
140140
int dumpSections; /* bitmask of chosen sections */
141141
bool aclsSkip;
142142
const char *lockWaitTimeout;
143+
int dump_inserts; /* 0 = COPY, otherwise rows per INSERT */
143144

144145
/* flags for various command-line long options */
145146
int disable_dollar_quoting;
146-
int dump_inserts;
147147
int column_inserts;
148148
int if_exists;
149149
int no_comments;

src/bin/pg_dump/pg_dump.c

Lines changed: 134 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,12 @@ static const CatalogId nilCatalogId = {0, 0};
138138
static bool have_extra_float_digits = false;
139139
static int extra_float_digits;
140140

141+
/*
142+
* The default number of rows per INSERT when
143+
* --inserts is specified without --rows-per-insert
144+
*/
145+
#define DUMP_DEFAULT_ROWS_PER_INSERT 1
146+
141147
/*
142148
* Macro for producing quoted, schema-qualified name of a dumpable object.
143149
*/
@@ -306,11 +312,13 @@ main(int argc, char **argv)
306312
DumpableObject *boundaryObjs;
307313
int i;
308314
int optindex;
315+
char *endptr;
309316
RestoreOptions *ropt;
310317
Archive *fout; /* the script file */
311318
const char *dumpencoding = NULL;
312319
const char *dumpsnapshot = NULL;
313320
char *use_role = NULL;
321+
long rowsPerInsert;
314322
int numWorkers = 1;
315323
trivalue prompt_password = TRI_DEFAULT;
316324
int compressLevel = -1;
@@ -363,7 +371,7 @@ main(int argc, char **argv)
363371
{"exclude-table-data", required_argument, NULL, 4},
364372
{"extra-float-digits", required_argument, NULL, 8},
365373
{"if-exists", no_argument, &dopt.if_exists, 1},
366-
{"inserts", no_argument, &dopt.dump_inserts, 1},
374+
{"inserts", no_argument, NULL, 9},
367375
{"lock-wait-timeout", required_argument, NULL, 2},
368376
{"no-tablespaces", no_argument, &dopt.outputNoTablespaces, 1},
369377
{"quote-all-identifiers", no_argument, &quote_all_identifiers, 1},
@@ -382,6 +390,7 @@ main(int argc, char **argv)
382390
{"no-subscriptions", no_argument, &dopt.no_subscriptions, 1},
383391
{"no-sync", no_argument, NULL, 7},
384392
{"on-conflict-do-nothing", no_argument, &dopt.do_nothing, 1},
393+
{"rows-per-insert", required_argument, NULL, 10},
385394

386395
{NULL, 0, NULL, 0}
387396
};
@@ -572,6 +581,31 @@ main(int argc, char **argv)
572581
}
573582
break;
574583

584+
case 9: /* inserts */
585+
586+
/*
587+
* dump_inserts also stores --rows-per-insert, careful not to
588+
* overwrite that.
589+
*/
590+
if (dopt.dump_inserts == 0)
591+
dopt.dump_inserts = DUMP_DEFAULT_ROWS_PER_INSERT;
592+
break;
593+
594+
case 10: /* rows per insert */
595+
errno = 0;
596+
rowsPerInsert = strtol(optarg, &endptr, 10);
597+
598+
if (endptr == optarg || *endptr != '\0' ||
599+
rowsPerInsert <= 0 || rowsPerInsert > INT_MAX ||
600+
errno == ERANGE)
601+
{
602+
write_msg(NULL, "rows-per-insert must be in range %d..%d\n",
603+
1, INT_MAX);
604+
exit_nicely(1);
605+
}
606+
dopt.dump_inserts = (int) rowsPerInsert;
607+
break;
608+
575609
default:
576610
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
577611
exit_nicely(1);
@@ -596,8 +630,8 @@ main(int argc, char **argv)
596630
}
597631

598632
/* --column-inserts implies --inserts */
599-
if (dopt.column_inserts)
600-
dopt.dump_inserts = 1;
633+
if (dopt.column_inserts && dopt.dump_inserts == 0)
634+
dopt.dump_inserts = DUMP_DEFAULT_ROWS_PER_INSERT;
601635

602636
/*
603637
* Binary upgrade mode implies dumping sequence data even in schema-only
@@ -622,8 +656,12 @@ main(int argc, char **argv)
622656
if (dopt.if_exists && !dopt.outputClean)
623657
exit_horribly(NULL, "option --if-exists requires option -c/--clean\n");
624658

625-
if (dopt.do_nothing && !(dopt.dump_inserts || dopt.column_inserts))
626-
exit_horribly(NULL, "option --on-conflict-do-nothing requires option --inserts or --column-inserts\n");
659+
/*
660+
* --inserts are already implied above if --column-inserts or
661+
* --rows-per-insert were specified.
662+
*/
663+
if (dopt.do_nothing && dopt.dump_inserts == 0)
664+
exit_horribly(NULL, "option --on-conflict-do-nothing requires option --inserts, --rows-per-insert or --column-inserts\n");
627665

628666
/* Identify archive format to emit */
629667
archiveFormat = parseArchiveFormat(format, &archiveMode);
@@ -993,6 +1031,7 @@ help(const char *progname)
9931031
printf(_(" --no-unlogged-table-data do not dump unlogged table data\n"));
9941032
printf(_(" --on-conflict-do-nothing add ON CONFLICT DO NOTHING to INSERT commands\n"));
9951033
printf(_(" --quote-all-identifiers quote all identifiers, even if not key words\n"));
1034+
printf(_(" --rows-per-insert=NROWS number of rows per INSERT; implies --inserts\n"));
9961035
printf(_(" --section=SECTION dump named section (pre-data, data, or post-data)\n"));
9971036
printf(_(" --serializable-deferrable wait until the dump can run without anomalies\n"));
9981037
printf(_(" --snapshot=SNAPSHOT use given snapshot for the dump\n"));
@@ -1909,9 +1948,9 @@ dumpTableData_insert(Archive *fout, void *dcontext)
19091948
PQExpBuffer q = createPQExpBuffer();
19101949
PQExpBuffer insertStmt = NULL;
19111950
PGresult *res;
1912-
int tuple;
19131951
int nfields;
1914-
int field;
1952+
int rows_per_statement = dopt->dump_inserts;
1953+
int rows_this_statement = 0;
19151954

19161955
appendPQExpBuffer(q, "DECLARE _pg_dump_cursor CURSOR FOR "
19171956
"SELECT * FROM ONLY %s",
@@ -1926,69 +1965,88 @@ dumpTableData_insert(Archive *fout, void *dcontext)
19261965
res = ExecuteSqlQuery(fout, "FETCH 100 FROM _pg_dump_cursor",
19271966
PGRES_TUPLES_OK);
19281967
nfields = PQnfields(res);
1929-
for (tuple = 0; tuple < PQntuples(res); tuple++)
1968+
1969+
/*
1970+
* First time through, we build as much of the INSERT statement as
1971+
* possible in "insertStmt", which we can then just print for each
1972+
* statement. If the table happens to have zero columns then this will
1973+
* be a complete statement, otherwise it will end in "VALUES" and be
1974+
* ready to have the row's column values printed.
1975+
*/
1976+
if (insertStmt == NULL)
19301977
{
1931-
/*
1932-
* First time through, we build as much of the INSERT statement as
1933-
* possible in "insertStmt", which we can then just print for each
1934-
* line. If the table happens to have zero columns then this will
1935-
* be a complete statement, otherwise it will end in "VALUES(" and
1936-
* be ready to have the row's column values appended.
1937-
*/
1938-
if (insertStmt == NULL)
1939-
{
1940-
TableInfo *targettab;
1978+
TableInfo *targettab;
19411979

1942-
insertStmt = createPQExpBuffer();
1980+
insertStmt = createPQExpBuffer();
19431981

1944-
/*
1945-
* When load-via-partition-root is set, get the root table
1946-
* name for the partition table, so that we can reload data
1947-
* through the root table.
1948-
*/
1949-
if (dopt->load_via_partition_root && tbinfo->ispartition)
1950-
targettab = getRootTableInfo(tbinfo);
1951-
else
1952-
targettab = tbinfo;
1982+
/*
1983+
* When load-via-partition-root is set, get the root table name
1984+
* for the partition table, so that we can reload data through the
1985+
* root table.
1986+
*/
1987+
if (dopt->load_via_partition_root && tbinfo->ispartition)
1988+
targettab = getRootTableInfo(tbinfo);
1989+
else
1990+
targettab = tbinfo;
19531991

1954-
appendPQExpBuffer(insertStmt, "INSERT INTO %s ",
1955-
fmtQualifiedDumpable(targettab));
1992+
appendPQExpBuffer(insertStmt, "INSERT INTO %s ",
1993+
fmtQualifiedDumpable(targettab));
19561994

1957-
/* corner case for zero-column table */
1958-
if (nfields == 0)
1959-
{
1960-
appendPQExpBufferStr(insertStmt, "DEFAULT VALUES;\n");
1961-
}
1962-
else
1995+
/* corner case for zero-column table */
1996+
if (nfields == 0)
1997+
{
1998+
appendPQExpBufferStr(insertStmt, "DEFAULT VALUES;\n");
1999+
}
2000+
else
2001+
{
2002+
/* append the list of column names if required */
2003+
if (dopt->column_inserts)
19632004
{
1964-
/* append the list of column names if required */
1965-
if (dopt->column_inserts)
2005+
appendPQExpBufferChar(insertStmt, '(');
2006+
for (int field = 0; field < nfields; field++)
19662007
{
1967-
appendPQExpBufferChar(insertStmt, '(');
1968-
for (field = 0; field < nfields; field++)
1969-
{
1970-
if (field > 0)
1971-
appendPQExpBufferStr(insertStmt, ", ");
1972-
appendPQExpBufferStr(insertStmt,
1973-
fmtId(PQfname(res, field)));
1974-
}
1975-
appendPQExpBufferStr(insertStmt, ") ");
2008+
if (field > 0)
2009+
appendPQExpBufferStr(insertStmt, ", ");
2010+
appendPQExpBufferStr(insertStmt,
2011+
fmtId(PQfname(res, field)));
19762012
}
2013+
appendPQExpBufferStr(insertStmt, ") ");
2014+
}
19772015

1978-
if (tbinfo->needs_override)
1979-
appendPQExpBufferStr(insertStmt, "OVERRIDING SYSTEM VALUE ");
2016+
if (tbinfo->needs_override)
2017+
appendPQExpBufferStr(insertStmt, "OVERRIDING SYSTEM VALUE ");
19802018

1981-
appendPQExpBufferStr(insertStmt, "VALUES (");
1982-
}
2019+
appendPQExpBufferStr(insertStmt, "VALUES");
19832020
}
2021+
}
19842022

1985-
archputs(insertStmt->data, fout);
2023+
for (int tuple = 0; tuple < PQntuples(res); tuple++)
2024+
{
2025+
/* Write the INSERT if not in the middle of a multi-row INSERT. */
2026+
if (rows_this_statement == 0)
2027+
archputs(insertStmt->data, fout);
19862028

1987-
/* if it is zero-column table then we're done */
2029+
/*
2030+
* If it is zero-column table then we've aleady written the
2031+
* complete statement, which will mean we've disobeyed
2032+
* --rows-per-insert when it's set greater than 1. We do support
2033+
* a way to make this multi-row with: SELECT UNION ALL SELECT
2034+
* UNION ALL ... but that's non-standard so we should avoid it
2035+
* given that using INSERTs is mostly only ever needed for
2036+
* cross-database exports.
2037+
*/
19882038
if (nfields == 0)
19892039
continue;
19902040

1991-
for (field = 0; field < nfields; field++)
2041+
/* Emit a row heading */
2042+
if (rows_per_statement == 1)
2043+
archputs(" (", fout);
2044+
else if (rows_this_statement > 0)
2045+
archputs(",\n\t(", fout);
2046+
else
2047+
archputs("\n\t(", fout);
2048+
2049+
for (int field = 0; field < nfields; field++)
19922050
{
19932051
if (field > 0)
19942052
archputs(", ", fout);
@@ -2053,10 +2111,19 @@ dumpTableData_insert(Archive *fout, void *dcontext)
20532111
}
20542112
}
20552113

2056-
if (!dopt->do_nothing)
2057-
archputs(");\n", fout);
2058-
else
2059-
archputs(") ON CONFLICT DO NOTHING;\n", fout);
2114+
/* Terminate the row ... */
2115+
archputs(")", fout);
2116+
2117+
/* ... and the statement, if the target no. of rows is reached */
2118+
if (++rows_this_statement >= rows_per_statement)
2119+
{
2120+
if (dopt->do_nothing)
2121+
archputs(" ON CONFLICT DO NOTHING;\n", fout);
2122+
else
2123+
archputs(";\n", fout);
2124+
/* Reset the row counter */
2125+
rows_this_statement = 0;
2126+
}
20602127
}
20612128

20622129
if (PQntuples(res) <= 0)
@@ -2067,6 +2134,15 @@ dumpTableData_insert(Archive *fout, void *dcontext)
20672134
PQclear(res);
20682135
}
20692136

2137+
/* Terminate any statements that didn't make the row count. */
2138+
if (rows_this_statement > 0)
2139+
{
2140+
if (dopt->do_nothing)
2141+
archputs(" ON CONFLICT DO NOTHING;\n", fout);
2142+
else
2143+
archputs(";\n", fout);
2144+
}
2145+
20702146
archputs("\n\n", fout);
20712147

20722148
ExecuteSqlStatement(fout, "CLOSE _pg_dump_cursor");

src/bin/pg_dump/t/001_basic.pl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,8 @@
118118

119119
command_fails_like(
120120
[ 'pg_dump', '--on-conflict-do-nothing' ],
121-
qr/\Qpg_dump: option --on-conflict-do-nothing requires option --inserts or --column-inserts\E/,
122-
'pg_dump: option --on-conflict-do-nothing requires option --inserts or --column-inserts');
121+
qr/pg_dump: option --on-conflict-do-nothing requires option --inserts, --rows-per-insert or --column-inserts/,
122+
'pg_dump: --on-conflict-do-nothing requires --inserts, --rows-per-insert, --column-inserts');
123123

124124
# pg_dumpall command-line argument checks
125125
command_fails_like(

0 commit comments

Comments
 (0)