diff --git a/doc/src/sgml/ref/pg_combinebackup.sgml b/doc/src/sgml/ref/pg_combinebackup.sgml
index 091982f62ad5..55bc46849db5 100644
--- a/doc/src/sgml/ref/pg_combinebackup.sgml
+++ b/doc/src/sgml/ref/pg_combinebackup.sgml
@@ -137,6 +137,35 @@ PostgreSQL documentation
+
+   <varlistentry>
+    <term><option>-k</option></term>
+    <term><option>--link</option></term>
+    <listitem>
+     <para>
+      Use hard links instead of copying files to the synthetic backup.
+      Reconstruction of the synthetic backup might be faster (no file copying)
+      and use less disk space, but care must be taken when using the output
+      directory, because any modifications to that directory (for example,
+      starting the server) can also affect the input directories. Likewise,
+      changes to the input directories (for example, starting the server on
+      the full backup) could affect the output directory. Thus, this option
+      is best used when the input directories are only copies that will be
+      removed after <application>pg_combinebackup</application> has completed.
+     </para>
+
+     <para>
+      Requires that the input backups and the output directory are in the
+      same file system.
+     </para>
+
+     <para>
+      If a backup manifest is not available or does not contain checksum of
+      the right type, hard links will still be created, but the file will be
+      also read block-by-block for the checksum calculation.
+     </para>
+    </listitem>
+   </varlistentry>
+
@@ -167,7 +196,8 @@ PostgreSQL documentation
Perform regular file copy. This is the default. (See also
- and .)
+        <option>--clone</option>, <option>--copy-file-range</option>, and
+        <option>-k</option>/<option>--link</option>.)
diff --git a/src/bin/pg_combinebackup/copy_file.c b/src/bin/pg_combinebackup/copy_file.c
index 4e27814839c4..97ecda5a66dd 100644
--- a/src/bin/pg_combinebackup/copy_file.c
+++ b/src/bin/pg_combinebackup/copy_file.c
@@ -40,6 +40,9 @@ static void copy_file_copyfile(const char *src, const char *dst,
pg_checksum_context *checksum_ctx);
#endif
+static void copy_file_link(const char *src, const char *dest,
+ pg_checksum_context *checksum_ctx);
+
/*
* Copy a regular file, optionally computing a checksum, and emitting
* appropriate debug messages. But if we're in dry-run mode, then just emit
@@ -69,7 +72,13 @@ copy_file(const char *src, const char *dst,
}
#ifdef WIN32
- copy_method = COPY_METHOD_COPYFILE;
+ /*
+ * We have no specific switch to enable CopyFile on Windows, because
+ * it's supported (as far as we know) on all Windows machines. So,
+ * automatically enable it unless some other strategy was selected.
+ */
+ if (copy_method == COPY_METHOD_COPY)
+ copy_method = COPY_METHOD_COPYFILE;
#endif
/* Determine the name of the copy strategy for use in log messages. */
@@ -93,6 +102,10 @@ copy_file(const char *src, const char *dst,
strategy_implementation = copy_file_copyfile;
break;
#endif
+ case COPY_METHOD_LINK:
+ strategy_name = "link";
+ strategy_implementation = copy_file_link;
+ break;
}
if (dry_run)
@@ -304,3 +317,21 @@ copy_file_copyfile(const char *src, const char *dst,
checksum_file(src, checksum_ctx);
}
#endif /* WIN32 */
+
+/*
+ * copy_file_link
+ *		Hard-links a file from src to dest.
+ *
+ * If needed, also reads the file and calculates the checksum.  Unlike the
+ * copying strategies, linking never reads the file data, so there is no
+ * opportunity to compute the checksum incrementally during the "copy";
+ * instead the source file is re-read in full afterwards.
+ */
+static void
+copy_file_link(const char *src, const char *dest,
+			   pg_checksum_context *checksum_ctx)
+{
+	if (link(src, dest) < 0)
+		pg_fatal("error while linking file from \"%s\" to \"%s\": %m",
+				 src, dest);
+
+	/*
+	 * If needed, calculate checksum of the file.  (Presumably checksum_file
+	 * is a no-op when no checksum type was requested -- the same call is
+	 * made unconditionally by the other copy strategies above.)
+	 */
+	checksum_file(src, checksum_ctx);
+}
diff --git a/src/bin/pg_combinebackup/copy_file.h b/src/bin/pg_combinebackup/copy_file.h
index 92f104115bbc..5a8517629c72 100644
--- a/src/bin/pg_combinebackup/copy_file.h
+++ b/src/bin/pg_combinebackup/copy_file.h
@@ -25,6 +25,7 @@ typedef enum CopyMethod
#ifdef WIN32
COPY_METHOD_COPYFILE,
#endif
+ COPY_METHOD_LINK,
} CopyMethod;
extern void copy_file(const char *src, const char *dst,
diff --git a/src/bin/pg_combinebackup/meson.build b/src/bin/pg_combinebackup/meson.build
index 0c4fd9e62702..e80a4756a7f4 100644
--- a/src/bin/pg_combinebackup/meson.build
+++ b/src/bin/pg_combinebackup/meson.build
@@ -37,6 +37,7 @@ tests += {
't/007_wal_level_minimal.pl',
't/008_promote.pl',
't/009_no_full_file.pl',
+ 't/010_hardlink.pl',
],
}
}
diff --git a/src/bin/pg_combinebackup/pg_combinebackup.c b/src/bin/pg_combinebackup/pg_combinebackup.c
index 5864ec574fb6..d480dc74436e 100644
--- a/src/bin/pg_combinebackup/pg_combinebackup.c
+++ b/src/bin/pg_combinebackup/pg_combinebackup.c
@@ -135,6 +135,7 @@ main(int argc, char *argv[])
{"no-sync", no_argument, NULL, 'N'},
{"output", required_argument, NULL, 'o'},
{"tablespace-mapping", required_argument, NULL, 'T'},
+ {"link", no_argument, NULL, 'k'},
{"manifest-checksums", required_argument, NULL, 1},
{"no-manifest", no_argument, NULL, 2},
{"sync-method", required_argument, NULL, 3},
@@ -172,7 +173,7 @@ main(int argc, char *argv[])
opt.copy_method = COPY_METHOD_COPY;
/* process command-line options */
- while ((c = getopt_long(argc, argv, "dnNo:T:",
+ while ((c = getopt_long(argc, argv, "dknNo:T:",
long_options, &optindex)) != -1)
{
switch (c)
@@ -181,6 +182,9 @@ main(int argc, char *argv[])
opt.debug = true;
pg_logging_increase_verbosity();
break;
+ case 'k':
+ opt.copy_method = COPY_METHOD_LINK;
+ break;
case 'n':
opt.dry_run = true;
break;
@@ -424,6 +428,11 @@ main(int argc, char *argv[])
}
}
+ /* Warn about the possibility of compromising the backups, when link mode */
+ if (opt.copy_method == COPY_METHOD_LINK)
+ pg_log_warning("--link mode was used; any modifications to the output "
+ "directory may destructively modify input directories");
+
/* It's a success, so don't remove the output directories. */
reset_directory_cleanup_list();
exit(0);
@@ -761,6 +770,7 @@ help(const char *progname)
printf(_(" %s [OPTION]... DIRECTORY...\n"), progname);
printf(_("\nOptions:\n"));
printf(_(" -d, --debug generate lots of debugging output\n"));
+ printf(_(" -k, --link link files instead of copying\n"));
printf(_(" -n, --dry-run do not actually do anything\n"));
printf(_(" -N, --no-sync do not wait for changes to be written safely to disk\n"));
printf(_(" -o, --output=DIRECTORY output directory\n"));
diff --git a/src/bin/pg_combinebackup/t/010_hardlink.pl b/src/bin/pg_combinebackup/t/010_hardlink.pl
new file mode 100644
index 000000000000..a0ee419090cf
--- /dev/null
+++ b/src/bin/pg_combinebackup/t/010_hardlink.pl
@@ -0,0 +1,169 @@
+# Copyright (c) 2025, PostgreSQL Global Development Group
+#
+# This test aims to validate that hard links are created as expected in the
+# output directory, when running pg_combinebackup with --link mode.
+
+use strict;
+use warnings FATAL => 'all';
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Set up a new database instance.
+my $primary = PostgreSQL::Test::Cluster->new('primary');
+$primary->init(has_archiving => 1, allows_streaming => 1);
+$primary->append_conf('postgresql.conf', 'summarize_wal = on');
+# Disable autovacuum so that nothing else modifies our test tables while the
+# backups are being taken; the hard-link counts checked below depend on the
+# data files staying untouched between the full and incremental backups.
+$primary->append_conf('postgresql.conf', 'autovacuum = off');
+$primary->start;
+
+# Create a couple of tables (~264KB each).
+# Note: Cirrus CI runs some tests with a very small segment size, so, in that
+# environment, a single table of 264KB would have both a segment with a link
+# count of 1 and also one with a link count of 2. But in a normal installation,
+# segment size is 1GB. Therefore, we use 2 different tables here: for test_1,
+# all segments (or the only one) will have two hard links; for test_2, the
+# last segment (or the only one) will have 1 hard link, and any others will
+# have 2.
+my $query = <<'EOM';
+CREATE TABLE test_%s AS
+ SELECT x.id::bigint,
+ repeat('a', 1600) AS value
+ FROM generate_series(1, 100) AS x(id);
+EOM
+
+$primary->safe_psql('postgres', sprintf($query, '1'));
+$primary->safe_psql('postgres', sprintf($query, '2'));
+
+# Fetch information about the data files.
+$query = <<'EOM';
+SELECT pg_relation_filepath(oid)
+FROM pg_class
+WHERE relname = 'test_%s';
+EOM
+
+my $test_1_path = $primary->safe_psql('postgres', sprintf($query, '1'));
+note "test_1 path is $test_1_path";
+
+my $test_2_path = $primary->safe_psql('postgres', sprintf($query, '2'));
+note "test_2 path is $test_2_path";
+
+# Take a full backup.
+my $backup1path = $primary->backup_dir . '/backup1';
+$primary->command_ok(
+ [
+ 'pg_basebackup',
+ '--pgdata' => $backup1path,
+ '--no-sync',
+ '--checkpoint' => 'fast',
+ '--wal-method' => 'none'
+ ],
+ "full backup");
+
+# Perform an insert that touches a page of the last segment of the data file of
+# table test_2, so that the incremental backup has to write that segment anew
+# (rather than hard-linking it from the full backup).
+# NOTE(review): the original heredoc here was garbled in transit; this INSERT
+# is a reconstruction -- any statement modifying the tail of test_2 works.
+$primary->safe_psql('postgres', <<'EOM');
+INSERT INTO test_2 (id, value) VALUES (101, repeat('a', 1600));
+EOM
+
+# Take an incremental backup based on the full backup's manifest.
+my $backup2path = $primary->backup_dir . '/backup2';
+$primary->command_ok(
+ [
+ 'pg_basebackup',
+ '--pgdata' => $backup2path,
+ '--no-sync',
+ '--checkpoint' => 'fast',
+ '--wal-method' => 'none',
+ '--incremental' => $backup1path . '/backup_manifest'
+ ],
+ "incremental backup");
+
+# Restore the incremental backup and use it to create a new node, combining
+# the chain with pg_combinebackup in --link mode.
+my $restore = PostgreSQL::Test::Cluster->new('restore');
+$restore->init_from_backup(
+ $primary, 'backup2',
+ combine_with_prior => ['backup1'],
+ combine_mode => '--link');
+
+# Ensure files have the expected count of hard links. We expect all data files
+# from test_1 to contain 2 hard links, because they were not touched between the
+# full and incremental backups, and the last data file of table test_2 to
+# contain a single hard link because of changes in its last page.
+my $test_1_full_path = join('/', $restore->data_dir, $test_1_path);
+check_data_file($test_1_full_path, 2);
+
+my $test_2_full_path = join('/', $restore->data_dir, $test_2_path);
+check_data_file($test_2_full_path, 1);
+
+# OK, that's all.
+done_testing();
+
+
+# Given the path to the first segment of a data file, inspect its parent
+# directory to find all the segments of that data file, and make sure all the
+# segments contain 2 hard links. The last one must have the given number of hard
+# links.
+#
+# Parameters:
+# * data_file: path to the first segment of a data file, as per the output of
+# pg_relation_filepath.
+# * last_segment_nlinks: the number of hard links expected in the last segment
+# of the given data file.
+# Given the path to the first segment of a data file, collect every segment of
+# that data file from its parent directory and verify hard-link counts: all
+# segments except the last must have exactly 2 hard links, while the last one
+# must have the caller-supplied count.
+#
+# Parameters:
+# * data_file: path to the first segment of a data file, as per the output of
+#              pg_relation_filepath.
+# * last_segment_nlinks: the number of hard links expected in the last segment
+#                        of the given data file.
+sub check_data_file
+{
+	my ($data_file, $last_segment_nlinks) = @_;
+
+	# Collect the segments in order: the bare path first, then "<path>.1",
+	# "<path>.2", ... stopping at the first suffix with no regular file.
+	my @segments = ($data_file);
+	for (my $suffix = 1;; $suffix++)
+	{
+		my $candidate = "$data_file.$suffix";
+		last unless -f $candidate;
+		push @segments, $candidate;
+	}
+
+	# Split off the final segment; it is held to the caller's expectation,
+	# while every earlier segment must carry exactly two links.
+	my $last_segment = pop @segments;
+
+	foreach my $segment (@segments)
+	{
+		ok(get_hard_link_count($segment) == 2,
+			"File '$segment' has 2 hard links");
+	}
+
+	ok(get_hard_link_count($last_segment) == $last_segment_nlinks,
+		"File '$last_segment' has $last_segment_nlinks hard link(s)");
+}
+
+
+# Subroutine to get hard link count of a given file.
+# Receives the path to a file, and returns the number of hard links of
+# that file.
+# Return the number of hard links pointing at the given file, as reported by
+# stat().
+sub get_hard_link_count
+{
+	my ($path) = @_;
+
+	# nlink is the fourth field (index 3) of the list returned by stat().
+	return (stat($path))[3];
+}