
# Copyright (c) 2021-2022, PostgreSQL Global Development Group

# test for archiving with hot standby
use strict;
use warnings;
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;
use File::Copy;

# Initialize primary node, doing archives
my $node_primary = PostgreSQL::Test::Cluster->new('primary');
$node_primary->init(
	has_archiving    => 1,
	allows_streaming => 1);
my $backup_name = 'my_backup';

# Start it
$node_primary->start;

# Take backup for standby
$node_primary->backup($backup_name);

# Initialize standby node from backup, fetching WAL from archives
my $node_standby = PostgreSQL::Test::Cluster->new('standby');
# Note that this makes the standby store its contents on the archives
# of the primary.
$node_standby->init_from_backup($node_primary, $backup_name,
	has_restoring => 1);
$node_standby->append_conf('postgresql.conf',
	"wal_retrieve_retry_interval = '100ms'");

# Set archive_cleanup_command and recovery_end_command, checking their
# execution by the backend with dummy commands.
my $data_dir                     = $node_standby->data_dir;
my $archive_cleanup_command_file = "archive_cleanup_command.done";
my $recovery_end_command_file    = "recovery_end_command.done";
$node_standby->append_conf(
	'postgresql.conf', qq(
archive_cleanup_command = 'echo archive_cleanup_done > $archive_cleanup_command_file'
recovery_end_command = 'echo recovery_ended_done > $recovery_end_command_file'
));
$node_standby->start;

# Create some content on primary
$node_primary->safe_psql('postgres',
	"CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a");

# Note the presence of this checkpoint for the archive_cleanup_command
# check done below, before switching to a new segment.
$node_primary->safe_psql('postgres', "CHECKPOINT");

# Done after the checkpoint to ensure that it is replayed on the standby,
# for archive_cleanup_command.
my $current_lsn =
  $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn();");

# Force archiving of WAL file to make it present on primary
$node_primary->safe_psql('postgres', "SELECT pg_switch_wal()");

# Add some more content, it should not be present on standby
$node_primary->safe_psql('postgres',
	"INSERT INTO tab_int VALUES (generate_series(1001,2000))");

# Wait until necessary replay has been done on standby
my $caughtup_query =
  "SELECT '$current_lsn'::pg_lsn <= pg_last_wal_replay_lsn()";
$node_standby->poll_query_until('postgres', $caughtup_query)
  or die "Timed out while waiting for standby to catch up";

my $result =
  $node_standby->safe_psql('postgres', "SELECT count(*) FROM tab_int");
is($result, qq(1000), 'check content from archives');

# archive_cleanup_command is executed after generating a restart point,
# with a checkpoint.
$node_standby->safe_psql('postgres', q{CHECKPOINT});
ok( -f "$data_dir/$archive_cleanup_command_file",
	'archive_cleanup_command executed on checkpoint');
ok( !-f "$data_dir/$recovery_end_command_file",
	'recovery_end_command not executed yet');

# Check the presence of temporary files specifically generated during
# archive recovery.  To ensure the presence of the temporary history
# file, switch to a timeline large enough to allow a standby to recover
# a history file from an archive.  As this requires at least two timeline
# switches, promote the existing standby first.  Then create a second
# standby based on the primary, using its archives.  Finally, the second
# standby is promoted.
$node_standby->promote;

# Wait until the history file has been stored on the archives of the
# primary once the promotion of the standby completes.  This ensures that
# the second standby created below will be able to restore this file,
# creating a RECOVERYHISTORY.
my $primary_archive = $node_primary->archive_dir;
$caughtup_query =
  "SELECT size IS NOT NULL FROM pg_stat_file('$primary_archive/00000002.history')";
$node_primary->poll_query_until('postgres', $caughtup_query)
  or die "Timed out while waiting for archiving of 00000002.history";

# recovery_end_command should have been triggered on promotion.
ok( -f "$data_dir/$recovery_end_command_file",
	'recovery_end_command executed after promotion');

my $node_standby2 = PostgreSQL::Test::Cluster->new('standby2');
$node_standby2->init_from_backup($node_primary, $backup_name,
	has_restoring => 1);

# Make execution of recovery_end_command fail.  This should not affect
# promotion, and its failure should be logged.
$node_standby2->append_conf(
	'postgresql.conf', qq(
recovery_end_command = 'echo recovery_end_failed > missing_dir/xyz.file'
));

$node_standby2->start;

# Save the log location, to see the failure of recovery_end_command.
my $log_location = -s $node_standby2->logfile;

# Now promote standby2, and check that temporary files specifically
# generated during archive recovery are removed by the end of recovery.
$node_standby2->promote;

# Check the logs of the standby to see that the commands have failed.
my $log_contents = slurp_file($node_standby2->logfile, $log_location);
my $node_standby2_data = $node_standby2->data_dir;

like(
	$log_contents,
	qr/restored log file "00000002.history" from archive/s,
	"00000002.history retrieved from the archives");
ok( !-f "$node_standby2_data/pg_wal/RECOVERYHISTORY",
	"RECOVERYHISTORY removed after promotion");
ok( !-f "$node_standby2_data/pg_wal/RECOVERYXLOG",
	"RECOVERYXLOG removed after promotion");
like(
	$log_contents,
	qr/WARNING:.*recovery_end_command/s,
	"recovery_end_command failure detected in logs after promotion");

done_testing();
