background

PostgreSQL Replication with Easy Failback

Join Over 1,000 Engineers & Get New Episodes Weekly!

Learn how to failover to a replica database and then failback to the original primary database easily without a full restore of the database and without using pg_rewind.

This tutorial assumes you have already setup streaming replication between a primary and a replica PostgreSQL server. If you need to set that up, use our Posgres replication tutorial. Also, make sure you have replication slots setup as well. You can set that up using our replication slot tutorial.

# Primary DB cluster called "main" on port 5432
# Replica DB cluster called "replica" on port 5433
# Test database exists with a posts table and two posts

# review data
sudo su - postgres
psql test -c "select * from posts;" -p 5433

# stop main cluster
sudo systemctl stop postgresql@10-main

# promote replica to primary and verify
sudo pg_ctlcluster 10 replica promote
tail -n 100 /var/log/postgresql/postgresql-10-replica.log

psql test -c "insert into posts (id, title, content, type) values
(102, 'Intro to SQL Where Clause', 'Easy as pie!', 'SQL');" -p 5433

Bring up main cluster as a replica

# configure recovery.conf on main cluster
nano /var/lib/postgresql/10/main/recovery.conf

  restore_command = 'cp /var/lib/postgresql/pg_log_archive/main/%f %p'
  recovery_target_timeline = 'latest'
  standby_mode = 'on'
  primary_conninfo = 'user=rep_user passfile=''/var/lib/postgresql/.pgpass'' host=''/var/run/postgresql'' port=5433 sslmode=prefer sslcompression=1 krbsrvname=postgres target_session_attrs=any'
  archive_cleanup_command = 'pg_archivecleanup /var/lib/postgresql/pg_log_archive/main %r'
  primary_slot_name = 'main'

# create a slot for the main cluster
psql -c "select * from pg_create_physical_replication_slot('main');" -p 5433

# start main cluster and verify in sync
sudo systemctl start postgresql@10-main
tail -n 100 /var/log/postgresql/postgresql-10-main.log
psql test -c "select * from posts;" -p 5432

Failback to main cluster as primary

# stop replica cluster
sudo systemctl stop postgresql@10-replica

# promote main to primary and verify
sudo pg_ctlcluster 10 main promote
tail -n 100 /var/log/postgresql/postgresql-10-main.log

psql test -c "insert into posts (id, title, content, type) values
(103, 'Intro to SQL Order Clause', 'What comes first?', 'SQL');" -p 5432

# setup recovery.conf on replica
mv /var/lib/postgresql/10/replica/recovery.done /var/lib/postgresql/10/replica/recovery.conf

# start replica cluster and verify in sync
sudo systemctl start postgresql@10-replica
tail -n 100 /var/log/postgresql/postgresql-10-replica.log
psql test -c "select * from posts;" -p 5433

# review and drop slots on replica
psql -c "select * from pg_replication_slots;" -p 5433
psql -c "select * from pg_drop_replication_slot('main');" -p 5433