标签:count generate error: mina share 执行 切换 ann gre
--主创建表空间
[root@pg93 ~]# mkdir /tbs_test
[root@pg93 ~]# chown postgres:postgres /tbs_test
[root@pg93 ~]# su - postgres
[postgres@pg93 ~]$ psql
psql (12.3)
Type "help" for help.
postgres@postgres:create tablespace tbs_test location '/tbs_test';
CREATE TABLESPACE
--备节点已经挂了,查看日志:
2021-02-01 20:46:51.375 CST [5451] FATAL: directory "/tbs_test" does not exist
2021-02-01 20:46:51.375 CST [5451] HINT: Create this directory for the tablespace before restarting the server.
2021-02-01 20:46:51.375 CST [5451] CONTEXT: WAL redo at 2/F05B8C8 for Tablespace/CREATE: 16738 "/tbs_test"
2021-02-01 20:46:51.377 CST [5449] LOG: startup process (PID 5451) exited with exit code 1
2021-02-01 20:46:51.378 CST [5449] LOG: terminating any other active server processes
2021-02-01 20:46:51.378 CST [5483] WARNING: terminating connection because of crash of another server process
2021-02-01 20:46:51.378 CST [5483] DETAIL: The postmaster has commanded this server process to roll back the current transaction and exit, because another server process exited abnormally and possibly corrupted shared memory.
2021-02-01 20:46:51.378 CST [5483] HINT: In a moment you should be able to reconnect to the database and repeat your command.
2021-02-01 20:46:51.379 CST [5467] WARNING: terminating connection because of crash of another server process
2021-02-01 20:46:51.379 CST [5467] DETAIL: The postmaster has commanded this server process to roll back the current transaction and exit, because another server process exited abnormally and possibly corrupted shared memory.
2021-02-01 20:46:51.379 CST [5467] HINT: In a moment you should be able to reconnect to the database and repeat your command.
2021-02-01 20:46:51.389 CST [5449] LOG: database system is shut down
--按提示创建目录并重启数据库,同步恢复正常
--但下游节点也挂了哈哈
--
--备设置参数并reload生效
postgres=# show max_standby_streaming_delay;
max_standby_streaming_delay
-----------------------------
5s
(1 row)
--主上运行pgbench
fin_er@findb:create table standby_test (flag varchar(20),id int4);
CREATE TABLE
fin_er@findb:insert into standby_test select 'tteesstt',generate_series(1,1000000);
INSERT 0 1000000
[postgres@pg93 ~]$ pgbench -c 8 -T 120 -d findb -U fin_er -n N -M prepared -f standby.sql >test.out 2>&1 &
[1] 4933
--备上执行查询
[postgres@pg93s ~]$ psql findb fin_er
psql (12.3)
Type "help" for help.
findb=# \timing on
Timing is on.
findb=# select pg_sleep(10),count(*) from standby_test;
ERROR: canceling statement due to conflict with recovery
DETAIL: User query might have needed to see row versions that must be removed.
Time: 5303.822 ms (00:05.304)
1、调大max_standby_streaming_delay
--设置到60s
postgres=# select pg_reload_conf();
pg_reload_conf
----------------
t
(1 row)
postgres=#
postgres=#
postgres=# show max_standby_streaming_delay;
max_standby_streaming_delay
-----------------------------
1min
(1 row)
--主运行pgbench
--备执行查询正常
postgres=# \c findb
You are now connected to database "findb" as user "postgres".
findb=# select pg_sleep(10),count(*) from standby_test;
pg_sleep | count
----------+---------
| 1000000
(1 row)
Time: 10100.055 ms (00:10.100)
2、设置hot_standby_feedback
--备设置参数并reload生效
postgres=# show hot_standby_feedback;
hot_standby_feedback
----------------------
on
(1 row)
postgres=# show max_standby_streaming_delay;
max_standby_streaming_delay
-----------------------------
5s
(1 row)
--备可正常运行长查询
findb=# \timing on
Timing is on.
findb=# select pg_sleep(10),count(*) from standby_test;
pg_sleep | count
----------+---------
| 1000000
(1 row)
Time: 10068.363 ms (00:10.068)
findb=# select pg_sleep(20),count(*) from standby_test;
pg_sleep | count
----------+---------
| 1000000
(1 row)
Time: 20075.533 ms (00:20.076)
--主将wal_keep_segments调小到2
postgres@postgres:alter system set wal_keep_segments=2;
ALTER SYSTEM
postgres@postgres:select pg_reload_conf();
pg_reload_conf
----------------
t
(1 row)
postgres@postgres:show wal_keep_segments;
wal_keep_segments
-------------------
2
(1 row)
--主上运行pgbench加压,生成较多WAL,停掉备
--备看到当前只到1C
-rw------- 1 postgres postgres 16777216 Feb 2 07:35 00000005000000020000001C
drwx------ 20 postgres postgres 4096 Feb 2 07:35 ..
[postgres@pg93s pg_wal]$
--主上在跑pgbench的同时,手动切换一下WAL并checkpoint
postgres@postgres:select pg_switch_wal();
pg_switch_wal
---------------
2/24011878
(1 row)
postgres@postgres:select pg_switch_wal();
pg_switch_wal
---------------
2/2500CF30
(1 row)
postgres@postgres:checkpoint ;
CHECKPOINT
--主上查看1C和后面几个日志已经不在了
[postgres@pg93 pg_wal]$ ls -latr
total 229388
-rw------- 1 postgres postgres 16777216 Feb 2 07:11 00000005000000020000002D
-rw------- 1 postgres postgres 16777216 Feb 2 07:12 00000005000000020000002E
drwx------ 20 postgres postgres 4096 Feb 2 07:31 ..
-rw------- 1 postgres postgres 16777216 Feb 2 07:34 000000050000000200000027
-rw------- 1 postgres postgres 16777216 Feb 2 07:37 000000050000000200000029
-rw------- 1 postgres postgres 16777216 Feb 2 07:37 000000050000000200000028
-rw------- 1 postgres postgres 16777216 Feb 2 07:37 000000050000000200000030
-rw------- 1 postgres postgres 16777216 Feb 2 07:37 00000005000000020000002C
-rw------- 1 postgres postgres 16777216 Feb 2 07:37 000000050000000200000032
-rw------- 1 postgres postgres 16777216 Feb 2 07:37 00000005000000020000002F
-rw------- 1 postgres postgres 16777216 Feb 2 07:37 000000050000000200000031
-rw------- 1 postgres postgres 16777216 Feb 2 07:37 00000005000000020000002B
-rw------- 1 postgres postgres 16777216 Feb 2 07:37 000000050000000200000024
-rw------- 1 postgres postgres 16777216 Feb 2 07:37 000000050000000200000025
drwx------ 2 postgres postgres 4096 Feb 2 07:38 archive_status
-rw------- 1 postgres postgres 16777216 Feb 2 07:38 000000050000000200000026
drwx------ 3 postgres postgres 4096 Feb 2 07:41 .
--启动备,发现找不到需要的日志了
2021-02-02 07:41:38.113 CST [7333] LOG: database system is ready to accept read only connections
2021-02-02 07:41:38.126 CST [7339] LOG: started streaming WAL from primary at 2/1C000000 on timeline 5
2021-02-02 07:41:38.127 CST [7339] FATAL: could not receive data from WAL stream: ERROR: requested WAL segment 00000005000000020000001C has already been removed
2021-02-02 07:41:38.136 CST [7340] LOG: started streaming WAL from primary at 2/1C000000 on timeline 5
2021-02-02 07:41:38.136 CST [7340] FATAL: could not receive data from WAL stream: ERROR: requested WAL segment 00000005000000020000001C has already been removed
--如果源库没有启用日志归档,就只能重做备库了(话说PG有没有通过增量备份来恢复备库的办法?)
--将源库的归档日志拷到备库归档目录并设置restore_command
findb=# show restore_command;
restore_command
---------------------------------------
tar -xf /archive/%f.tar.gz -C /pgdata
--重启备库发现很快恢复正常同步
2021-02-02 11:17:12.705 CST [10621] LOG: database system was shut down in recovery at 2021-02-02 11:17:08 CST
tar: /archive/00000006.history.tar.gz: Cannot open: No such file or directory --这里是因为我把history文件删掉了,但不影响
tar: Error is not recoverable: exiting now
2021-02-02 11:17:12.707 CST [10621] LOG: entering standby mode
tar: /archive/00000005.history.tar.gz: Cannot open: No such file or directory
tar: Error is not recoverable: exiting now
2021-02-02 11:17:12.791 CST [10621] LOG: redo starts at 2/1B62E490
2021-02-02 11:17:12.897 CST [10621] LOG: consistent recovery state reached at 2/1CC3B130
2021-02-02 11:17:12.898 CST [10619] LOG: database system is ready to accept read only connections
tar: /archive/000000050000000200000026.tar.gz: Cannot open: No such file or directory
tar: Error is not recoverable: exiting now
2021-02-02 11:17:14.059 CST [10650] LOG: started streaming WAL from primary at 2/26000000 on timeline 5
(1 row)
--查看数据已经正常同步
[postgres@pg93 ~]$ psql findb fin_er
psql (12.3)
Type "help" for help.
fin_er@findb:select count(*) from standby_test where flag='leadx';
count
-------
28388
(1 row)
[postgres@pg93s log]$ psql findb
psql (12.3)
Type "help" for help.
findb=# select count(*) from standby_test where flag='leadx';
count
-------
28388
(1 row)
--检查复制状态也正常
postgres@postgres:select usename,application_name,client_addr,state,sync_state,replay_lag from pg_stat_replication;
usename | application_name | client_addr | state | sync_state | replay_lag
---------+------------------+-----------------+-----------+------------+------------
repuser | walreceiver | 192.168.150.132 | streaming | async |
(1 row)
对于主库WAL被回收(删除)导致备库无法继续同步的情况,主要的预防方法是使用复制槽(replication slot):
--主设置max_replication_slots为非0
postgres@postgres:show max_replication_slots;
max_replication_slots
-----------------------
10
(1 row)
postgres@postgres:select * from pg_create_physical_replication_slot('phy_std1');
slot_name | lsn
-----------+-----
phy_std1 |
(1 row)
postgres@postgres:\x
Expanded display is on.
postgres@postgres:select * from pg_replication_slots;
-[ RECORD 1 ]-------+---------
slot_name | phy_std1
plugin |
slot_type | physical
datoid |
database |
temporary | f
active | f
active_pid |
xmin |
catalog_xmin |
restart_lsn |
confirmed_flush_lsn |
--备配置primary_slot_name以使用该slot
primary_slot_name = 'phy_std1'
postgres=# show primary_slot_name;
primary_slot_name
-------------------
phy_std1
(1 row)
--主查看slot,处于active
postgres@postgres:select * from pg_replication_slots;
slot_name | plugin | slot_type | datoid | database | temporary | active | active_pid | xmin | catalog_xmin | restart_lsn | confirmed_flush_lsn
-----------+--------+-----------+--------+----------+-----------+--------+------------+--------+--------------+-------------+---------------------
phy_std1 | | physical | | | f | t | 13722 | 133587 | | 2/266705A8 |
(1 row)
--active_pid就是OS上的WAL sender进程的PID
[postgres@pg93 ~]$ ps -ef|grep 13722
postgres 13722 5960 0 14:23 ? 00:00:00 postgres: walsender repuser 192.168.150.132(53920) streaming 2/27000148
--停备,主上运行pgbench压测,并多次手动切换日志
[postgres@pg93 ~]$ pgbench -c 8 -T 1000 -d findb -U fin_er -n N -M prepared -f standby.sql
postgres@postgres:select pg_switch_wal();
pg_switch_wal
---------------
2/323F7060
(1 row)
postgres@postgres:select pg_walfile_name('2/323F7060');
pg_walfile_name
--------------------------
000000050000000200000032
(1 row)
--重启备后可正常同步
postgres@postgres:select * from pg_replication_slots ;
slot_name | plugin | slot_type | datoid | database | temporary | active | active_pid | xmin | catalog_xmin | restart_lsn | confirmed_flush_lsn
-----------+--------+-----------+--------+----------+-----------+--------+------------+--------+--------------+-------------+---------------------
phy_std1 | | physical | | | f | t | 13919 | 139068 | | 2/33000050 |
(1 row)
postgres@postgres:exit
[postgres@pg93 ~]$ ps -ef|grep 13919
postgres 13919 5960 0 14:36 ? 00:00:00 postgres: walsender repuser 192.168.150.132(53922) streaming 2/33000050
postgres 13933 13686 0 14:38 pts/4 00:00:00 grep 13919
[postgres@pg93 ~]$ psql
psql (12.3)
Type "help" for help.
postgres@postgres:select pg_current_wal_lsn();
pg_current_wal_lsn
--------------------
2/33000050
(1 row)
标签:count generate error: mina share 执行 切换 ann gre
原文地址:https://www.cnblogs.com/leadx/p/14380310.html