【 TiDB 使用环境】生产环境
【 TiDB 版本】6.1.2
【复现路径】上次 ucan 把集群启动后是正常的;查看日志发现,3-8号早上就已经开始报这个错,并由此产生了大量错误日志,占用磁盘空间。
【遇到的问题:问题现象及影响】我最初发现数据均衡出了问题:其中一个节点比另外一个节点多了几十个 GB,但是从监控看 PD 是在调度的,所以我试着重启了集群,结果其中一个节点启动不起来。经过一系列排查,发现磁盘不均衡的问题是日志文件造成的,而产生大量错误日志的原因是 failed to send extra message。请教如何修复 TiKV?查看该节点,tikv service 一直在运行,而且 CPU 占用在 30% 左右。
【资源配置】
【附件:截图/日志/监控】
total 26G
17301515 4.0K . 17301555 301M tikv-2023-03-09T08-54-41.841.log 17301578 301M tikv-2023-03-10T01-23-05.728.log 17301601 301M tikv-2023-03-10T18-06-36.398.log
17301506 4.0K .. 17301556 301M tikv-2023-03-09T09-35-18.365.log 17301579 301M tikv-2023-03-10T02-08-40.432.log 17301602 301M tikv-2023-03-10T18-50-29.603.log
17301532 301M tikv-2023-03-06T03-39-51.400.log 17301557 301M tikv-2023-03-09T10-16-04.712.log 17301580 301M tikv-2023-03-10T02-54-24.028.log 17301603 301M tikv-2023-03-10T19-35-29.952.log
17301535 301M tikv-2023-03-06T18-57-08.744.log 17301558 301M tikv-2023-03-09T10-57-03.234.log 17301581 301M tikv-2023-03-10T03-40-24.456.log 17301604 301M tikv-2023-03-10T20-21-33.700.log
17301536 301M tikv-2023-03-06T19-56-55.422.log 17301559 301M tikv-2023-03-09T11-38-26.964.log 17301582 301M tikv-2023-03-10T04-26-32.260.log 17301605 301M tikv-2023-03-10T21-08-31.723.log
17301537 301M tikv-2023-03-07T10-30-28.870.log 17301560 301M tikv-2023-03-09T12-20-00.914.log 17301583 301M tikv-2023-03-10T05-12-54.841.log 17301606 301M tikv-2023-03-10T21-56-31.343.log
17301538 301M tikv-2023-03-07T12-02-05.787.log 17301561 301M tikv-2023-03-09T13-01-23.824.log 17301584 301M tikv-2023-03-10T05-59-38.642.log 17301607 301M tikv-2023-03-10T22-45-30.679.log
[2023/03/11 10:33:57.460 +08:00] [INFO] [pd.rs:1393] [“try to change peer”] [peer=“id: 749208695 store_id: 1 role: Learner”] [change_type=RemoveNode] [region_id=749208694]
[2023/03/11 10:33:57.460 +08:00] [INFO] [peer.rs:4467] [“propose conf change peer”] [kind=Simple] [changes=“[change_type: RemoveNode peer { id: 749208695 store_id: 1 role: Learner }]”] [peer_id=749208697] [region_id=749208694]
[2023/03/11 10:33:57.460 +08:00] [ERROR] [peer.rs:4976] [“failed to send extra message”] [err_code=KV:Raftstore:Transport] [err=Transport(Disconnected)] [target=“id: 39198 store_id: 1”] [peer_id=749191561] [region_id=39197] [type=MsgHibernateRequest]
[2023/03/11 10:33:57.460 +08:00] [INFO] [apply.rs:1396] [“execute admin command”] [command=“cmd_type: ChangePeer change_peer { change_type: RemoveNode peer { id: 749208695 store_id: 1 role: Learner } }”] [index=40] [term=27] [peer_id=749208697] [region_id=749208694]
[2023/03/11 10:33:57.460 +08:00] [INFO] [apply.rs:1770] [“exec ConfChange”] [epoch=“conf_ver: 22 version: 15823”] [type=RemoveNode] [peer_id=749208697] [region_id=749208694]
[2023/03/11 10:33:57.460 +08:00] [INFO] [apply.rs:1878] [“remove peer successfully”] [region=“id: 749208694 start_key: 7480000000000001FF0F5F698000000000FF00000E01616D617AFF6F6E2E69FF740000FF0000000000F80419FFAAB2000000000003FF8000000001137393FF0000000000000000F7 end_key: 7480000000000001FF0F5F698000000000FF00000E01616D617AFF6F6E2E69FF740000FF0000000000F80419FFAABC000000000003FF8000000016161A34FF0000000000000000F7 region_epoch { conf_ver: 22 version: 15823 } peers { id: 749208695 store_id: 1 role: Learner } peers { id: 749208696 store_id: 4 } peers { id: 749208697 store_id: 744798344 } peers { id: 753460446 store_id: 749350518 }”] [peer=“id: 749208695 store_id: 1 role: Learner”] [peer_id=749208697] [region_id=749208694]
[2023/03/11 10:33:57.460 +08:00] [ERROR] [peer.rs:4976] [“failed to send extra message”] [err_code=KV:Raftstore:Transport] [err=Transport(Disconnected)] [target=“id: 162626 store_id: 1”] [peer_id=749186107] [region_id=162625] [type=MsgHibernateRequest]
[2023/03/11 10:33:57.460 +08:00] [ERROR] [peer.rs:4976] [“failed to send extra message”] [err_code=KV:Raftstore:Transport] [err=Transport(Disconnected)] [target=“id: 749201158 store_id: 1”] [peer_id=749201160] [region_id=749201157] [type=MsgHibernateRequest]
[2023/03/11 10:33:57.460 +08:00] [INFO] [raft.rs:2646] [“switched to configuration”] [config=“Configuration { voters: Configuration { incoming: Configuration { voters: {749208696, 749208697, 753460446} }, outgoing: Configuration { voters: {} } }, learners: {}, learners_next: {}, auto_leave: false }”] [raft_id=749208697] [region_id=749208694]
[2023/03/11 10:33:57.460 +08:00] [INFO] [peer.rs:3476] [“notify pd with change peer region”] [region=“id: 749208694 start_key: 7480000000000001FF0F5F698000000000FF00000E01616D617AFF6F6E2E69FF740000FF0000000000F80419FFAAB2000000000003FF8000000001137393FF0000000000000000F7 end_key: 7480000000000001FF0F5F698000000000FF00000E01616D617AFF6F6E2E69FF740000FF0000000000F80419FFAABC000000000003FF8000000016161A34FF0000000000000000F7 region_epoch { conf_ver: 23 version: 15823 } peers { id: 749208696 store_id: 4 } peers { id: 749208697 store_id: 744798344 } peers { id: 753460446 store_id: 749350518 }”] [peer_id=749208697] [region_id=749208694]
[2023/03/11 10:33:57.460 +08:00] [ERROR] [peer.rs:4976] [“failed to send extra message”] [err_code=KV:Raftstore:Transport] [err=Transport(Disconnected)] [target=“id: 184906 store_id: 1”] [peer_id=749156135] [region_id=184905] [type=MsgHibernateRequest]
[2023/03/11 10:33:57.461 +08:00] [ERROR] [peer.rs:4976] [“failed to send extra message”] [err_code=KV:Raftstore:Transport] [err=Transport(Disconnected)] [target=“id: 228622 store_id: 1”] [peer_id=749200956] [region_id=228621] [type=MsgHibernateRequest]
[2023/03/11 10:33:57.462 +08:00] [ERROR] [peer.rs:4976] [“failed to send extra message”] [err_code=KV:Raftstore:Transport] [err=Transport(Disconnected)] [target=“id: 168766 store_id: 1”] [peer_id=749196917] [region_id=168765] [type=MsgHibernateRequest]
[2023/03/11 10:33:58.218 +08:00] [ERROR] [peer.rs:4976] [“failed to send extra message”] [err_code=KV:Raftstore:Transport] [err=Transport(Disconnected)] [target=“id: 133926 store_id: 1”] [peer_id=749174133] [region_id=133925] [type=MsgHibernateRequest]
[2023/03/11 10:33:58.218 +08:00] [ERROR] [peer.rs:4976] [“failed to send extra message”] [err_code=KV:Raftstore:Transport] [err=Transport(Disconnected)] [target=“id: 64394 store_id: 1”] [peer_id=749193035] [region_id=64393] [type=MsgHibernateRequest]
[2023/03/11 10:33:58.219 +08:00] [ERROR] [peer.rs:4976] [“failed to send extra message”] [err_code=KV:Raftstore:Transport] [err=Transport(Disconnected)] [target=“id: 181338 store_id: 1”] [peer_id=747705560] [region_id=181337] [type=MsgHibernateRequest]
[2023/03/11 10:33:58.221 +08:00] [ERROR] [peer.rs:4976] [“failed to send extra message”] [err_code=KV:Raftstore:Transport] [err=Transport(Disconnected)] [target=“id: 19070 store_id: 1”] [peer_id=749201314] [region_id=19069] [type=MsgHibernateRequest]
[2023/03/11 10:33:58.221 +08:00] [ERROR] [peer.rs:4976] [“failed to send extra message”] [err_code=KV:Raftstore:Transport] [err=Transport(Disconnected)] [target=“id: 47754 store_id: 1”] [peer_id=749195078] [region_id=47753] [type=MsgHibernateRequest]
[2023/03/11 10:33:58.222 +08:00] [ERROR] [peer.rs:4976] [“failed to send extra message”] [err_code=KV:Raftstore:Transport] [err=Transport(Disconnected)] [target=“id: 49626 store_id: 1”] [peer_id=749182753] [region_id=49625] [type=MsgHibernateRequest]
[2023/03/11 10:33:58.222 +08:00] [ERROR] [peer.rs:4976] [“failed to send extra message”] [err_code=KV:Raftstore:Transport] [err=Transport(Disconnected)] [target=“id: 749252355 store_id: 1”] [peer_id=749252357] [region_id=749252354] [type=MsgHibernateRequest]
[2023/03/11 10:33:58.222 +08:00] [ERROR] [peer.rs:4976] [“failed to send extra message”] [err_code=KV:Raftstore:Transport] [err=Transport(Disconnected)] [target=“id: 123202 store_id: 1”] [peer_id=749202146] [region_id=123201] [type=MsgHibernateRequest]
[2023/03/11 10:33:58.223 +08:00] [ERROR] [peer.rs:4976] [“failed to send extra message”] [err_code=KV:Raftstore:Transport] [err=Transport(Disconnected)] [target=“id: 9894 store_id: 1”] [peer_id=749219146] [region_id=9893] [type=MsgHibernateRequest]
[2023/03/11 10:33:58.223 +08:00] [ERROR] [peer.rs:4976] [“failed to send extra message”] [err_code=KV:Raftstore:Transport] [err=Transport(Disconnected)] [target=“id: 135630 store_id: 1”] [peer_id=749164422] [region_id=135629] [type=MsgHibernateRequest]
[2023/03/11 10:33:58.224 +08:00] [ERROR] [peer.rs:4976] [“failed to send extra message”] [err_code=KV:Raftstore:Transport] [err=Transport(Disconnected)] [target=“id: 749252180 store_id: 1”] [peer_id=749252182] [region_id=749252179] [type=MsgHibernateRequest]
[2023/03/11 10:33:58.224 +08:00] [ERROR] [peer.rs:4976] [“failed to send extra message”] [err_code=KV:Raftstore:Transport] [err=Transport(Disconnected)] [target=“id: 749270009 store_id: 1”] [peer_id=749270501] [region_id=749270007] [type=MsgHibernateRequest]
[2023/03/11 10:33:58.224 +08:00] [ERROR] [peer.rs:4976] [“failed to send extra message”] [err_code=KV:Raftstore:Transport] [err=Transport(Disconnected)] [target=“id: 55930 store_id: 1”] [peer_id=749236927] [region_id=55929] [type=MsgHibernateRequest]
[2023/03/11 10:33:58.224 +08:00] [ERROR] [peer.rs:4976] [“failed to send extra message”] [err_code=KV:Raftstore:Transport] [err=Transport(Disconnected)] [target=“id: 160150 store_id: 1”] [peer_id=749185509] [region_id=160149] [type=MsgHibernateRequest]
[2023/03/11 10:33:58.225 +08:00] [ERROR] [peer.rs:4976] [“failed to send extra message”] [err_code=KV:Raftstore:Transport] [err=Transport(Disconnected)] [target=“id: 224947 store_id: 1”] [peer_id=749195915] [region_id=224946] [type=MsgHibernateRequest]
[2023/03/11 10:33:58.225 +08:00] [ERROR] [peer.rs:4976] [“failed to send extra message”] [err_code=KV:Raftstore:Transport] [err=Transport(Disconnected)] [target=“id: 34194 store_id: 1”] [peer_id=749193811] [region_id=34193] [type=MsgHibernateRequest]
@
“tikv.log” 1102277L, 281496407C