问题描述:
集群中带有 label host=h3 的两个 TiKV 节点里,总有一个节点的 leader 数为 0。重启集群之后,现象依旧。
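刚开始是 10.205.33.212 的 leader 数为 0;重启集群后,变成 10.205.33.21 的 leader 数为 0。使用 operator add transfer-leader 8202 5 手动切换 leader 后,leader 数还是为 0:从下面的日志看,peer 8205(store 5)在 term 30 成为 leader,约 3 秒后 leader 又被转移回 peer 8204(store 1)。使用 operator 操作时 TiKV 的日志: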
[2020/09/29 08:46:21.383 +08:00] [INFO] [raft.rs:1739] ["[term 29] received MsgTimeoutNow from 8204 and starts an election to get leadership."] [from=8204] [term=29] [raft_id=8205] [region_id=8202]
[2020/09/29 08:46:21.383 +08:00] [INFO] [raft.rs:1177] ["starting a new election"] [term=29] [raft_id=8205] [region_id=8202]
[2020/09/29 08:46:21.383 +08:00] [INFO] [raft.rs:807] ["became candidate at term 30"] [term=30] [raft_id=8205] [region_id=8202]
[2020/09/29 08:46:21.383 +08:00] [INFO] [raft.rs:902] ["8205 received message from 8205"] [term=30] [msg=MsgRequestVote] [from=8205] [id=8205] [raft_id=8205] [region_id=8202]
[2020/09/29 08:46:21.383 +08:00] [INFO] [raft.rs:923] ["[logterm: 29, index: 11099] sent request to 8204"] [msg=MsgRequestVote] [term=30] [id=8204] [log_index=11099] [log_term=29] [raft_id=8205] [region_id=8202]
[2020/09/29 08:46:21.383 +08:00] [INFO] [raft.rs:923] ["[logterm: 29, index: 11099] sent request to 8203"] [msg=MsgRequestVote] [term=30] [id=8203] [log_index=11099] [log_term=29] [raft_id=8205] [region_id=8202]
[2020/09/29 08:46:21.393 +08:00] [INFO] [raft.rs:1673] ["received from 8204"] [term=30] ["msg type"=MsgRequestVoteResponse] [from=8204] [raft_id=8205] [region_id=8202]
[2020/09/29 08:46:21.393 +08:00] [INFO] [raft.rs:874] ["became leader at term 30"] [term=30] [raft_id=8205] [region_id=8202]
[2020/09/29 08:46:24.809 +08:00] [INFO] [pd.rs:792] ["try to transfer leader"] [to_peer="id: 8204 store_id: 1"] [from_peer="id: 8205 store_id: 5"] [region_id=8202]
[2020/09/29 08:46:24.810 +08:00] [INFO] [peer.rs:1926] ["transfer leader"] [peer="id: 8204 store_id: 1"] [peer_id=8205] [region_id=8202]
[2020/09/29 08:46:24.810 +08:00] [INFO] [raft.rs:1376] ["[term 30] starts to transfer leadership to 8204"] [lead_transferee=8204] [term=30] [raft_id=8205] [region_id=8202]
[2020/09/29 08:46:24.810 +08:00] [INFO] [raft.rs:1389] ["sends MsgTimeoutNow to 8204 immediately as 8204 already has up-to-date log"] [lead_transferee=8204] [raft_id=8205] [region_id=8202]
[2020/09/29 08:46:24.812 +08:00] [INFO] [raft.rs:1003] ["received a message with higher term from 8204"] ["msg type"=MsgRequestVote] [message_term=31] [term=30] [from=8204] [raft_id=8205] [region_id=8202]
[2020/09/29 08:46:24.812 +08:00] [INFO] [raft.rs:783] ["became follower at term 31"] [term=31] [raft_id=8205] [region_id=8202]
[2020/09/29 08:46:24.812 +08:00] [INFO] [raft.rs:1192] ["[logterm: 30, index: 11100, vote: 0] cast vote for 8204 [logterm: 30, index: 11100] at term 31"] ["msg type"=MsgRequestVote] [term=31] [msg_index=11100] [msg_term=30] [from=8204] [vote=0] [log_index=11100] [log_term=30] [raft_id=8205] [region_id=8202]
重启集群前后的 TiKV leader 分布图:
集群拓扑结构:
pd-ctl store 命令的输出:
» store
{
"count": 6,
"stores": [
{
"store": {
"id": 8,
"address": "10.205.109.79:20160",
"labels": [
{
"key": "host",
"value": "h1"
},
{
"key": "zone",
"value": "zp"
}
],
"version": "4.0.6",
"status_address": "10.205.109.79:20180",
"git_hash": "ca2475bfbcb49a7c34cf783596acb3edd05fc88f",
"start_timestamp": 1601342721,
"deploy_path": "/vdb/tidb/deploy/tikv-20160/bin",
"last_heartbeat": 1601345052136889172,
"state_name": "Up"
},
"status": {
"capacity": "1.922TiB",
"available": "1.886TiB",
"used_size": "33.89GiB",
"leader_count": 788,
"leader_weight": 1,
"leader_score": 788,
"leader_size": 72280,
"region_count": 2002,
"region_weight": 1,
"region_score": 180688,
"region_size": 180688,
"start_ts": "2020-09-29T09:25:21+08:00",
"last_heartbeat_ts": "2020-09-29T10:04:12.136889172+08:00",
"uptime": "38m51.136889172s"
}
},
{
"store": {
"id": 1,
"address": "10.205.109.242:20160",
"labels": [
{
"key": "host",
"value": "h1"
},
{
"key": "zone",
"value": "zp"
}
],
"version": "4.0.6",
"status_address": "10.205.109.242:20180",
"git_hash": "ca2475bfbcb49a7c34cf783596acb3edd05fc88f",
"start_timestamp": 1601342718,
"deploy_path": "/vdb/tidb/deploy/tikv-20160/bin",
"last_heartbeat": 1601345059281137012,
"state_name": "Up"
},
"status": {
"capacity": "1.922TiB",
"available": "1.883TiB",
"used_size": "36.95GiB",
"leader_count": 789,
"leader_weight": 1,
"leader_score": 789,
"leader_size": 72833,
"region_count": 1921,
"region_weight": 1,
"region_score": 179002,
"region_size": 179002,
"start_ts": "2020-09-29T09:25:18+08:00",
"last_heartbeat_ts": "2020-09-29T10:04:19.281137012+08:00",
"uptime": "39m1.281137012s"
}
},
{
"store": {
"id": 4,
"address": "10.205.33.211:20160",
"labels": [
{
"key": "host",
"value": "h2"
},
{
"key": "zone",
"value": "zp"
}
],
"version": "4.0.6",
"status_address": "10.205.33.211:20180",
"git_hash": "ca2475bfbcb49a7c34cf783596acb3edd05fc88f",
"start_timestamp": 1601344615,
"deploy_path": "/vdb/tidb/deploy/tikv-20160/bin",
"last_heartbeat": 1601345055566072097,
"state_name": "Up"
},
"status": {
"capacity": "1.922TiB",
"available": "1.883TiB",
"used_size": "37.1GiB",
"leader_count": 779,
"leader_weight": 1,
"leader_score": 779,
"leader_size": 71619,
"region_count": 1934,
"region_weight": 1,
"region_score": 179050,
"region_size": 179050,
"start_ts": "2020-09-29T09:56:55+08:00",
"last_heartbeat_ts": "2020-09-29T10:04:15.566072097+08:00",
"uptime": "7m20.566072097s"
}
},
{
"store": {
"id": 5,
"address": "10.205.33.212:20160",
"labels": [
{
"key": "host",
"value": "h3"
},
{
"key": "zone",
"value": "zp"
}
],
"version": "4.0.6",
"status_address": "10.205.33.212:20180",
"git_hash": "ca2475bfbcb49a7c34cf783596acb3edd05fc88f",
"start_timestamp": 1601342718,
"deploy_path": "/vdb/tidb/deploy/tikv-20160/bin",
"last_heartbeat": 1601345059385726703,
"state_name": "Up"
},
"status": {
"capacity": "1.922TiB",
"available": "1.884TiB",
"used_size": "36.57GiB",
"leader_count": 789,
"leader_weight": 1,
"leader_score": 789,
"leader_size": 71809,
"region_count": 1938,
"region_weight": 1,
"region_score": 179097,
"region_size": 179097,
"start_ts": "2020-09-29T09:25:18+08:00",
"last_heartbeat_ts": "2020-09-29T10:04:19.385726703+08:00",
"uptime": "39m1.385726703s"
}
},
{
"store": {
"id": 6,
"address": "10.205.33.210:20160",
"labels": [
{
"key": "host",
"value": "h2"
},
{
"key": "zone",
"value": "zp"
}
],
"version": "4.0.6",
"status_address": "10.205.33.210:20180",
"git_hash": "ca2475bfbcb49a7c34cf783596acb3edd05fc88f",
"start_timestamp": 1601344622,
"deploy_path": "/vdb/tidb/deploy/tikv-20160/bin",
"last_heartbeat": 1601345052581353578,
"state_name": "Up"
},
"status": {
"capacity": "1.922TiB",
"available": "1.885TiB",
"used_size": "34.79GiB",
"leader_count": 780,
"leader_weight": 1,
"leader_score": 780,
"leader_size": 71237,
"region_count": 1990,
"region_weight": 1,
"region_score": 180756,
"region_size": 180756,
"start_ts": "2020-09-29T09:57:02+08:00",
"last_heartbeat_ts": "2020-09-29T10:04:12.581353578+08:00",
"uptime": "7m10.581353578s"
}
},
{
"store": {
"id": 7,
"address": "10.205.33.21:20160",
"labels": [
{
"key": "host",
"value": "h3"
},
{
"key": "zone",
"value": "zp"
}
],
"version": "4.0.6",
"status_address": "10.205.33.21:20180",
"git_hash": "ca2475bfbcb49a7c34cf783596acb3edd05fc88f",
"start_timestamp": 1601342722,
"deploy_path": "/vdb/tidb/deploy/tikv-20160/bin",
"last_heartbeat": 1601345053151585411,
"state_name": "Up"
},
"status": {
"capacity": "1.922TiB",
"available": "1.886TiB",
"used_size": "33.96GiB",
"leader_count": 0,
"leader_weight": 1,
"leader_score": 0,
"leader_size": 0,
"region_count": 1990,
"region_weight": 1,
"region_score": 180741,
"region_size": 180741,
"start_ts": "2020-09-29T09:25:22+08:00",
"last_heartbeat_ts": "2020-09-29T10:04:13.151585411+08:00",
"uptime": "38m51.151585411s"
}
}
]
}
»