br全量备份数据校验失败

【 TiDB 使用环境】
运行在kubernetes
5.0.1版本

【概述】 场景 + 问题概述
通过br进行全量备份,在数据校验环节失败,缺少backupmeta文件,日志内容如下:
[2022/03/15 14:50:09.140 +08:00] [INFO] [peer.go:128] [“starting remote peer”] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.140 +08:00] [INFO] [pipeline.go:71] [“started HTTP pipelining with remote peer”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.141 +08:00] [INFO] [stream.go:166] [“started stream writer with remote peer”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.141 +08:00] [INFO] [stream.go:166] [“started stream writer with remote peer”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.142 +08:00] [INFO] [peer.go:134] [“started remote peer”] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.142 +08:00] [INFO] [stream.go:406] [“started stream reader with remote peer”] [stream-reader-type=“stream Message”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.142 +08:00] [INFO] [transport.go:327] [“added remote peer”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=b012f094b5d4c3a9] [remote-peer-urls="[http://prod-tidb-pd-1.prod-tidb-pd-peer.vpc-prod.svc:2380]"]

[2022/03/15 14:50:09.142 +08:00] [INFO] [stream.go:406] [“started stream reader with remote peer”] [stream-reader-type=“stream MsgApp v2”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.142 +08:00] [INFO] [peer.go:128] [“starting remote peer”] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:50:09.142 +08:00] [INFO] [pipeline.go:71] [“started HTTP pipelining with remote peer”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:50:09.143 +08:00] [INFO] [stream.go:166] [“started stream writer with remote peer”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:50:09.143 +08:00] [INFO] [stream.go:166] [“started stream writer with remote peer”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:50:09.144 +08:00] [INFO] [peer.go:134] [“started remote peer”] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:50:09.144 +08:00] [INFO] [transport.go:327] [“added remote peer”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=fd69404a6e9a1ae9] [remote-peer-urls="[http://prod-tidb-pd-0.prod-tidb-pd-peer.vpc-prod.svc:2380]"]

[2022/03/15 14:50:09.144 +08:00] [INFO] [server.go:779] [“starting etcd server”] [local-member-id=5a8e9dd5ea2f7640] [local-server-version=3.4.3] [cluster-id=974d8bcf14b0da02] [cluster-version=3.4]

[2022/03/15 14:50:09.146 +08:00] [INFO] [stream.go:406] [“started stream reader with remote peer”] [stream-reader-type=“stream Message”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:50:09.146 +08:00] [INFO] [stream.go:406] [“started stream reader with remote peer”] [stream-reader-type=“stream MsgApp v2”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:50:09.146 +08:00] [INFO] [server.go:680] [“starting initial election tick advance”] [election-ticks=6]

[2022/03/15 14:50:09.149 +08:00] [INFO] [etcd.go:241] [“now serving peer/client/metrics”] [local-member-id=5a8e9dd5ea2f7640] [initial-advertise-peer-urls="[http://prod-tidb-pd-2.prod-tidb-pd-peer.vpc-prod.svc:2380]"] [listen-peer-urls="[http://0.0.0.0:2380]"] [advertise-client-urls="[http://prod-tidb-pd-2.prod-tidb-pd-peer.vpc-prod.svc:2379]"] [listen-client-urls="[http://0.0.0.0:2379]"] [listen-metrics-urls="[]"]

[2022/03/15 14:50:09.150 +08:00] [INFO] [etcd.go:576] [“serving peer traffic”] [address="[::]:2380"]

[2022/03/15 14:50:09.156 +08:00] [WARN] [server.go:1074] [“server error”] [error=“the member has been permanently removed from the cluster”]

[2022/03/15 14:50:09.156 +08:00] [WARN] [server.go:1075] [“data-dir used by this member must be removed”]

[2022/03/15 14:50:09.156 +08:00] [WARN] [server.go:2045] [“failed to publish local member to cluster through raft”] [local-member-id=5a8e9dd5ea2f7640] [local-member-attributes="{Name:prod-tidb-pd-2 ClientURLs:[http://prod-tidb-pd-2.prod-tidb-pd-peer.vpc-prod.svc:2379]}"] [request-path=/0/members/5a8e9dd5ea2f7640/attributes] [publish-timeout=11s] [error=“etcdserver: request cancelled”]

[2022/03/15 14:50:09.239 +08:00] [WARN] [server.go:2045] [“failed to publish local member to cluster through raft”] [local-member-id=5a8e9dd5ea2f7640] [local-member-attributes="{Name:prod-tidb-pd-2 ClientURLs:[http://prod-tidb-pd-2.prod-tidb-pd-peer.vpc-prod.svc:2379]}"] [request-path=/0/members/5a8e9dd5ea2f7640/attributes] [publish-timeout=11s] [error=“etcdserver: request cancelled”]

[2022/03/15 14:50:09.239 +08:00] [WARN] [server.go:2045] [“failed to publish local member to cluster through raft”] [local-member-id=5a8e9dd5ea2f7640] [local-member-attributes="{Name:prod-tidb-pd-2 ClientURLs:[http://prod-tidb-pd-2.prod-tidb-pd-peer.vpc-prod.svc:2379]}"] [request-path=/0/members/5a8e9dd5ea2f7640/attributes] [publish-timeout=11s] [error=“etcdserver: request cancelled”]

[2022/03/15 14:50:09.239 +08:00] [WARN] [server.go:2031] [“stopped publish because server is stopped”] [local-member-id=5a8e9dd5ea2f7640] [local-member-attributes="{Name:prod-tidb-pd-2 ClientURLs:[http://prod-tidb-pd-2.prod-tidb-pd-peer.vpc-prod.svc:2379]}"] [publish-timeout=11s] [error=“etcdserver: server stopped”]

[2022/03/15 14:50:09.545 +08:00] [INFO] [peer.go:333] [“stopping remote peer”] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.545 +08:00] [WARN] [stream.go:301] [“stopped TCP streaming connection with remote peer”] [stream-writer-type=“unknown stream”] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.545 +08:00] [WARN] [stream.go:301] [“stopped TCP streaming connection with remote peer”] [stream-writer-type=“unknown stream”] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.545 +08:00] [INFO] [pipeline.go:86] [“stopped HTTP pipelining with remote peer”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.545 +08:00] [INFO] [stream.go:459] [“stopped stream reader with remote peer”] [stream-reader-type=“stream MsgApp v2”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.545 +08:00] [INFO] [stream.go:459] [“stopped stream reader with remote peer”] [stream-reader-type=“stream Message”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.545 +08:00] [INFO] [peer.go:340] [“stopped remote peer”] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.545 +08:00] [INFO] [peer.go:333] [“stopping remote peer”] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:50:09.545 +08:00] [WARN] [stream.go:301] [“stopped TCP streaming connection with remote peer”] [stream-writer-type=“unknown stream”] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:50:09.545 +08:00] [WARN] [stream.go:301] [“stopped TCP streaming connection with remote peer”] [stream-writer-type=“unknown stream”] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:50:09.545 +08:00] [INFO] [pipeline.go:86] [“stopped HTTP pipelining with remote peer”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:50:09.545 +08:00] [INFO] [stream.go:459] [“stopped stream reader with remote peer”] [stream-reader-type=“stream MsgApp v2”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:50:09.545 +08:00] [INFO] [stream.go:459] [“stopped stream reader with remote peer”] [stream-reader-type=“stream Message”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:50:09.545 +08:00] [INFO] [peer.go:340] [“stopped remote peer”] [remote-peer-id=fd69404a6e9a1ae9]

目前发现pd存在异常,三个pd实例,其中一个总是频繁重启,监控中只显示两个pd,异常pd实例的日志如下:
[2022/03/15 14:50:08.471 +08:00] [INFO] [server.go:222] [“PD Config”] [config="{“client-urls”:“http://0.0.0.0:2379”,“peer-urls”:“http://0.0.0.0:2380”,“advertise-client-urls”:“http://prod-tidb-pd-2.prod-tidb-pd-peer.vpc-prod.svc:2379”,“advertise-peer-urls”:“http://prod-tidb-pd-2.prod-tidb-pd-peer.vpc-prod.svc:2380”,“name”:“prod-tidb-pd-2”,“data-dir”:"/var/lib/pd",“force-new-cluster”:false,“enable-grpc-gateway”:true,“initial-cluster”:“prod-tidb-pd-2=http://prod-tidb-pd-2.prod-tidb-pd-peer.vpc-prod.svc:2380,prod-tidb-pd-1=http://prod-tidb-pd-1.prod-tidb-pd-peer.vpc-prod.svc:2380,prod-tidb-pd-0=http://prod-tidb-pd-0.prod-tidb-pd-peer.vpc-prod.svc:2380”,“initial-cluster-state”:“existing”,“initial-cluster-token”:“pd-cluster”,“join”:“http://prod-tidb-pd-2.prod-tidb-pd-peer.vpc-prod.svc:2380,http://prod-tidb-pd-1.prod-tidb-pd-peer.vpc-prod.svc:2380,http://prod-tidb-pd-0.prod-tidb-pd-peer.vpc-prod.svc:2380”,“lease”:3,“log”:{“level”:"",“format”:“text”,“disable-timestamp”:false,“file”:{“filename”:"",“max-size”:0,“max-days”:0,“max-backups”:0},“development”:false,“disable-caller”:false,“disable-stacktrace”:false,“disable-error-verbose”:true,“sampling”:null},“tso-save-interval”:“3s”,“tso-update-physical-interval”:“50ms”,“enable-local-tso”:false,“metric”:{“job”:“prod-tidb-pd-2”,“address”:"",“interval”:“15s”},“schedule”:{“max-snapshot-count”:3,“max-pending-peer-count”:16,“max-merge-region-size”:20,“max-merge-region-keys”:200000,“split-merge-interval”:“1h0m0s”,“enable-one-way-merge”:“false”,“enable-cross-table-merge”:“true”,“patrol-region-interval”:“100ms”,“max-store-down-time”:“30m0s”,“leader-schedule-limit”:4,“leader-schedule-policy”:“count”,“region-schedule-limit”:2048,“replica-schedule-limit”:64,“merge-schedule-limit”:8,“hot-region-schedule-limit”:4,“hot-region-cache-hits-threshold”:3,“store-limit”:{},“tolerant-size-ratio”:0,“low-space-ratio”:0.8,“high-space-ratio”:0.7,“region-score-formula-version”:“v2”,“scheduler-max-waiting-operator”:5,“enable-remove-down-replica”:“true”,“ena
ble-replace-offline-replica”:“true”,“enable-make-up-replica”:“true”,“enable-remove-extra-replica”:“true”,“enable-location-replacement”:“true”,“enable-debug-metrics”:“false”,“enable-joint-consensus”:“true”,“schedulers-v2”:[{“type”:“balance-region”,“args”:null,“disable”:false,“args-payload”:""},{“type”:“balance-leader”,“args”:null,“disable”:false,“args-payload”:""},{“type”:“hot-region”,“args”:null,“disable”:false,“args-payload”:""},{“type”:“label”,“args”:null,“disable”:false,“args-payload”:""}],“schedulers-payload”:null,“store-limit-mode”:“manual”},“replication”:{“max-replicas”:3,“location-labels”:"",“strictly-match-label”:“false”,“enable-placement-rules”:“true”,“isolation-level”:""},“pd-server”:{“use-region-storage”:“true”,“max-gap-reset-ts”:“24h0m0s”,“key-type”:“table”,“runtime-services”:"",“metric-storage”:"",“dashboard-address”:“auto”,“trace-region-flow”:“true”},“cluster-version”:“0.0.0”,“labels”:{},“quota-backend-bytes”:“8GiB”,“auto-compaction-mode”:“periodic”,“auto-compaction-retention-v2”:“1h”,“TickInterval”:“500ms”,“ElectionInterval”:“3s”,“PreVote”:true,“security”:{“cacert-path”:"",“cert-path”:"",“key-path”:"",“cert-allowed-cn”:null,“redact-info-log”:false,“encryption”:{“data-encryption-method”:“plaintext”,“data-key-rotation-period”:“168h0m0s”,“master-key”:{“type”:“plaintext”,“key-id”:"",“region”:"",“endpoint”:"",“path”:""}}},“label-property”:null,“WarningMsgs”:null,“DisableStrictReconfigCheck”:false,“HeartbeatStreamBindInterval”:“1m0s”,“LeaderPriorityCheckInterval”:“1m0s”,“dashboard”:{“tidb-cacert-path”:"",“tidb-cert-path”:"",“tidb-key-path”:"",“public-path-prefix”:"",“internal-proxy”:true,“enable-telemetry”:true,“enable-experimental”:false},“replication-mode”:{“replication-mode”:“majority”,“dr-auto-sync”:{“label-key”:"",“primary”:"",“dr”:"",“primary-replicas”:0,“dr-replicas”:0,“wait-store-timeout”:“1m0s”,“wait-sync-timeout”:“1m0s”,“wait-async-timeout”:“2m0s”}}}"]

[2022/03/15 14:50:08.476 +08:00] [INFO] [server.go:195] [“register REST path”] [path=/pd/api/v1]

[2022/03/15 14:50:08.476 +08:00] [INFO] [server.go:195] [“register REST path”] [path=/swagger/]

[2022/03/15 14:50:08.476 +08:00] [INFO] [server.go:195] [“register REST path”] [path=/autoscaling]

[2022/03/15 14:50:08.479 +08:00] [INFO] [server.go:195] [“register REST path”] [path=/dashboard/api/]

[2022/03/15 14:50:08.479 +08:00] [INFO] [server.go:195] [“register REST path”] [path=/dashboard/]

[2022/03/15 14:50:08.479 +08:00] [INFO] [etcd.go:117] [“configuring peer listeners”] [listen-peer-urls="[http://0.0.0.0:2380]"]

[2022/03/15 14:50:08.479 +08:00] [INFO] [systimemon.go:27] [“start system time monitor”]

[2022/03/15 14:50:08.479 +08:00] [INFO] [etcd.go:127] [“configuring client listeners”] [listen-client-urls="[http://0.0.0.0:2379]"]

[2022/03/15 14:50:08.479 +08:00] [INFO] [etcd.go:602] [“pprof is enabled”] [path=/debug/pprof]

[2022/03/15 14:50:08.480 +08:00] [INFO] [etcd.go:299] [“starting an etcd server”] [etcd-version=3.4.3] [git-sha=“Not provided (use ./build instead of go build)”] [go-version=go1.13] [go-os=linux] [go-arch=amd64] [max-cpu-set=8] [max-cpu-available=8] [member-initialized=true] [name=prod-tidb-pd-2] [data-dir=/var/lib/pd] [wal-dir=] [wal-dir-dedicated=] [member-dir=/var/lib/pd/member] [force-new-cluster=false] [heartbeat-interval=500ms] [election-timeout=3s] [initial-election-tick-advance=true] [snapshot-count=100000] [snapshot-catchup-entries=5000] [initial-advertise-peer-urls="[http://prod-tidb-pd-2.prod-tidb-pd-peer.vpc-prod.svc:2380]"] [listen-peer-urls="[http://0.0.0.0:2380]"] [advertise-client-urls="[http://prod-tidb-pd-2.prod-tidb-pd-peer.vpc-prod.svc:2379]"] [listen-client-urls="[http://0.0.0.0:2379]"] [listen-metrics-urls="[]"] [cors="[]"] [host-whitelist="[]"] [initial-cluster=] [initial-cluster-state=existing] [initial-cluster-token=] [quota-size-bytes=8589934592] [pre-vote=true] [initial-corrupt-check=false] [corrupt-check-time-interval=0s] [auto-compaction-mode=periodic] [auto-compaction-retention=1h0m0s] [auto-compaction-interval=1h0m0s] [discovery-url=] [discovery-proxy=]

[2022/03/15 14:50:08.539 +08:00] [INFO] [backend.go:79] [“opened backend db”] [path=/var/lib/pd/member/snap/db] [took=58.268344ms]

[2022/03/15 14:50:08.540 +08:00] [INFO] [server.go:443] [“recovered v2 store from snapshot”] [snapshot-index=7800078] [snapshot-size=“20 kB”]

[2022/03/15 14:50:08.541 +08:00] [INFO] [kvstore.go:378] [“restored last compact revision”] [meta-bucket-name=meta] [meta-bucket-name-key=finishedCompactRev] [restored-compact-revision=7867104]

[2022/03/15 14:50:08.549 +08:00] [INFO] [server.go:461] [“recovered v3 backend from snapshot”] [backend-size-bytes=1552384] [backend-size=“1.6 MB”] [backend-size-in-use-bytes=720896] [backend-size-in-use=“721 kB”]

[2022/03/15 14:50:09.068 +08:00] [INFO] [raft.go:506] [“restarting local member”] [cluster-id=974d8bcf14b0da02] [local-member-id=5a8e9dd5ea2f7640] [commit-index=7884500]

[2022/03/15 14:50:09.071 +08:00] [INFO] [raft.go:1530] [“5a8e9dd5ea2f7640 switched to configuration voters=(6525326452189001280 12687467621752554409 18260196852577737449)”]

[2022/03/15 14:50:09.071 +08:00] [INFO] [raft.go:700] [“5a8e9dd5ea2f7640 became follower at term 20”]

[2022/03/15 14:50:09.071 +08:00] [INFO] [raft.go:383] [“newRaft 5a8e9dd5ea2f7640 [peers: [5a8e9dd5ea2f7640,b012f094b5d4c3a9,fd69404a6e9a1ae9], term: 20, commit: 7884500, applied: 7800078, lastindex: 7884500, lastterm: 20]”]

[2022/03/15 14:50:09.071 +08:00] [INFO] [capability.go:76] [“enabled capabilities for version”] [cluster-version=3.4]

[2022/03/15 14:50:09.071 +08:00] [INFO] [cluster.go:256] [“recovered/added member from store”] [cluster-id=974d8bcf14b0da02] [local-member-id=5a8e9dd5ea2f7640] [recovered-remote-peer-id=5a8e9dd5ea2f7640] [recovered-remote-peer-urls="[http://prod-tidb-pd-2.prod-tidb-pd-peer.vpc-prod.svc:2380]"]

[2022/03/15 14:50:09.071 +08:00] [INFO] [cluster.go:256] [“recovered/added member from store”] [cluster-id=974d8bcf14b0da02] [local-member-id=5a8e9dd5ea2f7640] [recovered-remote-peer-id=b012f094b5d4c3a9] [recovered-remote-peer-urls="[http://prod-tidb-pd-1.prod-tidb-pd-peer.vpc-prod.svc:2380]"]

[2022/03/15 14:50:09.071 +08:00] [INFO] [cluster.go:256] [“recovered/added member from store”] [cluster-id=974d8bcf14b0da02] [local-member-id=5a8e9dd5ea2f7640] [recovered-remote-peer-id=fd69404a6e9a1ae9] [recovered-remote-peer-urls="[http://prod-tidb-pd-0.prod-tidb-pd-peer.vpc-prod.svc:2380]"]

[2022/03/15 14:50:09.071 +08:00] [INFO] [cluster.go:269] [“set cluster version from store”] [cluster-version=3.4]

[2022/03/15 14:50:09.072 +08:00] [INFO] [kvstore.go:378] [“restored last compact revision”] [meta-bucket-name=meta] [meta-bucket-name-key=finishedCompactRev] [restored-compact-revision=7867104]

[2022/03/15 14:50:09.139 +08:00] [WARN] [store.go:1317] [“simple token is not cryptographically signed”]

[2022/03/15 14:50:09.140 +08:00] [INFO] [quota.go:126] [“enabled backend quota”] [quota-name=v3-applier] [quota-size-bytes=8589934592] [quota-size=“8.6 GB”]

[2022/03/15 14:50:09.140 +08:00] [INFO] [peer.go:128] [“starting remote peer”] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.140 +08:00] [INFO] [pipeline.go:71] [“started HTTP pipelining with remote peer”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.141 +08:00] [INFO] [stream.go:166] [“started stream writer with remote peer”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.141 +08:00] [INFO] [stream.go:166] [“started stream writer with remote peer”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.142 +08:00] [INFO] [peer.go:134] [“started remote peer”] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.142 +08:00] [INFO] [stream.go:406] [“started stream reader with remote peer”] [stream-reader-type=“stream Message”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.142 +08:00] [INFO] [transport.go:327] [“added remote peer”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=b012f094b5d4c3a9] [remote-peer-urls="[http://prod-tidb-pd-1.prod-tidb-pd-peer.vpc-prod.svc:2380]"]

[2022/03/15 14:50:09.142 +08:00] [INFO] [stream.go:406] [“started stream reader with remote peer”] [stream-reader-type=“stream MsgApp v2”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.142 +08:00] [INFO] [peer.go:128] [“starting remote peer”] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:50:09.142 +08:00] [INFO] [pipeline.go:71] [“started HTTP pipelining with remote peer”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:50:09.143 +08:00] [INFO] [stream.go:166] [“started stream writer with remote peer”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:50:09.143 +08:00] [INFO] [stream.go:166] [“started stream writer with remote peer”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:50:09.144 +08:00] [INFO] [peer.go:134] [“started remote peer”] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:50:09.144 +08:00] [INFO] [transport.go:327] [“added remote peer”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=fd69404a6e9a1ae9] [remote-peer-urls="[http://prod-tidb-pd-0.prod-tidb-pd-peer.vpc-prod.svc:2380]"]

[2022/03/15 14:50:09.144 +08:00] [INFO] [server.go:779] [“starting etcd server”] [local-member-id=5a8e9dd5ea2f7640] [local-server-version=3.4.3] [cluster-id=974d8bcf14b0da02] [cluster-version=3.4]

[2022/03/15 14:50:09.146 +08:00] [INFO] [stream.go:406] [“started stream reader with remote peer”] [stream-reader-type=“stream Message”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:50:09.146 +08:00] [INFO] [stream.go:406] [“started stream reader with remote peer”] [stream-reader-type=“stream MsgApp v2”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:50:09.146 +08:00] [INFO] [server.go:680] [“starting initial election tick advance”] [election-ticks=6]

[2022/03/15 14:50:09.149 +08:00] [INFO] [etcd.go:241] [“now serving peer/client/metrics”] [local-member-id=5a8e9dd5ea2f7640] [initial-advertise-peer-urls="[http://prod-tidb-pd-2.prod-tidb-pd-peer.vpc-prod.svc:2380]"] [listen-peer-urls="[http://0.0.0.0:2380]"] [advertise-client-urls="[http://prod-tidb-pd-2.prod-tidb-pd-peer.vpc-prod.svc:2379]"] [listen-client-urls="[http://0.0.0.0:2379]"] [listen-metrics-urls="[]"]

[2022/03/15 14:50:09.150 +08:00] [INFO] [etcd.go:576] [“serving peer traffic”] [address="[::]:2380"]

[2022/03/15 14:50:09.156 +08:00] [WARN] [server.go:1074] [“server error”] [error=“the member has been permanently removed from the cluster”]

[2022/03/15 14:50:09.156 +08:00] [WARN] [server.go:1075] [“data-dir used by this member must be removed”]

[2022/03/15 14:50:09.156 +08:00] [WARN] [server.go:2045] [“failed to publish local member to cluster through raft”] [local-member-id=5a8e9dd5ea2f7640] [local-member-attributes="{Name:prod-tidb-pd-2 ClientURLs:[http://prod-tidb-pd-2.prod-tidb-pd-peer.vpc-prod.svc:2379]}"] [request-path=/0/members/5a8e9dd5ea2f7640/attributes] [publish-timeout=11s] [error=“etcdserver: request cancelled”]

[2022/03/15 14:50:09.239 +08:00] [WARN] [server.go:2045] [“failed to publish local member to cluster through raft”] [local-member-id=5a8e9dd5ea2f7640] [local-member-attributes="{Name:prod-tidb-pd-2 ClientURLs:[http://prod-tidb-pd-2.prod-tidb-pd-peer.vpc-prod.svc:2379]}"] [request-path=/0/members/5a8e9dd5ea2f7640/attributes] [publish-timeout=11s] [error=“etcdserver: request cancelled”]

[2022/03/15 14:50:09.239 +08:00] [WARN] [server.go:2045] [“failed to publish local member to cluster through raft”] [local-member-id=5a8e9dd5ea2f7640] [local-member-attributes="{Name:prod-tidb-pd-2 ClientURLs:[http://prod-tidb-pd-2.prod-tidb-pd-peer.vpc-prod.svc:2379]}"] [request-path=/0/members/5a8e9dd5ea2f7640/attributes] [publish-timeout=11s] [error=“etcdserver: request cancelled”]

[2022/03/15 14:50:09.239 +08:00] [WARN] [server.go:2031] [“stopped publish because server is stopped”] [local-member-id=5a8e9dd5ea2f7640] [local-member-attributes="{Name:prod-tidb-pd-2 ClientURLs:[http://prod-tidb-pd-2.prod-tidb-pd-peer.vpc-prod.svc:2379]}"] [publish-timeout=11s] [error=“etcdserver: server stopped”]

[2022/03/15 14:50:09.545 +08:00] [INFO] [peer.go:333] [“stopping remote peer”] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.545 +08:00] [WARN] [stream.go:301] [“stopped TCP streaming connection with remote peer”] [stream-writer-type=“unknown stream”] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.545 +08:00] [WARN] [stream.go:301] [“stopped TCP streaming connection with remote peer”] [stream-writer-type=“unknown stream”] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.545 +08:00] [INFO] [pipeline.go:86] [“stopped HTTP pipelining with remote peer”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.545 +08:00] [INFO] [stream.go:459] [“stopped stream reader with remote peer”] [stream-reader-type=“stream MsgApp v2”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.545 +08:00] [INFO] [stream.go:459] [“stopped stream reader with remote peer”] [stream-reader-type=“stream Message”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.545 +08:00] [INFO] [peer.go:340] [“stopped remote peer”] [remote-peer-id=b012f094b5d4c3a9]

[2022/03/15 14:50:09.545 +08:00] [INFO] [peer.go:333] [“stopping remote peer”] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:50:09.545 +08:00] [WARN] [stream.go:301] [“stopped TCP streaming connection with remote peer”] [stream-writer-type=“unknown stream”] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:50:09.545 +08:00] [WARN] [stream.go:301] [“stopped TCP streaming connection with remote peer”] [stream-writer-type=“unknown stream”] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:50:09.545 +08:00] [INFO] [pipeline.go:86] [“stopped HTTP pipelining with remote peer”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:50:09.545 +08:00] [INFO] [stream.go:459] [“stopped stream reader with remote peer”] [stream-reader-type=“stream MsgApp v2”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:50:09.545 +08:00] [INFO] [stream.go:459] [“stopped stream reader with remote peer”] [stream-reader-type=“stream Message”] [local-member-id=5a8e9dd5ea2f7640] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:50:09.545 +08:00] [INFO] [peer.go:340] [“stopped remote peer”] [remote-peer-id=fd69404a6e9a1ae9]

[2022/03/15 14:55:08.479 +08:00] [FATAL] [main.go:121] [“run server failed”] [error="[PD:server:ErrCancelStartEtcd]etcd start canceled"] [stack=“github.com/pingcap/log.Fatal\ \t/go/pkg/mod/github.com/pingcap/log@v0.0.0-20210317133921-96f4fcab92a4/global.go:62\ main.main\ \t/home/jenkins/agent/workspace/optimization-build-tidb-linux-amd/go/src/github.com/pingcap/pd/cmd/pd-server/main.go:121\ runtime.main\ \t/usr/local/go/src/runtime/proc.go:203”]

现在 PD leader 是在哪儿呀?
如果发生故障的这个 PD 不是 leader,应该不会影响 BR 最后的 checksum。

感觉像是 leader 在不停地重启。目前只有 pd 的日志,可以发下 br 日志,一起来看下。

该主题在最后一个回复创建 60 天后自动关闭,不再允许新的回复。