TiKV 有一个节点离线,无法启动

集群是
3 TiDB + 3 PD + 3 TiKV + 3 TiFlash
其中一个节点突然离线,
尝试使用 tiup cluster start <cluster-name> -N <node-ip>:<port> 的方式重新启动,命令显示成功,但接口实际上没有启动成功
相应的节点日志如附件:

tikv.log (91.7 MB)

最后一部分的日志如下

[2022/07/29 19:59:10.415 +07:00] [INFO] [store.rs:1221] ["cleans up garbage data"] [takes=226.256µs] [garbage_range_count=516] [store_id=3]
[2022/07/29 19:59:10.425 +07:00] [WARN] [store.rs:1647] ["set thread priority for raftstore failed"] [error="Os { code: 13, kind: PermissionDenied, message: \"Permission denied\" }"]
[2022/07/29 19:59:10.425 +07:00] [INFO] [gc_worker.rs:1073] ["initialize compaction filter to perform GC when necessary"]
[2022/07/29 19:59:10.425 +07:00] [INFO] [compaction_filter.rs:171] ["initialize GC context for compaction filter"]
[2022/07/29 19:59:10.445 +07:00] [INFO] [future.rs:149] ["starting working thread"] [worker=waiter-manager]
[2022/07/29 19:59:10.446 +07:00] [INFO] [future.rs:149] ["starting working thread"] [worker=deadlock-detector]
[2022/07/29 19:59:10.448 +07:00] [INFO] [server.rs:264] ["listening on addr"] [addr=0.0.0.0:20160]
[2022/07/29 19:59:10.455 +07:00] [INFO] [server.rs:309] ["TiKV is ready to serve"]
[2022/07/29 19:59:10.542 +07:00] [FATAL] [lib.rs:491] ["[region 15009] 15012 data is corrupted at 8537135: WireError(IncorrectTag(0))"] [backtrace=" 0: tikv_util::set_panic_hook::{{closure}}
at /home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tikv/components/tikv_util/src/lib.rs:490:18
1: std::panicking::rust_panic_with_hook
at /rust/toolchains/nightly-2022-02-14-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:702:17
2: std::panicking::begin_panic_handler::{{closure}}
at /rust/toolchains/nightly-2022-02-14-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:588:13
3: std::sys_common::backtrace::__rust_end_short_backtrace
at /rust/toolchains/nightly-2022-02-14-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/sys_common/backtrace.rs:138:18
4: rust_begin_unwind
at /rust/toolchains/nightly-2022-02-14-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:584:5
5: core::panicking::panic_fmt
at /rust/toolchains/nightly-2022-02-14-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/panicking.rs:143:14
6: raftstore::store::util::parse_data_at::{{closure}}
at /home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tikv/components/raftstore/src/store/util.rs:707:9
core::result::Result<T,E>::unwrap_or_else
at /rust/toolchains/nightly-2022-02-14-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/result.rs:1428:23
raftstore::store::util::parse_data_at
at /home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tikv/components/raftstore/src/store/util.rs:706:5
7: raftstore::store::fsm::apply::ApplyDelegate::handle_raft_entry_normal
at /home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tikv/components/raftstore/src/store/fsm/apply.rs:1052:23
raftstore::store::fsm::apply::ApplyDelegate::handle_raft_committed_entries
at /home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tikv/components/raftstore/src/store/fsm/apply.rs:980:43
8: raftstore::store::fsm::apply::ApplyFsm::handle_apply
at /home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tikv/components/raftstore/src/store/fsm/apply.rs:3362:9
9: raftstore::store::fsm::apply::ApplyFsm::handle_tasks
at /home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tikv/components/raftstore/src/store/fsm/apply.rs:3643:25
10: <raftstore::store::fsm::apply::ApplyPoller as batch_system::batch::PollHandler<raftstore::store::fsm::apply::ApplyFsm,raftstore::store::fsm::apply::ControlFsm>>::handle_normal
at /home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tikv/components/raftstore/src/store/fsm/apply.rs:3892:9
11: batch_system::batch::Poller<N,C,Handler>::poll
at /home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tikv/components/batch-system/src/batch.rs:421:27
12: raftstore::store::worker::refresh_config::PoolController<N,C,H>::increase_by::{{closure}}
at /home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tikv/components/raftstore/src/store/worker/refresh_config.rs:76:21
std::sys_common::backtrace::rust_begin_short_backtrace
at /rust/toolchains/nightly-2022-02-14-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/sys_common/backtrace.rs:122:18
13: std::thread::Builder::spawn_unchecked_::{{closure}}::{{closure}}
at /rust/toolchains/nightly-2022-02-14-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/thread/mod.rs:498:17
<core::panic::unwind_safe::AssertUnwindSafe<F> as core::ops::function::FnOnce<()>>::call_once
at /rust/toolchains/nightly-2022-02-14-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/panic/unwind_safe.rs:271:9
std::panicking::try::do_call
at /rust/toolchains/nightly-2022-02-14-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:492:40
std::panicking::try
at /rust/toolchains/nightly-2022-02-14-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:456:19
std::panic::catch_unwind
at /rust/toolchains/nightly-2022-02-14-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panic.rs:137:14
std::thread::Builder::spawn_unchecked_::{{closure}}
at /rust/toolchains/nightly-2022-02-14-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/thread/mod.rs:497:30
core::ops::function::FnOnce::call_once{{vtable.shim}}
at /rust/toolchains/nightly-2022-02-14-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/ops/function.rs:227:5
14: <alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once
at /rust/toolchains/nightly-2022-02-14-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/alloc/src/boxed.rs:1854:9
<alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once
at /rust/toolchains/nightly-2022-02-14-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/alloc/src/boxed.rs:1854:9
std::sys::unix::thread::thread::new::thread_start
at /rust/toolchains/nightly-2022-02-14-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/sys/unix/thread.rs:108:17
15: start_thread
16: __clone
"] [location=/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tikv/components/raftstore/src/store/util.rs:707] [thread_name=apply-0]

先用 pd-ctl store 确认出问题的 TiKV 对应的 store id(上面的日志中显示为 store_id=3,请以 pd-ctl 的输出为准)。
再用 pd-ctl region 15009 查看该 Region 的信息;如果 leader 在正常的 TiKV 上,可以尝试手动把这个损坏的 Region 副本从有问题的 TiKV 上移除:pd-ctl operator add remove-peer <region_id> <from_store_id>

该主题已在最后一个回复创建 60 天后自动关闭,不再允许新的回复。