tikv-生产bug-经常panic重启

【TiDB 使用环境】生产环境

业务上写入大量使用 INSERT ... ON DUPLICATE KEY UPDATE,目前开启 CDC 往备份集群推数据之后,TiKV 节点经常重启

未进行备份集群cdc任务时,未见异常

日志有:
[2025/09/28 12:54:03.927 +08:00] [FATAL] [lib.rs:480] [“assertion left == right failed\n left: TimeStamp(461120534839033880)\n right: TimeStamp(461120534839033913)”] [backtrace=" 0: tikv_util::set_panic_hook::{{closure}}\n at /workspace/source/tikv/components/tikv_util/src/lib.rs:479:18\n 1: <alloc::boxed::Box<F,A> as core::ops::function::Fn>::call\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/alloc/src/boxed.rs:2029:9\n std::panicking::rust_panic_with_hook\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:783:13\n 2: std::panicking::begin_panic_handler::{{closure}}\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:657:13\n 3: std::sys_common::backtrace::__rust_end_short_backtrace\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/sys_common/backtrace.rs:171:18\n 4: rust_begin_unwind\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:645:5\n 5: core::panicking::panic_fmt\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/panicking.rs:72:14\n 6: core::panicking::assert_failed_inner\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/panicking.rs:342:17\n 7: core::panicking::assert_failed\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/panicking.rs:297:5\n 8: cdc::delegate::Delegate::push_lock\n at /workspace/source/tikv/components/cdc/src/delegate.rs:374:21\n cdc::delegate::Delegate::sink_txn_put\n at /workspace/source/tikv/components/cdc/src/delegate.rs:1054:41\n cdc::delegate::Delegate::sink_put\n at 
/workspace/source/tikv/components/cdc/src/delegate.rs:1003:13\n cdc::delegate::Delegate::sink_data\n at /workspace/source/tikv/components/cdc/src/delegate.rs:871:33\n 9: cdc::delegate::Delegate::on_batch\n at /workspace/source/tikv/components/cdc/src/delegate.rs:734:17\n 10: cdc::endpoint::Endpoint<T,E,S>::on_multi_batch\n at /workspace/source/tikv/components/cdc/src/endpoint.rs:999:33\n <cdc::endpoint::Endpoint<T,E,S> as tikv_util::worker::pool::Runnable>::run\n at /workspace/source/tikv/components/cdc/src/endpoint.rs:1225:18\n 11: tikv_util::worker::pool::Worker::start_with_timer_impl::{{closure}}\n at /workspace/source/tikv/components/tikv_util/src/worker/pool.rs:532:25\n <tracker::tls::TrackedFuture as core::future::future::Future>::poll::{{closure}}\n at /workspace/source/tikv/components/tracker/src/tls.rs:64:23\n std::thread::local::LocalKey::try_with\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/thread/local.rs:270:16\n std::thread::local::LocalKey::with\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/thread/local.rs:246:9\n <tracker::tls::TrackedFuture as core::future::future::Future>::poll\n at /workspace/source/tikv/components/tracker/src/tls.rs:62:27\n <futures_util::future::future::map::Map<Fut,F> as core::future::future::Future>::poll\n at /workspace/.cargo/registry/src/mirrors.tuna.tsinghua.edu.cn-df7c3c540f42cdbd/futures-util-0.3.31/src/future/future/map.rs:55:37\n <futures_util::future::future::Map<Fut,F> as core::future::future::Future>::poll\n at /workspace/.cargo/registry/src/mirrors.tuna.tsinghua.edu.cn-df7c3c540f42cdbd/futures-util-0.3.31/src/lib.rs:86:13\n yatp::task::future::RawTask::poll\n at /workspace/.cargo/git/checkouts/yatp-e704b73c3ee279b6/793be4d/src/task/future.rs:59:9\n 12: yatp::task::future::TaskCell::poll\n at /workspace/.cargo/git/checkouts/yatp-e704b73c3ee279b6/793be4d/src/task/future.rs:103:9\n 
<yatp::task::future::Runner as yatp::pool::runner::Runner>::handle\n at /workspace/.cargo/git/checkouts/yatp-e704b73c3ee279b6/793be4d/src/task/future.rs:387:20\n 13: <tikv_util::yatp_pool::YatpPoolRunner as yatp::pool::runner::Runner>::handle\n at /workspace/source/tikv/components/tikv_util/src/yatp_pool/mod.rs:199:24\n yatp::pool::worker::WorkerThread<T,R>::run\n at /workspace/.cargo/git/checkouts/yatp-e704b73c3ee279b6/793be4d/src/pool/worker.rs:48:13\n yatp::pool::builder::LazyBuilder::build::{{closure}}\n at /workspace/.cargo/git/checkouts/yatp-e704b73c3ee279b6/793be4d/src/pool/builder.rs:114:25\n std::sys_common::backtrace::rust_begin_short_backtrace\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/sys_common/backtrace.rs:155:18\n 14: std::thread::Builder::spawn_unchecked::{{closure}}::{{closure}}\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/thread/mod.rs:529:17\n <core::panic::unwind_safe::AssertUnwindSafe as core::ops::function::FnOnce<()>>::call_once\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/panic/unwind_safe.rs:272:9\n std::panicking::try::do_call\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:552:40\n std::panicking::try\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:516:19\n std::panic::catch_unwind\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panic.rs:142:14\n std::thread::Builder::spawn_unchecked::{{closure}}\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/thread/mod.rs:528:30\n core::ops::function::FnOnce::call_once{{vtable.shim}}\n at 
/root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/ops/function.rs:250:5\n 15: <alloc::boxed::Box<F,A> as core::ops::function::FnOnce>::call_once\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/alloc/src/boxed.rs:2015:9\n <alloc::boxed::Box<F,A> as core::ops::function::FnOnce>::call_once\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/alloc/src/boxed.rs:2015:9\n std::sys::unix::thread::thread::new::thread_start\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/sys/unix/thread.rs:108:17\n 16: start_thread\n 17: clone\n"] [location=components/cdc/src/delegate.rs:374] [thread_name=cdc-0] [thread_id=49]

assertion left == right failed\n left: TimeStamp(461120534839033880)\n right: TimeStamp(461120534839033913)
看日志可能是由高并发触发了 CDC 模块时间戳校验断言失败,导致 TiKV panic 重启?建议升级到 TiDB v7.5.8 以上的版本。

目前8.5.2

这情况和我差不多,像是之前cdc的bug

哥们 后面怎么解决

我也在等回复。。

感觉,这个还是很严重啊,天天宕机,会降低信任度。

等待中

解决了吗?

未解决 最新版本8.5.3也有问题

感谢反馈,已反馈给产研
https://github.com/tikv/tikv/issues/18498

具体的处理进度可见这个链接。

目前问题正在定位中,还没有找到影响的根本原因,如有新进度,我也随时同步给你们

感谢反馈 @xmlianfeng @TiDBer_yyy @HGHNICE_666 如果 BUG 需要配合产研定位,我也会第一时间联系你们寻求帮助。

客戶 v8.5.2 與 v8.5.3 報的 timestamp 是一樣的嗎

assertion left == right failed\n left: TimeStamp(461120534839033880)\n right: TimeStamp(461120534839033913)

8.5.3报错日志的timestamp也是不一样的