8.5.2 tikv异常重启,TiKV assertion panic related to CDC

【TiDB 使用环境】生产环境
【TiDB 版本】8.5.2
【操作系统】centos7.9
【部署方式】机器部署

【遇到的问题:问题现象及影响】

TiKV 异常重启,从日志看是 CDC 模块导致的。论坛里也有类似问题,但按道理说 8.5.2 应该已经修复了这个问题。

如:TiDB升级到7.1以后经常会出现TiKV的CDC模块导致TiKV实例崩溃重启的情况
tikv异常重启 - #2,来自 有猫万事足

错误日志:

[2025/09/26 09:17:32.416 +08:00] [INFO] [background.rs:554] ["ime region gc complete"] [current_safe_point=461071783602683904] [below_safe_point_delete_version=0] [below_safe_point_version=94077] [below_safe_point_unique_keys=94077] [filtered_version=0] [total_version=94
077] [gc_duration=23.350068ms] [epoch_version=10956] [region_id=75402268] [thread_id=50]
[2025/09/26 09:17:33.338 +08:00] [FATAL] [lib.rs:480] ["assertion `left == right` failed\n left: TimeStamp(461071830566043731)\n right: TimeStamp(461071830683746335)"] [backtrace=" 0: tikv_util::set_panic_hook::{{closure}}\n at /workspace/source/tikv/compo
nents/tikv_util/src/lib.rs:479:18\n 1: <alloc::boxed::Box<F,A> as core::ops::function::Fn>::call\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/alloc/src/boxed.rs:2029:9\n std::panicking::ru
st_panic_with_hook\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:783:13\n 2: std::panicking::begin_panic_handler::{{closure}}\n at /root/.rustup/toolchains/nightly-202
3-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:657:13\n 3: std::sys_common::backtrace::__rust_end_short_backtrace\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/
src/sys_common/backtrace.rs:171:18\n 4: rust_begin_unwind\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:645:5\n 5: core::panicking::panic_fmt\n at /root/.rustup/tool
chains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/panicking.rs:72:14\n 6: core::panicking::assert_failed_inner\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core
/src/panicking.rs:342:17\n 7: core::panicking::assert_failed\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/panicking.rs:297:5\n 8: cdc::delegate::Delegate::push_lock\n at /works
pace/source/tikv/components/cdc/src/delegate.rs:374:21\n cdc::delegate::Delegate::sink_txn_put\n at /workspace/source/tikv/components/cdc/src/delegate.rs:1054:41\n cdc::delegate::Delegate::sink_put\n at /workspace/source/tikv/components/
cdc/src/delegate.rs:1003:13\n cdc::delegate::Delegate::sink_data\n at /workspace/source/tikv/components/cdc/src/delegate.rs:871:33\n 9: cdc::delegate::Delegate::on_batch\n at /workspace/source/tikv/components/cdc/src/delegate.rs:734:17\n 1
0: cdc::endpoint::Endpoint<T,E,S>::on_multi_batch\n at /workspace/source/tikv/components/cdc/src/endpoint.rs:999:33\n <cdc::endpoint::Endpoint<T,E,S> as tikv_util::worker::pool::Runnable>::run\n at /workspace/source/tikv/components/cdc/src/en
dpoint.rs:1225:18\n 11: tikv_util::worker::pool::Worker::start_with_timer_impl::{{closure}}\n at /workspace/source/tikv/components/tikv_util/src/worker/pool.rs:532:25\n <tracker::tls::TrackedFuture as core::future::future::Future>::poll::{{closure}}\
n at /workspace/source/tikv/components/tracker/src/tls.rs:64:23\n std::thread::local::LocalKey::try_with\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/thread/local.rs:270:16\
n std::thread::local::LocalKey::with\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/thread/local.rs:246:9\n <tracker::tls::TrackedFuture as core::future::future::Future>::poll\n
at /workspace/source/tikv/components/tracker/src/tls.rs:62:27\n <futures_util::future::future::map::Map<Fut,F> as core::future::future::Future>::poll\n at /workspace/.cargo/registry/src/mirrors.tuna.tsinghua.edu.cn-df7c3c540f42cdbd/futures-uti
l-0.3.31/src/future/future/map.rs:55:37\n <futures_util::future::future::Map<Fut,F> as core::future::future::Future>::poll\n at /workspace/.cargo/registry/src/mirrors.tuna.tsinghua.edu.cn-df7c3c540f42cdbd/futures-util-0.3.31/src/lib.rs:86:13\n yatp:
:task::future::RawTask::poll\n at /workspace/.cargo/git/checkouts/yatp-e704b73c3ee279b6/793be4d/src/task/future.rs:59:9\n 12: yatp::task::future::TaskCell::poll\n at /workspace/.cargo/git/checkouts/yatp-e704b73c3ee279b6/793be4d/src/task/future
.rs:103:9\n <yatp::task::future::Runner as yatp::pool::runner::Runner>::handle\n at /workspace/.cargo/git/checkouts/yatp-e704b73c3ee279b6/793be4d/src/task/future.rs:387:20\n 13: <tikv_util::yatp_pool::YatpPoolRunner as yatp::pool::runner::Runner>::ha
ndle\n at /workspace/source/tikv/components/tikv_util/src/yatp_pool/mod.rs:199:24\n yatp::pool::worker::WorkerThread<T,R>::run\n at /workspace/.cargo/git/checkouts/yatp-e704b73c3ee279b6/793be4d/src/pool/worker.rs:48:13\n yatp::pool::buil
der::LazyBuilder::build::{{closure}}\n at /workspace/.cargo/git/checkouts/yatp-e704b73c3ee279b6/793be4d/src/pool/builder.rs:114:25\n std::sys_common::backtrace::rust_begin_short_backtrace\n at /root/.rustup/toolchains/nightly-2023-12-28-
x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/sys_common/backtrace.rs:155:18\n 14: std::thread::Builder::spawn_unchecked_
::{{closure}}::{{closure}}\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust
/library/std/src/thread/mod.rs:529:17\n <core::panic::unwind_safe::AssertUnwindSafe as core::ops::function::FnOnce<()>>::call_once\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/panic/un
wind_safe.rs:272:9\n std::panicking::try::do_call\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:552:40\n std::panicking::try\n at /root/.rustup/toolchains/nigh
tly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:516:19\n std::panic::catch_unwind\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panic.rs:142:14\n
std::thread::Builder::spawn_unchecked_
::{{closure}}\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/thread/mod.rs:528:30\n core::ops::function::FnOnce::call_once{{vtable.shim}}\n
at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/ops/function.rs:250:5\n 15: <alloc::boxed::Box<F,A> as core::ops::function::FnOnce>::call_once\n at /root/.rustup/toolchains/nightly-2023-
12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/alloc/src/boxed.rs:2015:9\n <alloc::boxed::Box<F,A> as core::ops::function::FnOnce>::call_once\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rus
t/library/alloc/src/boxed.rs:2015:9\n std::sys::unix::thread::thread::new::thread_start\n at /root/.rustup/toolchains/nightly-2023-12-28-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/sys/unix/thread.rs:108:17\n 16: start_thread\n 17: __
clone\n"] [location=components/cdc/src/delegate.rs:374] [thread_name=cdc-0] [thread_id=56]
[2025/09/26 09:18:10.476 +08:00] [INFO] [lib.rs:91] ["Welcome to TiKV"] [thread_id=1]
[2025/09/26 09:18:10.477 +08:00] [INFO] [lib.rs:96] ["Release Version: 8.5.2"] [thread_id=1]
[2025/09/26 09:18:10.477 +08:00] [INFO] [lib.rs:96] ["Edition: Community"] [thread_id=1]
[2025/09/26 09:18:10.477 +08:00] [INFO] [lib.rs:96] ["Git Commit Hash: a150e4569fda1c64763fda297f4e09775759de4a"] [thread_id=1]
[2025/09/26 09:18:10.477 +08:00] [INFO] [lib.rs:96] ["Git Commit Branch: HEAD"] [thread_id=1]

这个看起来像是各节点 TSO 时间戳不一致,可以开启 NTP 服务,保证所有节点时间一致后再试试。

确认过了,各节点时间都一致。这个情况属于偶发,我看论坛里早期也有其他人反馈过这个问题,不确定这个版本是否仍然存在。

我看了下内部 release 计划,这个问题要到 8.5.4 才会修复,对应的 issue 是:
TiKV assertion panic related to CDC · Issue #18498 · tikv/tikv · GitHub

疑似cdc组件存在未完全解决的并发或时序逻辑缺陷?

这个应该是偶发的。

此话题已在最后回复的 7 天后被自动关闭。不再允许新回复。