TiDB v6.5.0 对启用了动态分区裁剪的分区表使用BINDING后,tidb-server启动失败

TiDB 6.5.0集群OOM后一直重启失败,报错内容为:

goroutine 1 [running]:
github.com/pingcap/tidb/executor.(*Compiler).Compile.func1()
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/executor/compiler.go:72 +0x445
panic({0x4318e40, 0x6ec6870})
	/usr/local/go/src/runtime/panic.go:884 +0x212
github.com/pingcap/tidb/statistics/handle.(*Handle).GetPartitionStats(0xc00027a000?, 0x4f8da00?, 0x4f72898?, {0x0?, 0xc00535da4c?, 0x2?})
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/statistics/handle/handle.go:997 +0x2e
github.com/pingcap/tidb/statistics/handle.(*Handle).GetTableStats(...)
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/statistics/handle/handle.go:992
github.com/pingcap/tidb/planner/core.(*PlanBuilder).buildDataSource(0xc005d7a340, {0x4fafbb0, 0xc005ae7590}, 0xc005d20340, 0xc0052ef470)
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/planner/core/logical_plan_builder.go:4456 +0x9ce
github.com/pingcap/tidb/planner/core.(*PlanBuilder).buildResultSetNode(0xc005d7a340, {0x4fafbb0?, 0xc005ae7590?}, {0x4fc96b0?, 0xc0052ef420?}, 0x0?)
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/planner/core/logical_plan_builder.go:380 +0x19d
github.com/pingcap/tidb/planner/core.(*PlanBuilder).buildJoin(0xc005d7a340, {0x4fafbb0, 0xc005ae7590}, 0xc0020ba750)
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/planner/core/logical_plan_builder.go:727 +0x85
github.com/pingcap/tidb/planner/core.(*PlanBuilder).buildResultSetNode(0x0?, {0x4fafbb0?, 0xc005ae7590?}, {0x4fc8948?, 0xc0020ba750?}, 0x0?)
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/planner/core/logical_plan_builder.go:367 +0x271
github.com/pingcap/tidb/planner/core.(*PlanBuilder).buildTableRefs(0xc005d7a340?, {0x4fafbb0?, 0xc005ae7590?}, 0xc001c2b780?)
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/planner/core/logical_plan_builder.go:359 +0x85
github.com/pingcap/tidb/planner/core.(*PlanBuilder).buildSelect(0xc005d7a340, {0x4fafbb0, 0xc005ae7590}, 0xc005acb320)
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/planner/core/logical_plan_builder.go:3916 +0x6c7
github.com/pingcap/tidb/planner/core.(*PlanBuilder).Build(0xc005d7a340, {0x4fafbb0, 0xc005ae7590}, {0x4fc4080?, 0xc005acb320?})
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/planner/core/planbuilder.go:804 +0x745
github.com/pingcap/tidb/planner.buildLogicalPlan({0x4fafbb0, 0xc005ae7590}, {0x501e818?, 0xc00027a000}, {0x4fc4080, 0xc005acb320}, 0xc005d7a340)
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/planner/optimize.go:461 +0x12f
github.com/pingcap/tidb/planner.optimize({0x4fafbb0, 0xc005ae7590}, {0x501e818?, 0xc00027a000}, {0x4fc4080?, 0xc005acb320?}, {0x4fe5b50, 0xc005ae75f0})
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/planner/optimize.go:382 +0x473
github.com/pingcap/tidb/planner.Optimize({0x4fafbb0, 0xc005ae7590}, {0x501e818, 0xc00027a000}, {0x4fc4080, 0xc005acb320}, {0x4fe5b50, 0xc005ae75f0})
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/planner/optimize.go:245 +0xf11
github.com/pingcap/tidb/planner/core.(*PlanBuilder).buildExplain(0xc002f91520, {0x4fafbb0, 0xc005ae7590}, 0xc0052ef500)
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/planner/core/planbuilder.go:4783 +0xd9
github.com/pingcap/tidb/planner/core.(*PlanBuilder).Build(0xc002f91520, {0x4fafbb0, 0xc005ae7590}, {0x4fc2c80?, 0xc0052ef500?})
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/planner/core/planbuilder.go:779 +0x432
github.com/pingcap/tidb/planner.buildLogicalPlan({0x4fafbb0, 0xc005ae7590}, {0x501e818?, 0xc00027a000}, {0x4fc2c80, 0xc0052ef500}, 0xc002f91520)
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/planner/optimize.go:461 +0x12f
github.com/pingcap/tidb/planner.optimize({0x4fafbb0, 0xc005ae7590}, {0x501e818?, 0xc00027a000}, {0x4fc2c80?, 0xc0052ef500?}, {0x4fe5b50, 0xc005ae75f0})
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/planner/optimize.go:382 +0x473
github.com/pingcap/tidb/planner.Optimize({0x4fafbb0, 0xc005ae7590}, {0x501e818, 0xc00027a000}, {0x4fc2c80, 0xc0052ef500}, {0x4fe5b50, 0xc005ae75f0})
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/planner/optimize.go:245 +0xf11
github.com/pingcap/tidb/executor.(*Compiler).Compile(0xc0044b6fc8, {0x4fafbb0, 0xc005ae7590}, {0x4fc8580, 0xc0052ef500?})
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/executor/compiler.go:116 +0x6f8
github.com/pingcap/tidb/session.(*session).ExecuteStmt(0xc00027a000, {0x4fafbb0, 0xc005ae7590}, {0x4fc8580?, 0xc0052ef500})
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/session/session.go:2171 +0x54e
github.com/pingcap/tidb/session.(*session).ExecuteInternal(0xc00027a000, {0x4fafbb0, 0xc005ae7590}, {0xc000836240, 0x214}, {0x0, 0x0, 0x0})
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/session/session.go:1674 +0x3f2
github.com/pingcap/tidb/bindinfo.getHintsForSQL({0x501e818, 0xc00027a000}, {0xc005aa3a00, 0x1fe})
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/bindinfo/handle.go:951 +0x177
github.com/pingcap/tidb/bindinfo.(*BindRecord).prepareHints(0xc005aded00, {0x501e818, 0xc00027a000})
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/bindinfo/bind_record.go:178 +0x1e7
github.com/pingcap/tidb/bindinfo.(*BindHandle).newBindRecord(0xc005de7380, {0xc005a6fb30?, 0x1?})
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/bindinfo/handle.go:723 +0xbcf
github.com/pingcap/tidb/bindinfo.(*BindHandle).Update(0xc005de7380, 0x1)
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/bindinfo/handle.go:173 +0x6c5
github.com/pingcap/tidb/domain.(*Domain).LoadBindInfoLoop(0xc001166000, {0x501e818, 0xc00027a000}, {0x501e818, 0xc00027a280})
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/domain/domain.go:1444 +0xe5
github.com/pingcap/tidb/session.BootstrapSession({0x4fd95f0, 0xc0000b7b30})
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/session/session.go:3301 +0x648
main.createStoreAndDomain()
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/tidb-server/main.go:314 +0x1cb
main.main()
	/home/jenkins/agent/workspace/build-common/go/src/github.com/pingcap/tidb/tidb-server/main.go:214 +0x2ca
1 Like

话说6.5是不是调整了tidb-server的启动顺序?上次重启的时候就觉得启动很慢,看日志,耗时主要花在加载统计信息上。

解决了。原因是昨天在优化SQL的时候,按照官方文档的执行计划管理指南,手动执行了一个CREATE GLOBAL BINDING。刚才通过另外一个tidb-server节点把BINDING删除掉,就可以启动成功了。

又测试了下,普通表加了binding还是可以正常启动的;不过对于分区表,如果启用了动态分区裁剪,就会导致tidb-server启动失败。可复现例子如下:
PS:(如果想测试的话,一定要保证自己集群有至少2个tidb-server节点,且只重启一个tidb-server,否则恭喜你,你将会得到一个不能启动tidb-server的集群)
PS:(如果想测试的话,一定要保证自己集群有至少2个tidb-server节点,且只重启一个tidb-server,否则恭喜你,你将会得到一个不能启动tidb-server的集群)
PS:(如果想测试的话,一定要保证自己集群有至少2个tidb-server节点,且只重启一个tidb-server,否则恭喜你,你将会得到一个不能启动tidb-server的集群)

-- Minimal reproduction: a RANGE COLUMNS partitioned table, dynamic partition
-- pruning enabled, and a global binding on a query over that table.
-- Run this in the `test` schema: the binding below references test.t1.
-- WARNING: after this script, restarting a tidb-server on the affected version
-- panics during bootstrap; keep at least one other tidb-server running so the
-- binding can be dropped again.
CREATE TABLE `t1` (
  -- A primary-key column cannot default to NULL, so it is declared NOT NULL.
  `a` varchar(24) COLLATE utf8_unicode_ci NOT NULL,
  PRIMARY KEY (`a`) /*T![clustered_index] CLUSTERED */
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci
-- The partitioning column must exist in the table (and be part of the primary key).
PARTITION BY RANGE COLUMNS(`a`)
(PARTITION `before_2018` VALUES LESS THAN ("20180101"),
 PARTITION `p202302` VALUES LESS THAN ("20230301"),
 PARTITION `p202303` VALUES LESS THAN ("20230401"),
 PARTITION `p202304` VALUES LESS THAN ("20230501"),
 PARTITION `p202305` VALUES LESS THAN ("20230601"),
 PARTITION `p202306` VALUES LESS THAN ("20230701"),
 PARTITION `p202307` VALUES LESS THAN ("20230801"),
 PARTITION `p202308` VALUES LESS THAN ("20230901"),
 PARTITION `p202309` VALUES LESS THAN ("20231001"),
 PARTITION `p202310` VALUES LESS THAN ("20231101"),
 PARTITION `p202311` VALUES LESS THAN ("20231201"),
 PARTITION `p202312` VALUES LESS THAN ("20240101"),
 PARTITION `pfuture` VALUES LESS THAN (MAXVALUE));
-- Switch partitioned tables to dynamic pruning mode.
set global tidb_partition_prune_mode='dynamic';
-- Collect statistics for the partitioned table.
analyze table t1;
-- The global binding is persisted and reloaded by every tidb-server at startup.
create global binding for select * from test.t1 using select * from test.t1;

https://github.com/pingcap/tidb/issues/40368 麻烦关注这个 issue,我们会尽快 push fix,多谢反馈!

2 Likes