tiflash大量报错 DB::Exception: Exchange receiver meet error : Receive cancel request from TiDB

Bug 反馈
清晰准确地描述您发现的问题,提供任何可能复现问题的步骤有助于研发同学及时处理问题
【 Bug 的影响】
分区表更新异常,去掉 TiFlash 副本之后才能正常更新。
update warning_message force index(PRIMARY) set group_status = 1,ding_task_id='522660803969' where id in(4890064,4890065)
【可能的问题复现步骤】

【看到的非预期行为】


2022.02.17 14:56:29.592282 [ 19450443 ] task 3: task running meets error DB::Exception: Exchange receiver meet error : Receive cancel request from TiDB Stack Trace :
  0. /tiflash/tiflash(StackTrace::StackTrace()+0x16) [0x3921fd6]

  1. /tiflash/tiflash(DB::Exception::Exception(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, int)+0x26) [0x3915e56]
  2. /tiflash/tiflash(DB::TiRemoteBlockInputStream<DB::ExchangeReceiver>::fetchRemoteResult()+0x2f1) [0x76b43d1]
  3. /tiflash/tiflash(DB::TiRemoteBlockInputStream<DB::ExchangeReceiver>::readImpl()+0x3a3) [0x76b5363]
  4. /tiflash/tiflash(DB::IProfilingBlockInputStream::read(DB::PODArray<unsigned char, 4096ul, Allocator, 15ul, 16ul>*&, bool)+0x3bd) [0x6ffd3bd]
  5. /tiflash/tiflash(DB::IProfilingBlockInputStream::read()+0x17) [0x6ffd5a7]
  6. /tiflash/tiflash(DB::SquashingBlockInputStream::readImpl()+0x74) [0x7c16954]
  7. /tiflash/tiflash(DB::IProfilingBlockInputStream::read(DB::PODArray<unsigned char, 4096ul, Allocator, 15ul, 16ul>*&, bool)+0x3bd) [0x6ffd3bd]
  8. /tiflash/tiflash(DB::IProfilingBlockInputStream::read()+0x17) [0x6ffd5a7]
  9. /tiflash/tiflash(DB::ExpressionBlockInputStream::readImpl()+0x1b) [0x7bc1adb]
  10. /tiflash/tiflash(DB::IProfilingBlockInputStream::read(DB::PODArray<unsigned char, 4096ul, Allocator, 15ul, 16ul>*&, bool)+0x3bd) [0x6ffd3bd]
  11. /tiflash/tiflash(DB::IProfilingBlockInputStream::read()+0x17) [0x6ffd5a7]
  12. /tiflash/tiflash(DB::ExpressionBlockInputStream::readImpl()+0x1b) [0x7bc1adb]
  13. /tiflash/tiflash(DB::IProfilingBlockInputStream::read(DB::PODArray<unsigned char, 4096ul, Allocator, 15ul, 16ul>*&, bool)+0x3bd) [0x6ffd3bd]
  14. /tiflash/tiflash(DB::IProfilingBlockInputStream::read()+0x17) [0x6ffd5a7]
  15. /tiflash/tiflash(DB::HashJoinBuildBlockInputStream::readImpl()+0x1b) [0x80a282b]
  16. /tiflash/tiflash(DB::IProfilingBlockInputStream::read(DB::PODArray<unsigned char, 4096ul, Allocator, 15ul, 16ul>*&, bool)+0x3bd) [0x6ffd3bd]
  17. /tiflash/tiflash(DB::IProfilingBlockInputStream::read()+0x17) [0x6ffd5a7]
  18. /tiflash/tiflash(DB::ParallelInputsProcessor<DB::UnionBlockInputStream<(DB::StreamUnionMode)0>::Handler, (DB::StreamUnionMode)0>::loop(unsigned long)+0x156) [
    0x773ef76]
  19. /tiflash/tiflash(DB::ParallelInputsProcessor<DB::UnionBlockInputStream<(DB::StreamUnionMode)0>::Handler, (DB::StreamUnionMode)0>::thread(unsigned long)+0x20c)
    [0x773f92c]
  20. /tiflash/tiflash() [0x8e571bf]
  21. /lib64/libpthread.so.0(+0x7dd5) [0x7fb5195e4dd5]
  22. /lib64/libc.so.6(clone+0x6d) [0x7fb51900bead]

2022.02.17 14:56:29.618009 [ 19450443 ] task 3: Failed to write error DB::Exception: Exchange receiver meet error : Receive cancel request from TiDB to tunnel: tunnel3+-1: Code: 0, e.displayText() = DB::Exception: MPPTunnel can not be connected because MPPTask is cancelled, e.what() = DB::Exception, Stack trace:

  1. /tiflash/tiflash(StackTrace::StackTrace()+0x16) [0x3921fd6]
  2. /tiflash/tiflash(DB::Exception::Exception(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, int)+0x26) [0x3915e56]
  3. /tiflash/tiflash(DB::MPPTunnel::waitUntilConnectedOrCancelled(std::unique_lock<std::mutex>&)+0x12d) [0x805763d]
  4. /tiflash/tiflash(DB::MPPTunnel::write(mpp::MPPDataPacket const&, bool)+0x5f6) [0x8058766]
  5. /tiflash/tiflash(DB::MPPTask::writeErrToAllTunnel(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)+0x92) [0x8052092]
  6. /tiflash/tiflash(DB::MPPTask::runImpl()+0x11db) [0x805347b]
  7. /tiflash/tiflash() [0x8e571bf]
  8. /lib64/libpthread.so.0(+0x7dd5) [0x7fb5195e4dd5]
  9. /lib64/libc.so.6(clone+0x6d) [0x7fb51900bead]

2022.02.17 14:56:29.618213 [ 19450443 ] TaskManager: The task [431248110620508476,3] cannot be found and fail to unregister
2022.02.17 14:56:29.681897 [ 19450443 ] DB::UnionBlockInputStream::~UnionBlockInputStream() [with DB::StreamUnionMode mode = (DB::StreamUnionMode)0]
: Code: 0, e.displayText() = DB::Exception: Exchange receiver meet error : Receive cancel request from TiDB:
Code: 0, e.displayText() = DB::Exception: Exchange receiver meet error : Receive cancel request from TiDB, e.what() = DB::Exception, e.what() = DB::Exception, Stack trace:

  1. /tiflash/tiflash(StackTrace::StackTrace()+0x16) [0x3921fd6]
  2. /tiflash/tiflash(DB::Exception::Exception(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, int)+0x26) [0x3915e56]
  3. /tiflash/tiflash(DB::TiRemoteBlockInputStream<DB::ExchangeReceiver>::fetchRemoteResult()+0x2f1) [0x76b43d1]
  4. /tiflash/tiflash(DB::TiRemoteBlockInputStream<DB::ExchangeReceiver>::readImpl()+0x3a3) [0x76b5363]
  5. /tiflash/tiflash(DB::IProfilingBlockInputStream::read(DB::PODArray<unsigned char, 4096ul, Allocator, 15ul, 16ul>*&, bool)+0x3bd) [0x6ffd3bd]
  6. /tiflash/tiflash(DB::IProfilingBlockInputStream::read()+0x17) [0x6ffd5a7]
  7. /tiflash/tiflash(DB::SquashingBlockInputStream::readImpl()+0x74) [0x7c16954]
  8. /tiflash/tiflash(DB::IProfilingBlockInputStream::read(DB::PODArray<unsigned char, 4096ul, Allocator, 15ul, 16ul>*&, bool)+0x3bd) [0x6ffd3bd]
  9. /tiflash/tiflash(DB::IProfilingBlockInputStream::read()+0x17) [0x6ffd5a7]
  10. /tiflash/tiflash(DB::ExpressionBlockInputStream::readImpl()+0x1b) [0x7bc1adb]
  11. /tiflash/tiflash(DB::IProfilingBlockInputStream::read(DB::PODArray<unsigned char, 4096ul, Allocator, 15ul, 16ul>*&, bool)+0x3bd) [0x6ffd3bd]
  12. /tiflash/tiflash(DB::IProfilingBlockInputStream::read()+0x17) [0x6ffd5a7]
  13. /tiflash/tiflash(DB::ExpressionBlockInputStream::readImpl()+0x1b) [0x7bc1adb]
  14. /tiflash/tiflash(DB::IProfilingBlockInputStream::read(DB::PODArray<unsigned char, 4096ul, Allocator, 15ul, 16ul>*&, bool)+0x3bd) [0x6ffd3bd]
  15. /tiflash/tiflash(DB::IProfilingBlockInputStream::read()+0x17) [0x6ffd5a7]
  16. /tiflash/tiflash(DB::HashJoinBuildBlockInputStream::readImpl()+0x1b) [0x80a282b]
  17. /tiflash/tiflash(DB::IProfilingBlockInputStream::read(DB::PODArray<unsigned char, 4096ul, Allocator, 15ul, 16ul>*&, bool)+0x3bd) [0x6ffd3bd]
  18. /tiflash/tiflash(DB::IProfilingBlockInputStream::read()+0x17) [0x6ffd5a7]
  19. /tiflash/tiflash(DB::ParallelInputsProcessor<DB::UnionBlockInputStream<(DB::StreamUnionMode)0>::Handler, (DB::StreamUnionMode)0>::loop(unsigned long)+0x156) [
    0x773ef76]
  20. /tiflash/tiflash(DB::ParallelInputsProcessor<DB::UnionBlockInputStream<(DB::StreamUnionMode)0>::Handler, (DB::StreamUnionMode)0>::thread(unsigned long)+0x20c)
    [0x773f92c]
  21. /tiflash/tiflash() [0x8e571bf]
  22. /lib64/libpthread.so.0(+0x7dd5) [0x7fb5195e4dd5]
  23. /lib64/libc.so.6(clone+0x6d) [0x7fb51900bead]
【期望看到的行为】

【相关组件及具体版本】
tidb 5.2.2 tiflash 5.2.2
【其他背景信息或者截图】
如集群拓扑,系统和内核版本,应用 app 信息等;如果问题跟 SQL 有关,请提供 SQL 语句和相关表的 Schema 信息;如果节点日志存在关键报错,请提供相关节点的日志内容或文件;如果一些业务敏感信息不便提供,请留下联系方式,我们与您私下沟通。

1 个赞


错误日志文件看 10月9号开始的 。
应用反馈的异常情况是:昨天把 warning_message 这张表改成了按月分区的分区表,今天给它添加了 TiFlash 副本,一段时间之后前端就开始报异常;去掉 TiFlash 副本之后,这张表的操作不再报异常,但 TiFlash 日志里仍在报 MPP 异常。

1 个赞

如果方便,可以提供下表结构信息吗?

CREATE TABLE warning_message (
id bigint(20) NOT NULL AUTO_INCREMENT COMMENT ‘消息id’,
rule_id bigint(20) NOT NULL COMMENT ‘触发规则id’,
term_id bigint(20) NOT NULL COMMENT ‘触发规则条件id’,
date date NOT NULL COMMENT ‘规则触发日期,方便查询’,
message_type int(11) DEFAULT NULL COMMENT ‘0广告级别,1推广计划,2账户,3客户,4平台,5创意’,
term_type int(11) DEFAULT NULL COMMENT ‘条件类型:0出价,1账户余额,2消耗,3链接,4文案,5定向,6转化成本,7转化数量’,
rule_type int(11) DEFAULT NULL COMMENT ‘规则类型’,
rule_name varchar(216) CHARACTER SET utf8 COLLATE utf8_bin DEFAULT NULL COMMENT ‘规则名称’,
rule_data_type varchar(30) CHARACTER SET utf8 COLLATE utf8_bin DEFAULT NULL COMMENT ‘规则数据维度,0分时,1分日,0,1都包含’,
old_value varchar(8192) CHARACTER SET utf8 COLLATE utf8_bin DEFAULT NULL COMMENT ‘旧值’,
new_value varchar(8192) CHARACTER SET utf8 COLLATE utf8_bin DEFAULT NULL COMMENT ‘新值’,
title varchar(256) DEFAULT NULL COMMENT ‘消息标题’,
content varchar(8192) DEFAULT NULL COMMENT ‘消息内容’,
operate int(4) DEFAULT NULL COMMENT ‘执行操作’,
customer_id int(10) DEFAULT NULL COMMENT ‘客户id’,
customer_name varchar(100) DEFAULT NULL COMMENT ‘客户名称’,
platform_id int(4) DEFAULT NULL COMMENT ‘平台id,2广点通,3微信,4头条,5快手,6千川’,
platform_name varchar(30) DEFAULT NULL COMMENT ‘平台’,
account_id varchar(30) CHARACTER SET utf8 COLLATE utf8_bin DEFAULT NULL COMMENT ‘账户id’,
account_name varchar(100) DEFAULT NULL COMMENT ‘账户名称’,
campaign_id bigint(20) DEFAULT NULL COMMENT ‘计划id’,
campaign_name varchar(100) DEFAULT NULL COMMENT ‘推广计划’,
creative_id bigint(20) DEFAULT NULL COMMENT ‘创意id’,
creative_name varchar(100) CHARACTER SET utf8 COLLATE utf8_bin DEFAULT NULL COMMENT ‘广告创意名称’,
ad_id varchar(30) CHARACTER SET utf8 COLLATE utf8_bin DEFAULT NULL COMMENT ‘广告id,广点通、微信广告组id,头条广告计划id’,
ad_name varchar(100) DEFAULT NULL COMMENT ‘广告名称,广点通、微信广告组,头条广告计划’,
tx_ad_id varchar(30) DEFAULT NULL COMMENT ‘广点通、微信广告id’,
tx_ad_name varchar(100) DEFAULT NULL COMMENT ‘腾讯广告名称’,
operator varchar(1024) DEFAULT NULL COMMENT ‘处理人,0为自动’,
operator_name varchar(8196) DEFAULT NULL COMMENT ‘处理人名称’,
approver varchar(1024) DEFAULT NULL COMMENT ‘审核人’,
approve_name varchar(8196) DEFAULT NULL COMMENT ‘审核人员名称’,
operates varchar(1024) CHARACTER SET utf8 COLLATE utf8_bin DEFAULT NULL COMMENT ‘操作方式json’,
send_message int(2) unsigned zerofill DEFAULT ‘0’ COMMENT ‘是否发送钉钉消息;operates中有type=3时,标记1需要发送’,
notify_type varchar(20) DEFAULT NULL COMMENT ‘通知类型,0钉钉,1电话,二者都选为0,1’,
level int(11) DEFAULT NULL COMMENT ‘规则等级,0高,1中,2低,默认1’,
group_status tinyint(1) DEFAULT ‘0’ COMMENT ‘状态,0未发送消息,1已发送消息’,
operate_status tinyint(1) DEFAULT ‘0’ COMMENT ‘自动处理状态0未处理,1处理中,2处理完成’,
status int(11) unsigned zerofill DEFAULT NULL COMMENT ‘消息状态,0待处理,1已处理,2待审核消息,3待关闭,4已上传,5待审核-报告6已关闭;为空表示只是触发规则,不走流程’,
flow_status int(11) unsigned zerofill DEFAULT ‘0’ COMMENT ‘流程当前状态’,
star_tag int(10) unsigned zerofill DEFAULT NULL COMMENT ‘是否标星,0否1是’,
ding_task_id varchar(50) CHARACTER SET utf8 COLLATE utf8_bin DEFAULT NULL COMMENT ‘钉钉消息id’,
create_time datetime DEFAULT CURRENT_TIMESTAMP COMMENT ‘创建时间’,
update_time datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT ‘修改时间’,
PRIMARY KEY (id,date) /*T![clustered_index] CLUSTERED */,
KEY date_rule (date,rule_id,term_id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin AUTO_INCREMENT=5190001 COMMENT=‘预警消息’
PARTITION BY RANGE COLUMNS(date) (
PARTITION p202112 VALUES LESS THAN (“2022-01-01”),
PARTITION p202201 VALUES LESS THAN (“2022-02-01”),
PARTITION p202202 VALUES LESS THAN (“2022-03-01”),
PARTITION p202203 VALUES LESS THAN (“2022-04-01”),
PARTITION p202204 VALUES LESS THAN (“2022-05-01”)
)

1 个赞

这边复现了这个报错。

另外,辛苦提供一下下面这条语句的执行结果:
show variables like '%tidb_isolation_read_engines%';

1 个赞

image

1 个赞

MySQL也有类似的报错,我记得好像是通过修改连接池的配置解决的

1 个赞

添加副本的步骤能发一下吗?加了几个?

1 个赞

2个副本

1 个赞

这个问题看起来在 v5.4.0 已 fix,详情见下面的 pr:

https://github.com/pingcap/tidb/pull/31528

5.2.x 有新版本修复这些问题吗?

5.2 版本这个问题对应的 pr 看起来是 open 的状态,这个 pr 具体的进度可关注下面的链接:

https://github.com/pingcap/tidb/pull/31526

此话题已在最后回复的 1 分钟后被自动关闭。不再允许新回复。