看了下源代码,发现只有开启 export-fix-sql = true 时
才会走到报错的逻辑,可以先设置为不导出来规避一下。从代码逻辑看,原因应该是没有修改隔离级别,在分片读取数据、生成 fix 文件的过程中数据发生了变化,导致两个分片的 count 之和与原始 count 不一致。不过这类问题最好是打印 error 日志,并在最后的 summary 里输出错误原因,而不是用 Fatal 级别直接让整个程序退出。
} else if !isEqual && df.exportFixSQL {
state = checkpoints.FailedState
// if the chunk's checksum differ, try to do binary check
info := rangeInfo
if upCount > splitter.SplitThreshold {
log.Debug("count greater than threshold, start do bingenerate", zap.Any("chunk id", rangeInfo.ChunkRange.Index), zap.Int64("upstream chunk size", upCount))
info, err = df.BinGenerate(ctx, df.workSource, rangeInfo, upCount)
if err != nil {
log.Error("fail to do binary search.", zap.Error(err))
df.report.SetTableMeetError(schema, table, err)
// reuse rangeInfo to compare data
info = rangeInfo
} else {
log.Debug("bin generate finished", zap.Reflect("chunk", info.ChunkRange), zap.Any("chunk id", info.ChunkRange.Index))
}
}