sync_diff_inspector在对比时经常因为[FATAL] [diff.go:538] ["the count is not correct"]退出核对

dba-kit · 2022 年 11 月 24 日 03:24

看了下源代码，发现只有开启 export-fix-sql = true 才会走到报错的逻辑，可以先设置为不导出来规避一下。从代码逻辑看，应该是因为没有修改隔离级别，在分片读取数据生成 fix 文件的时候数据发生了变化，导致两个分片加起来的 count 和原始 count 不一致。不过这种问题最好是打一条 error 日志，并在最后的 summary 里打印出错误原因，而不是用 Fatal 级别直接退出整个程序。

	} else if !isEqual && df.exportFixSQL {
		state = checkpoints.FailedState
		// if the chunk's checksum differ, try to do binary check
		info := rangeInfo
		if upCount > splitter.SplitThreshold {
			log.Debug("count greater than threshold, start do bingenerate", zap.Any("chunk id", rangeInfo.ChunkRange.Index), zap.Int64("upstream chunk size", upCount))
			info, err = df.BinGenerate(ctx, df.workSource, rangeInfo, upCount)
			if err != nil {
				log.Error("fail to do binary search.", zap.Error(err))
				df.report.SetTableMeetError(schema, table, err)
				// reuse rangeInfo to compare data
				info = rangeInfo
			} else {
				log.Debug("bin generate finished", zap.Reflect("chunk", info.ChunkRange), zap.Any("chunk id", info.ChunkRange.Index))
			}
		}