Hi 请教下使用 sync_diff_inspector 进行数据校验分区表时,配置文件里的日志明明写了时间范围,但是还是会扫描到其他的分区,这个表非常大,如果全表扫发现会把 tikv 的 cpu 打满,请教下对于分区表是否有方法可以直接定位到指定分区进行校验?
表结构
CREATE TABLE `t1` (
`id` bigint(20) unsigned NOT NULL AUTO_INCREMENT COMMENT '主键id',
`a` bigint(20) NOT NULL DEFAULT '0' COMMENT '',
`b` bigint(20) NOT NULL DEFAULT '0' COMMENT '',
`c` varchar(128) NOT NULL DEFAULT '' COMMENT '',
`d` bigint(20) NOT NULL DEFAULT '0' COMMENT '',
`e` bigint(20) NOT NULL DEFAULT '0' COMMENT '',
`f` date NOT NULL DEFAULT '0001-01-01' COMMENT '',
PRIMARY KEY (`id`,`f`) /*T![clustered_index] NONCLUSTERED */
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin AUTO_INCREMENT=111 COMMENT='xxx'
PARTITION BY RANGE COLUMNS(`f`)
(PARTITION `p20230621` VALUES LESS THAN ("20230622"),
PARTITION `p20241106` VALUES LESS THAN ('20241107'),
PARTITION `p20241107` VALUES LESS THAN ('20241108'),
PARTITION `p20241108` VALUES LESS THAN ('20241109'),
PARTITION `p20241109` VALUES LESS THAN ('20241110'),
PARTITION `p20241110` VALUES LESS THAN ('20241111'),
PARTITION `p20241111` VALUES LESS THAN ('20241112'),
PARTITION `p20241112` VALUES LESS THAN ('20241113'),
PARTITION `p20241113` VALUES LESS THAN ('20241114'),
PARTITION `p20241114` VALUES LESS THAN ('20241115'),
PARTITION `p20241115` VALUES LESS THAN ('20241116'),
PARTITION `p20241116` VALUES LESS THAN ('20241117'),
PARTITION `p20241117` VALUES LESS THAN ('20241118'),
PARTITION `p20241118` VALUES LESS THAN ('20241119'),
PARTITION `p20241119` VALUES LESS THAN ('20241120'),
PARTITION `p20241120` VALUES LESS THAN ('20241121'),
PARTITION `p20241121` VALUES LESS THAN ('20241122'),
PARTITION `p20241122` VALUES LESS THAN ('20241123'),
PARTITION `p20241123` VALUES LESS THAN ('20241124'),
PARTITION `p20241124` VALUES LESS THAN ('20241125'),
PARTITION `p20241125` VALUES LESS THAN ('20241126'),
PARTITION `p20241126` VALUES LESS THAN ('20241127'))
源端的SQL
SELECT COUNT(*) as CNT, BIT_XOR(CAST(CONV(SUBSTRING(MD5(CONCAT_WS(',', `id`, `a`, `b`, `c`, `d`, `e`, `f`, CONCAT(ISNULL(`id`), ISNULL(`a`), ISNULL(`b`), ISNULL(`c`), ISNULL(`d`), ISNULL(`e`), ISNULL(`f`)))), 1, 16), 16, 10) AS UNSIGNED) ^ CAST(CONV(SUBSTRING(MD5(CONCAT_WS(',', `id`, `a`, `b`, `c`, `d`, `e`, `f`, CONCAT(ISNULL(`id`), ISNULL(`a`), ISNULL(`b`), ISNULL(`c`), ISNULL(`d`), ISNULL(`e`), ISNULL(`f`)))), 17, 16), 16, 10) AS UNSIGNED)) as CHECKSUM FROM `db1`.`t1` WHERE ((((`id` > '56923299712') OR (`id` = '56923299712' AND `f` > '2024-11-06')) AND ((`id` < '56923536466') OR (`id` = '56923536466' AND `f` <= '2024-11-06'))) AND (TRUE))
配置文件
check-thread-count = 8
export-fix-sql = true
check-struct-only = false
skip-non-existing-table = false
[data-sources]
[data-sources.tidb01]
host = "xx"
port = 4000
user = "root"
password = "xx"
[data-sources.tidb02]
host = "xx
port = 4000
user = "root"
password = "xx"
[task]
output-dir = "xxx/logs"
source-instances = ["tidb01"]
target-instance = "tidb02"
target-check-tables = ["xxx.t1"]
target-configs = ["config1"]
[table-configs.config1]
chunk-size = 10000
range = "f >= '2024-11-11 00:00:00' AND f <= '2024-11-11 23:59:59'"
sync_diff_inspector 版本
sync_diff_inspector --version
App Name: sync_diff_inspector
Release Version: v7.5.3-2-gd226440
Git Commit Hash: d226440121147098eb5eb99cbc1efb94092ec68e
Git Branch: HEAD
UTC Build Time: 2024-08-18 05:45:29
Go Version: go1.21.10
···```