Hi 请教下使用 sync_diff_inspector 进行数据校验分区表时,配置文件里的日志明明写了时间范围,但是还是会扫描到其他的分区,这个表非常大,如果全表扫发现会把 tikv 的 cpu 打满,请教下对于分区表是否有方法可以直接定位到指定分区进行校验?
表结构
CREATE TABLE `t1` (
  `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT COMMENT '主键id',
  `a` bigint(20) NOT NULL DEFAULT '0' COMMENT '',
  `b` bigint(20) NOT NULL DEFAULT '0' COMMENT '',
  `c` varchar(128) NOT NULL DEFAULT '' COMMENT '',
  `d` bigint(20) NOT NULL DEFAULT '0' COMMENT '',
  `e` bigint(20) NOT NULL DEFAULT '0' COMMENT '',
  `f` date NOT NULL DEFAULT '0001-01-01' COMMENT '',
  PRIMARY KEY (`id`,`f`) /*T![clustered_index] NONCLUSTERED */
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin AUTO_INCREMENT=111 COMMENT='xxx'
PARTITION BY RANGE COLUMNS(`f`)
(PARTITION `p20230621` VALUES LESS THAN ("20230622"),
 PARTITION `p20241106` VALUES LESS THAN ('20241107'),
 PARTITION `p20241107` VALUES LESS THAN ('20241108'),
 PARTITION `p20241108` VALUES LESS THAN ('20241109'),
 PARTITION `p20241109` VALUES LESS THAN ('20241110'),
 PARTITION `p20241110` VALUES LESS THAN ('20241111'),
 PARTITION `p20241111` VALUES LESS THAN ('20241112'),
 PARTITION `p20241112` VALUES LESS THAN ('20241113'),
 PARTITION `p20241113` VALUES LESS THAN ('20241114'),
 PARTITION `p20241114` VALUES LESS THAN ('20241115'),
 PARTITION `p20241115` VALUES LESS THAN ('20241116'),
 PARTITION `p20241116` VALUES LESS THAN ('20241117'),
 PARTITION `p20241117` VALUES LESS THAN ('20241118'),
 PARTITION `p20241118` VALUES LESS THAN ('20241119'),
 PARTITION `p20241119` VALUES LESS THAN ('20241120'),
 PARTITION `p20241120` VALUES LESS THAN ('20241121'),
 PARTITION `p20241121` VALUES LESS THAN ('20241122'),
 PARTITION `p20241122` VALUES LESS THAN ('20241123'),
 PARTITION `p20241123` VALUES LESS THAN ('20241124'),
 PARTITION `p20241124` VALUES LESS THAN ('20241125'),
 PARTITION `p20241125` VALUES LESS THAN ('20241126'),
 PARTITION `p20241126` VALUES LESS THAN ('20241127'))
源端的SQL
SELECT COUNT(*) as CNT, BIT_XOR(CAST(CONV(SUBSTRING(MD5(CONCAT_WS(',', `id`, `a`, `b`, `c`, `d`, `e`, `f`, CONCAT(ISNULL(`id`), ISNULL(`a`), ISNULL(`b`), ISNULL(`c`), ISNULL(`d`), ISNULL(`e`), ISNULL(`f`)))), 1, 16), 16, 10) AS UNSIGNED) ^ CAST(CONV(SUBSTRING(MD5(CONCAT_WS(',', `id`, `a`, `b`, `c`, `d`, `e`, `f`, CONCAT(ISNULL(`id`), ISNULL(`a`), ISNULL(`b`), ISNULL(`c`), ISNULL(`d`), ISNULL(`e`), ISNULL(`f`)))), 17, 16), 16, 10) AS UNSIGNED)) as CHECKSUM FROM `db1`.`t1` WHERE ((((`id` > '56923299712') OR (`id` = '56923299712' AND `f` > '2024-11-06')) AND ((`id` < '56923536466') OR (`id` = '56923536466' AND `f` <= '2024-11-06'))) AND (TRUE))
配置文件
check-thread-count = 8
export-fix-sql = true
check-struct-only = false
skip-non-existing-table = false
[data-sources]
[data-sources.tidb01]
    host = "xx"
    port = 4000
    user = "root"
    password = "xx"
[data-sources.tidb02]
    host = "xx
    port = 4000
    user = "root"
    password = "xx"
[task]
    output-dir = "xxx/logs"
    source-instances = ["tidb01"]
    target-instance = "tidb02"
    target-check-tables = ["xxx.t1"]
    target-configs = ["config1"]
[table-configs.config1]
chunk-size = 10000
range = "f >= '2024-11-11 00:00:00' AND f <= '2024-11-11 23:59:59'"
sync_diff_inspector 版本
sync_diff_inspector --version
App Name: sync_diff_inspector
Release Version: v7.5.3-2-gd226440
Git Commit Hash: d226440121147098eb5eb99cbc1efb94092ec68e
Git Branch: HEAD
UTC Build Time: 2024-08-18 05:45:29
Go Version: go1.21.10
···```