不懂就问:tidb集群跨DC(云 Region)迁移

原始集群配置

# Global defaults inherited by every component instance unless overridden below.
global:
  user: tidb
  ssh_port: 22
  deploy_dir: /data/tidb-deploy
  data_dir: /data/tidb-data/
  os: linux
  arch: amd64

# Host-level monitoring agents deployed on every machine in the topology.
monitored:
  node_exporter_port: 39100
  blackbox_exporter_port: 39115
  deploy_dir: /data/tidb-deploy/monitor-39100
  data_dir: /data/tidb-data/monitor_data
  log_dir: /data/tidb-deploy/monitor-39100/log

# Component-wide configuration applied to all instances of each component.
server_configs:
  tidb:
    oom-use-tmp-storage: true
    performance.max-procs: 0
    performance.txn-total-size-limit: 2097152
    prepared-plan-cache.enabled: true
    tikv-client.copr-cache.capacity-mb: 128.0
    tikv-client.max-batch-wait-time: 0
    tmp-storage-path: /data/tidb-data/tmp_oom
    split-table: true
  tikv:
    coprocessor.split-region-on-table: true
    readpool.coprocessor.use-unified-pool: true
    readpool.storage.use-unified-pool: false
    server.grpc-compression-type: none
    storage.block-cache.shared: true
  pd:
    enable-cross-table-merge: false
    # Must stay enabled: the cross-DC migration below relies on placement rules.
    replication.enable-placement-rules: true
    schedule.leader-schedule-limit: 4
    schedule.region-schedule-limit: 2048
    schedule.replica-schedule-limit: 64
    # Label hierarchy, most- to least-significant, used for replica placement.
    # TiKV server.labels added later must use exactly these keys
    # ("logic", not "zone") to take part in location-aware scheduling.
    replication.location-labels: ["dc", "logic", "rack", "host"]
  tiflash: {}
  tiflash-learner: {}
  pump: {}
  drainer: {}
  cdc: {}

# Single TiDB SQL node.
tidb_servers:
  - host: 192.168.8.11
    ssh_port: 22
    port: 4000
    status_port: 10080
    deploy_dir: /data/tidb-deploy/tidb_4000

# Original-DC TiKV nodes, all on port 20160.
tikv_servers:
  - host: 192.168.8.11
    ssh_port: 22
    port: 20160
    status_port: 20180
    deploy_dir: /data/tidb-deploy/tikv_20160
    data_dir: /data/tidb-data/tikv_20160
  - host: 192.168.8.12
    ssh_port: 22
    port: 20160
    status_port: 20180
    deploy_dir: /data/tidb-deploy/tikv_20160
    data_dir: /data/tidb-data/tikv_20160
  - host: 192.168.8.13
    ssh_port: 22
    port: 20160
    status_port: 20180
    deploy_dir: /data/tidb-deploy/tikv_20160
    data_dir: /data/tidb-data/tikv_20160

# Single PD node. NOTE(review): a lone PD has no HA; production clusters
# normally run 3 PD instances — confirm this is a test environment.
pd_servers:
  - host: 192.168.8.11
    ssh_port: 22
    name: pd-192.168.8.11-2379
    client_port: 2379
    peer_port: 2380
    deploy_dir: /data/tidb-deploy/pd_2379
    data_dir: /data/tidb-data/pd_2379

修改集群 Label

tiup cluster edit-config tidb_placement_rule_remove
# 每个TiKV节点新增label配置:
  # Per-instance labels for each existing (bj1) TiKV node. The keys must match
  # pd's replication.location-labels ("dc", "logic", "rack", "host"); the key
  # "zone" used originally is not in location-labels, so it would not take
  # part in location-aware replica scheduling — use "logic" instead.
  config:
    server.labels: { dc: "bj1", logic: "1", rack: "1", host: "192.168.8.11_20160" }

  config:
    server.labels: { dc: "bj1", logic: "1", rack: "1", host: "192.168.8.12_20160" }

  config:
    server.labels: { dc: "bj1", logic: "1", rack: "1", host: "192.168.8.13_20160" }
 
 
# 生效配置变更
tiup cluster reload tidb_placement_rule_remove -R tikv -y

扩容新机房TiKV

tiup cluster scale-out tidb_placement_rule_remove scale-out-pr-test.yaml -u root -p
  • 配置文件
tikv_servers:
  # New-DC (bj4) TiKV instances on port 20161. Label keys follow pd's
  # replication.location-labels order: dc / logic / rack / host.
  - host: 192.168.8.12
    ssh_port: 22
    port: 20161
    status_port: 20181
    deploy_dir: /data/tidb-deploy/tikv_20161
    data_dir: /data/tidb-data/tikv_20161
    config:
      server.labels: { dc: "bj4", logic: "2", rack: "2", host: "192.168.8.12_20161" }
  - host: 192.168.8.13
    ssh_port: 22
    port: 20161
    status_port: 20181
    deploy_dir: /data/tidb-deploy/tikv_20161
    data_dir: /data/tidb-data/tikv_20161
    config:
      server.labels: { dc: "bj4", logic: "2", rack: "2", host: "192.168.8.13_20161" }
  - host: 192.168.8.14
    ssh_port: 22
    port: 20161
    status_port: 20181
    deploy_dir: /data/tidb-deploy/tikv_20161
    data_dir: /data/tidb-data/tikv_20161
    config:
      server.labels: { dc: "bj4", logic: "2", rack: "2", host: "192.168.8.14_20161" }
  • 可以看到扩容后,新节点调度一个follower region给192.168.8.12:20161 机器
SELECT  region.TABLE_NAME,  tikv.address,  case when region.IS_INDEX = 1 then "index" else "data" end as "region-type",  case when peer.is_leader = 1 then region.region_id end as "leader",
 case when peer.is_leader = 0 then region.region_id end as "follower",  case when peer.IS_LEARNER = 1 then region.region_id end as "learner"
FROM  information_schema.tikv_store_status tikv,  information_schema.tikv_region_peers peer, 
(SELECT * FROM information_schema.tikv_region_status where DB_NAME='test' and TABLE_NAME='sbtest1' and IS_INDEX=0) region
WHERE   region.region_id = peer.region_id  AND peer.store_id = tikv.store_id order by 1,3;
 
+------------+--------------------+-------------+--------+----------+---------+
| TABLE_NAME | address            | region-type | leader | follower | learner |
+------------+--------------------+-------------+--------+----------+---------+
| sbtest1    | 192.168.8.13:20160 | data        |   NULL |       16 |    NULL |
| sbtest1    | 192.168.8.11:20160 | data        |   NULL |       16 |    NULL |
| sbtest1    | 192.168.8.12:20160 | data        |     16 |     NULL |    NULL |
+------------+--------------------+-------------+--------+----------+---------+
3 rows in set (0.02 sec)

配置Placement Rule规则

  • dc-bj1 机房有3个voter
  • dc-bj4 机房有2个follower
cat > rules.json <<EOF
[{
  "group_id": "pd",
  "group_index": 0,
  "group_override": false,
  "rules": [
    {
        "group_id": "pd",
        "id": "dc-bj1",
        "start_key": "",
        "end_key": "",
        "role": "voter",
        "count": 3,
        "label_constraints": [
            {"key": "dc", "op": "in", "values": ["bj1"]}
        ],
        "location_labels": ["dc"]
    },
    {
        "group_id": "pd",
        "id": "dc-bj4",
        "start_key": "",
        "end_key": "",
        "role": "follower",
        "count": 2,
        "label_constraints": [
            {"key": "dc", "op": "in", "values": ["bj4"]}
        ],
        "location_labels": ["dc"]
    }
]
}
]
EOF

生效 Placement rule

tiup ctl:v5.0.4 pd --pd=http://127.0.0.1:2379 config placement-rules rule-bundle save --in=rules.json

检查region分布状态

可以看到按照预期分配region调度,并且“bj4” 机房没有分配leader,目前是follower。

MySQL [(none)]> SELECT  region.TABLE_NAME,  tikv.address,  case when region.IS_INDEX = 1 then "index" else "data" end as "region-type",  case when peer.is_leader = 1 then region.region_id end as "leader",  case when peer.is_leader = 0 then region.region_id end as "follower",  case when peer.IS_LEARNER = 1 then region.region_id end as "learner" FROM  information_schema.tikv_store_status tikv,  information_schema.tikv_region_peers peer,  (SELECT * FROM information_schema.tikv_region_status where DB_NAME='test' and TABLE_NAME='sbtest1' and IS_INDEX=0) region WHERE   region.region_id = peer.region_id  AND peer.store_id = tikv.store_id order by 1,3;
+------------+--------------------+-------------+--------+----------+---------+
| TABLE_NAME | address            | region-type | leader | follower | learner |
+------------+--------------------+-------------+--------+----------+---------+
| sbtest1    | 192.168.8.11:20160 | data        |   NULL |        3 |    NULL |
| sbtest1    | 192.168.8.12:20161 | data        |   NULL |        3 |    NULL |
| sbtest1    | 192.168.8.14:20161 | data        |   NULL |        3 |    NULL |
| sbtest1    | 192.168.8.13:20160 | data        |      3 |     NULL |    NULL |
| sbtest1    | 192.168.8.12:20160 | data        |   NULL |        3 |    NULL |
+------------+--------------------+-------------+--------+----------+---------+
  • 集群Placement-rule切换:未选举出bj4 的region-leader
[tidb@centos1 deploy]$ tiup ctl:v5.0.4 pd --pd=http://127.0.0.1:2379 config placement-rules rule-bundle save --in=rules.json
[tidb@centos1 deploy]$ tiup ctl:v5.0.4 pd --pd=http://127.0.0.1:2379 config placement-rules show
 
MySQL [(none)]> SELECT  region.TABLE_NAME,  tikv.address,  case when region.IS_INDEX = 1 then "index" else "data" end as "region-type",  case when peer.is_leader = 1 then region.region_id end as "leader",   case when peer.is_leader = 0 then region.region_id end as "follower",  case when peer.IS_LEARNER = 1 then region.region_id end as "learner"  FROM  information_schema.tikv_store_status tikv,  information_schema.tikv_region_peers peer,   (SELECT * FROM information_schema.tikv_region_status where DB_NAME='test' and TABLE_NAME='sbtest1' and IS_INDEX=0) region  WHERE   region.region_id = peer.region_id  AND peer.store_id = tikv.store_id order by 1,3;
+------------+--------------------+-------------+--------+----------+---------+
| TABLE_NAME | address            | region-type | leader | follower | learner |
+------------+--------------------+-------------+--------+----------+---------+
| sbtest1    | 192.168.8.13:20160 | data        |      3 |     NULL |    NULL |
| sbtest1    | 192.168.8.12:20160 | data        |   NULL |        3 |    NULL |
| sbtest1    | 192.168.8.11:20160 | data        |   NULL |        3 |    NULL |
| sbtest1    | 192.168.8.12:20161 | data        |   NULL |        3 |    NULL |
| sbtest1    | 192.168.8.14:20161 | data        |   NULL |        3 |    NULL |
+------------+--------------------+-------------+--------+----------+---------+
5 rows in set (0.01 sec)