TiCDC was just installed with TiUP but won't start, and it throws a bunch of errors. Any ideas?

You can run tiup cluster edit-config to show the complete current deployment topology.
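For example, with <cluster-name> as a placeholder for the name chosen at deploy time (tiup cluster list shows the deployed clusters):

tiup cluster edit-config <cluster-name>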

global:
  user: tidb
  ssh_port: 22
  ssh_type: builtin
  deploy_dir: /data/tidb_deploy
  data_dir: /data/tidb_data
  os: linux
  arch: amd64
monitored:
  node_exporter_port: 9100
  blackbox_exporter_port: 9115
  deploy_dir: /data/tidb_deploy/monitor-9100
  data_dir: /data/tidb_data/monitor-9100
  log_dir: /data/tidb_deploy/monitor-9100/log
server_configs:
  tidb:
    binlog.enable: false
    binlog.ignore-error: false
    log.level: warn
    log.slow-threshold: 300
    performance.committer-concurrency: 16384
    performance.max-memory: 1000000000
    prepared-plan-cache.enabled: false
    tikv-client.grpc-connection-count: 128
    token-limit: 1000
  tikv:
    log-level: warn
    raftdb.bytes-per-sync: 256MB
    raftdb.defaultcf.compression-per-level:
    - zstd
    - zstd
    - zstd
    - lz4
    - lz4
    - zstd
    - zstd
    raftdb.wal-bytes-per-sync: 128MB
    raftdb.writable-file-max-buffer-size: 256MB
    raftstore.apply-pool-size: 3
    raftstore.hibernate-regions: true
    raftstore.messages-per-tick: 40960
    raftstore.raft-base-tick-interval: 2s
    raftstore.raft-entry-max-size: 32MB
    raftstore.raft-max-inflight-msgs: 8192
    raftstore.store-pool-size: 4
    raftstore.sync-log: false
    rocksdb.bytes-per-sync: 64MB
    rocksdb.defaultcf.compression-per-level:
    - zstd
    - zstd
    - zstd
    - lz4
    - lz4
    - zstd
    - zstd
    rocksdb.lockcf.compression-per-level:
    - zstd
    - zstd
    - zstd
    - lz4
    - lz4
    - zstd
    - zstd
    rocksdb.raftcf.compression-per-level:
    - zstd
    - zstd
    - zstd
    - lz4
    - lz4
    - zstd
    - zstd
    rocksdb.wal-bytes-per-sync: 32MB
    rocksdb.writecf.compression-per-level:
    - zstd
    - zstd
    - zstd
    - lz4
    - lz4
    - zstd
    - zstd
    storage.block-cache.capacity: 18GB
    storage.scheduler-worker-pool-size: 4
  pd:
    log.level: info
    replication.enable-placement-rules: true
    schedule.leader-schedule-limit: 4
    schedule.region-schedule-limit: 2048
    schedule.replica-schedule-limit: 64
  tiflash:
    logger.level: info
    path_realtime_mode: false
  tiflash-learner: {}
  pump: {}
  drainer: {}
  cdc: {}
tidb_servers:
- host: 10.59.111.225
  ssh_port: 22
  port: 4000
  status_port: 10080
  deploy_dir: /data/tidb_deploy/tidb-4000
  log_dir: /data/tidb_deploy/tidb-4000/log
  arch: amd64
  os: linux
- host: 10.59.111.226
  ssh_port: 22
  port: 4000
  status_port: 10080
  deploy_dir: /data/tidb_deploy/tidb-4000
  log_dir: /data/tidb_deploy/tidb-4000/log
  arch: amd64
  os: linux
- host: 10.59.111.227
  ssh_port: 22
  port: 4000
  status_port: 10080
  deploy_dir: /data/tidb_deploy/tidb-4000
  log_dir: /data/tidb_deploy/tidb-4000/log
  arch: amd64
  os: linux
tikv_servers:
- host: 10.59.111.132
  ssh_port: 22
  port: 20160
  status_port: 20180
  deploy_dir: /data/tidb_deploy/tikv-20160
  data_dir: /data/tidb_data/tikv-20160
  log_dir: /data/tidb_deploy/tikv-20160/log
  arch: amd64
  os: linux
- host: 10.59.111.133
  ssh_port: 22
  port: 20160
  status_port: 20180
  deploy_dir: /data/tidb_deploy/tikv-20160
  data_dir: /data/tidb_data/tikv-20160
  log_dir: /data/tidb_deploy/tikv-20160/log
  arch: amd64
  os: linux
- host: 10.59.111.224
  ssh_port: 22
  port: 20160
  status_port: 20180
  deploy_dir: /data/tidb_deploy/tikv-20160
  data_dir: /data/tidb_data/tikv-20160
  log_dir: /data/tidb_deploy/tikv-20160/log
  arch: amd64
  os: linux
- host: 10.59.111.177
  ssh_port: 22
  port: 20160
  status_port: 20180
  deploy_dir: /data/tidb_deploy/tikv-20160
  data_dir: /data/tidb_data/tikv-20160
  log_dir: /data/tidb_deploy/tikv-20160
  arch: amd64
  os: linux
tiflash_servers:
- host: 10.59.111.10
  ssh_port: 22
  tcp_port: 9000
  http_port: 8123
  flash_service_port: 3930
  flash_proxy_port: 20170
  flash_proxy_status_port: 20292
  metrics_port: 8234
  deploy_dir: /data/tidb_deploy/tiflash-9000
  data_dir: /data/tidb_data/tiflash-9000
  log_dir: /data/tidb_deploy/tiflash-9000/log
  arch: amd64
  os: linux
pd_servers:
- host: 10.59.111.225
  ssh_port: 22
  name: pd-10.59.111.225-2379
  client_port: 2379
  peer_port: 2380
  deploy_dir: /data/tidb_deploy/pd-2379
  data_dir: /data/tidb_data/pd-2379
  log_dir: /data/tidb_deploy/pd-2379/log
  arch: amd64
  os: linux
- host: 10.59.111.226
  ssh_port: 22
  name: pd-10.59.111.226-2379
  client_port: 2379
  peer_port: 2380
  deploy_dir: /data/tidb_deploy/pd-2379
  data_dir: /data/tidb_data/pd-2379
  log_dir: /data/tidb_deploy/pd-2379/log
  arch: amd64
  os: linux
- host: 10.59.111.227
  ssh_port: 22
  name: pd-10.59.111.227-2379
  client_port: 2379
  peer_port: 2380
  deploy_dir: /data/tidb_deploy/pd-2379
  data_dir: /data/tidb_data/pd-2379
  log_dir: /data/tidb_deploy/pd-2379/log
  arch: amd64
  os: linux
cdc_servers:
- host: 10.59.110.178
  ssh_port: 22
  port: 8300
  deploy_dir: /data/cdc/8300
  data_dir: /data/cdc/8300/store
  log_dir: /data/cdc/8300/log
  gc-ttl: 86400
  arch: amd64
  os: linux
- host: 10.59.110.17
  ssh_port: 22
  port: 8300
  deploy_dir: /data/cdc/8300
  data_dir: /data/cdc/8300/store
  log_dir: /data/cdc/8300/log
  gc-ttl: 86400
  arch: amd64
  os: linux
- host: 10.59.110.93
  ssh_port: 22
  port: 8300
  deploy_dir: /data/cdc/8300
  data_dir: /data/cdc/8300/store
  log_dir: /data/cdc/8300/log
  gc-ttl: 86400
  arch: amd64
  os: linux
monitoring_servers:
- host: 10.59.111.10
  ssh_port: 22
  port: 9090
  deploy_dir: /data/tidb_deploy/prometheus-9090
  data_dir: /data/tidb_data/prometheus-9090
  log_dir: /data/tidb_deploy/prometheus-9090/log
  external_alertmanagers: []
  arch: amd64
  os: linux
grafana_servers:
- host: 10.59.111.10
  ssh_port: 22
  port: 3000
  deploy_dir: /data/tidb_deploy/grafana-3000
  arch: amd64
  os: linux
  username: admin
  password: admin
  anonymous_enable: false
  root_url: ""
  domain: ""
alertmanager_servers:
- host: 10.59.111.10
  ssh_port: 22
  web_port: 9093
  cluster_port: 9094
  deploy_dir: /data/tidb_deploy/alertmanager-9093
  data_dir: /data/tidb_data/alertmanager-9093
  log_dir: /data/tidb_deploy/alertmanager-9093/log
  arch: amd64
  os: linux

Did you previously deploy an older version of CDC, scale it in, and are now redeploying a newer CDC cluster via scale-out?
If that is the case, the problem is probably metadata left behind in the cluster by the old CDC deployment, and you may need to remove that leftover data with the unsafe commands.
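If you want to confirm what is left over before deleting anything, one option (an assumption on my side, it requires an etcdctl binary with the v3 API on a host that can reach PD) is to list the keys TiCDC keeps under the /tidb/cdc prefix in PD's embedded etcd, for example against the PD node 10.59.111.227 from your topology:

ETCDCTL_API=3 etcdctl --endpoints=http://10.59.111.227:2379 get /tidb/cdc --prefix --keys-only

If old capture or changefeed keys show up there even though all CDC nodes have been scaled in, that is the leftover metadata.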

Quite a while ago, TiCDC was indeed deployed with tidb v4.0.1 and later scaled in.

So it does look like a leftover-data problem?

Then it should be the leftover data: the metadata is incompatible with the old version. If you have confirmed that the old data is no longer needed, you can run cdc cli unsafe reset --pd=${PD_ADDR} to clear it.

OK, I'll give it a try.

One more question: this command only affects TiCDC and has no impact on the other components, such as PD, tidb-server, TiKV, right?

Yes, it only affects CDC and will not affect the other components.
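If you want to double-check, you can list the status of every component before and after the reset; PD, tidb-server and TiKV should stay Up throughout (<cluster-name> again being a placeholder):

tiup cluster display <cluster-name>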

OK, thanks for the reply.

It worked, CDC started successfully :call_me_hand:

Thanks a lot :grinning:

Steps taken:
1. Scale in TiCDC

tiup cluster scale-in dem -N 10.59.110.17:8300,10.59.110.178:8300,10.59.110.93:8300

2. Clean up the stale TiCDC metadata

tiup cdc cli unsafe reset --pd=http://10.59.111.227:2379

3. Redeploy TiCDC; it started successfully

tiup cluster scale-out demo cdc.yml
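For reference, the cdc.yml used in step 3 is not shown in this thread; a minimal scale-out topology for the three CDC hosts would look roughly like the sketch below, based on the cdc_servers section of the final config above rather than the exact file that was used:

cdc_servers:
- host: 10.59.110.178
  port: 8300
  deploy_dir: /data/cdc/8300
  data_dir: /data/cdc/8300/store
  log_dir: /data/cdc/8300/log
  gc-ttl: 86400
- host: 10.59.110.17
  port: 8300
  deploy_dir: /data/cdc/8300
  data_dir: /data/cdc/8300/store
  log_dir: /data/cdc/8300/log
  gc-ttl: 86400
- host: 10.59.110.93
  port: 8300
  deploy_dir: /data/cdc/8300
  data_dir: /data/cdc/8300/store
  log_dir: /data/cdc/8300/log
  gc-ttl: 86400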

:+1:, looks like there are still quite a few details to watch out for.

Looks like it was a cross-version upgrade: CDC had been installed with an older version before.
