【TiDB 使用环境】测试
【TiDB 版本】8.5.1
【操作系统】AlmaLinux release 9.2
【部署方式】pd/tidb 共用3台:8c 16g 150G SSD,tikv独用3台: 16c 32g 200G SSD
【集群数据量】
【集群节点数】
【问题复现路径】tiup cluster deploy tidb-test v8.5.1 ./topology.yaml --user root 成功
tiup cluster start tidb-test --init 失败
【遇到的问题:问题现象及影响】
初始化启动集群时,PD启动失败,报错:“Error: failed to start pd: failed to start: xx.xx.xx.4 pd-2379.service, please check the instance’s log(/data/tidb/qdtest-tidb_test/deploy/pd-2379/log) for more detail.: timed out waiting for port 2379 to be started after 2m0s”
但日志内容是空的,相关目录权限也确认是755,部署用户用的root,也不存在权限问题
配置文件如下:
# Cluster-wide defaults.
global:
  # OS account used for the deployment.
  user: root

  # SSH access: built-in client on a non-default port.
  ssh_type: builtin
  ssh_port: 27615

  # Bind address for services.
  listen_host: 0.0.0.0

  # Cluster-level deploy and data directories.
  deploy_dir: /data/tidb/qdtest-tidb_test/deploy
  data_dir: /data/tidb/qdtest-tidb_test/data

  # Target platform and systemd scope.
  os: linux
  systemd_mode: system
# Host-level exporters.
monitored:
  # Listening ports of the two exporters.
  node_exporter_port: 9100
  blackbox_exporter_port: 9115

  # Directories used by the exporter deployment.
  deploy_dir: /data/tidb/qdtest-tidb_test/deploy/monitored-9100
  data_dir: /data/tidb/qdtest-tidb_test/data/monitored-9100
  log_dir: /data/tidb/qdtest-tidb_test/deploy/monitored-9100/log
# Component-wide settings, keyed by component name.
server_configs:
  tidb:
    # Seconds to wait for transactions to finish on shutdown/restart; after
    # the timeout the process is force-terminated whether or not requests
    # have completed.
    graceful-wait-before-shutdown: 15
    log.level: warn  # log level
    oom-use-tmp-storage: true  # use temporary disk storage when memory is insufficient
    token-limit: 1000  # number of sessions allowed to run concurrently (flow control)
    prepared-plan-cache.enabled: true  # enable the execution plan cache
    performance.txn-total-size-limit: 536870912  # size limit of a single transaction, in bytes
    performance.max-procs: 4  # limit on the number of CPU cores used

  tikv:
    log.level: warn  # log level
    # Number of RocksDB background threads. Default is 9 with 10 CPU cores,
    # 7 with 8 CPU cores, and max(2, min(N - 1, 9)) with N CPU cores.
    raftdb.max-background-jobs: 4
    raftstore.capacity: 80GB  # storage capacity, i.e. the maximum data size allowed
    # `raftstore.sync-log` is no longer available since TiKV v5.0 (Raft logs
    # are always synced to disk), so the former "async flush" setting has no
    # effect on v8.5.1. Kept commented out for reference only.
    # raftstore.sync-log: false
    raftstore.raft-log-gc-threshold: 100  # Raft log retention threshold
    readpool.storage.normal-concurrency: 8  # threads handling normal-priority read requests
    rocksdb.max-background-jobs: 8  # background job threads (original note: CPU cores * 0.75)
    server.grpc-concurrency: 8  # number of gRPC worker threads
    storage.reserve-space: 10GB  # reserved disk space
    storage.block-cache.shared: true  # whether the shared block cache is enabled
    storage.block-cache.capacity: 4GB  # block cache size (original note: 30% of memory)
    # Original note: dedicated high-performance NVMe SSD.
    storage.data-dir: /data/tidb/qdtest-tidb_test/data/tikv-20160

  pd:
    log.level: warn  # log level
    # Cross-datacenter disaster-recovery labels.
    # NOTE(review): no TiKV instance in this topology sets `server.labels`,
    # so these location labels have nothing to match — confirm intent.
    replication.location-labels: ["zone", "host"]
    # Region merge size threshold. This option is an integer in MiB; the
    # previous value `20GB` is a size string that PD cannot load into it.
    # NOTE(review): this is a likely reason pd-server exits before port 2379
    # opens — check pd_stderr.log in the PD log directory to confirm.
    schedule.max-merge-region-size: 20
    # Upper bound on keys for Region merge: PD does not merge a Region with
    # its neighbours when the Region holds more keys than this.
    schedule.max-merge-region-keys: 200000
    schedule.high-space-ratio: 0.7  # threshold at which store space counts as sufficient
    schedule.low-space-ratio: 0.8  # threshold at which store space counts as insufficient

  tiflash:
    profiles.default.max_memory_usage: 1073741824  # memory limit, in bytes

  tiproxy:
    # Management API listen address and port.
    # `curl http://tiproxy-ip:3080/status` returning HTTP 200 means healthy.
    api.addr: "0.0.0.0:3080"
    ha.interface: ens18  # network interface name
    ha.virtual-ip: xx.xx.xx.229/24  # virtual IP
    log.level: warn  # log level
    log.log-file.filename: /data/tidb/qdtest-tidb_test/deploy/tiproxy-6000/log/tiproxy.log
    proxy.addr: "0.0.0.0:6000"  # listen address
    # Seconds to wait for transactions to finish on shutdown/restart before
    # the process is force-terminated. TiProxy defines this option in the
    # `proxy` section, so it is written with the `proxy.` prefix here.
    proxy.graceful-wait-before-shutdown: 15
    # NOTE(review): the four keys below do not appear to be listed in the
    # TiProxy configuration reference — verify that TiProxy recognises them.
    # Max concurrent connections from each TiProxy instance to one TiDB backend.
    proxy.backend.max_conn_per_host: 512
    proxy.backend.max_connections: 2048  # backend connection pool size
    proxy.frontend.idle_timeout: 3600  # frontend idle timeout, in seconds
    proxy.health_check.interval: 5s  # interval of active health checks on TiDB backends
    # PD endpoints.
    proxy.pd-addrs: "xx.xx.xx.2:2379,xx.xx.xx.3:2379,xx.xx.xx.4:2379"
# TiDB instances: one per host, identical ports and directory layout.
# Each instance points at a session-token signing certificate/key pair and a
# temporary storage directory under its own deploy directory.
# The temporary storage option is spelled `tmp-storage-path` in TiDB; the
# previous spelling `temp-storage-path` is not a TiDB configuration item.
tidb_servers:
  - host: xx.xx.xx.2
    ssh_port: 27615
    port: 4000
    status_port: 10080
    deploy_dir: /data/tidb/qdtest-tidb_test/deploy/tidb-4000
    log_dir: /data/tidb/qdtest-tidb_test/deploy/tidb-4000/log
    config:
      security.session-token-signing-cert: /data/tidb/qdtest-tidb_test/deploy/tidb-4000/tls/tiproxy-session.crt
      security.session-token-signing-key: /data/tidb/qdtest-tidb_test/deploy/tidb-4000/tls/tiproxy-session.key
      tmp-storage-path: /data/tidb/qdtest-tidb_test/deploy/tidb-4000/tmp
  - host: xx.xx.xx.3
    ssh_port: 27615
    port: 4000
    status_port: 10080
    deploy_dir: /data/tidb/qdtest-tidb_test/deploy/tidb-4000
    log_dir: /data/tidb/qdtest-tidb_test/deploy/tidb-4000/log
    config:
      security.session-token-signing-cert: /data/tidb/qdtest-tidb_test/deploy/tidb-4000/tls/tiproxy-session.crt
      security.session-token-signing-key: /data/tidb/qdtest-tidb_test/deploy/tidb-4000/tls/tiproxy-session.key
      tmp-storage-path: /data/tidb/qdtest-tidb_test/deploy/tidb-4000/tmp
  - host: xx.xx.xx.4
    ssh_port: 27615
    port: 4000
    status_port: 10080
    deploy_dir: /data/tidb/qdtest-tidb_test/deploy/tidb-4000
    log_dir: /data/tidb/qdtest-tidb_test/deploy/tidb-4000/log
    config:
      security.session-token-signing-cert: /data/tidb/qdtest-tidb_test/deploy/tidb-4000/tls/tiproxy-session.crt
      security.session-token-signing-key: /data/tidb/qdtest-tidb_test/deploy/tidb-4000/tls/tiproxy-session.key
      tmp-storage-path: /data/tidb/qdtest-tidb_test/deploy/tidb-4000/tmp
# TiKV instances: three hosts sharing the same ports and directory layout.
# NOTE(review): PD declares location-labels ["zone", "host"], but no instance
# below sets `server.labels` — confirm whether labels were meant to be added.
tikv_servers:
  - host: xx.xx.xx.5
    ssh_port: 27615
    # Service and status ports.
    port: 20160
    status_port: 20180
    # Instance directories.
    deploy_dir: /data/tidb/qdtest-tidb_test/deploy/tikv-20160
    data_dir: /data/tidb/qdtest-tidb_test/data/tikv-20160
    log_dir: /data/tidb/qdtest-tidb_test/deploy/tikv-20160/log

  - host: xx.xx.xx.6
    ssh_port: 27615
    port: 20160
    status_port: 20180
    deploy_dir: /data/tidb/qdtest-tidb_test/deploy/tikv-20160
    data_dir: /data/tidb/qdtest-tidb_test/data/tikv-20160
    log_dir: /data/tidb/qdtest-tidb_test/deploy/tikv-20160/log

  - host: xx.xx.xx.230
    ssh_port: 27615
    port: 20160
    status_port: 20180
    deploy_dir: /data/tidb/qdtest-tidb_test/deploy/tikv-20160
    data_dir: /data/tidb/qdtest-tidb_test/data/tikv-20160
    log_dir: /data/tidb/qdtest-tidb_test/deploy/tikv-20160/log
# TiFlash instances, placed on the same three hosts as TiKV but under /data1.
# Every instance uses the same ports and the same storage settings.
tiflash_servers:
  - host: xx.xx.xx.5
    ssh_port: 27615
    # Listening ports.
    tcp_port: 9000
    http_port: 8123
    flash_service_port: 3930
    flash_proxy_port: 20170
    flash_proxy_status_port: 20292
    metrics_port: 8234
    # Instance directories.
    deploy_dir: /data1/tidb/qdtest-tidb_test/deploy/tiflash-9000
    data_dir: /data1/tidb/qdtest-tidb_test/data/tiflash-9000/tmp
    log_dir: /data1/tidb/qdtest-tidb_test/deploy/tiflash-9000/log
    config:
      # Storage path (same directory as data_dir above).
      storage.main.dir: ["/data1/tidb/qdtest-tidb_test/data/tiflash-9000/tmp"]
      # Original note: disables the latest-data store.
      # NOTE(review): an empty list may instead mean "fall back to
      # storage.main.dir" — confirm against the TiFlash documentation.
      storage.latest.dir: []

  - host: xx.xx.xx.6
    ssh_port: 27615
    tcp_port: 9000
    http_port: 8123
    flash_service_port: 3930
    flash_proxy_port: 20170
    flash_proxy_status_port: 20292
    metrics_port: 8234
    deploy_dir: /data1/tidb/qdtest-tidb_test/deploy/tiflash-9000
    data_dir: /data1/tidb/qdtest-tidb_test/data/tiflash-9000/tmp
    log_dir: /data1/tidb/qdtest-tidb_test/deploy/tiflash-9000/log
    config:
      # Storage path.
      storage.main.dir: ["/data1/tidb/qdtest-tidb_test/data/tiflash-9000/tmp"]
      # Original note: disables the latest-data store.
      storage.latest.dir: []

  - host: xx.xx.xx.230
    ssh_port: 27615
    tcp_port: 9000
    http_port: 8123
    flash_service_port: 3930
    flash_proxy_port: 20170
    flash_proxy_status_port: 20292
    metrics_port: 8234
    deploy_dir: /data1/tidb/qdtest-tidb_test/deploy/tiflash-9000
    data_dir: /data1/tidb/qdtest-tidb_test/data/tiflash-9000/tmp
    log_dir: /data1/tidb/qdtest-tidb_test/deploy/tiflash-9000/log
    config:
      # Storage path.
      storage.main.dir: ["/data1/tidb/qdtest-tidb_test/data/tiflash-9000/tmp"]
      # Original note: disables the latest-data store.
      storage.latest.dir: []
# TiProxy instances, co-located with the TiDB/PD hosts.
# Port 6000 is the proxy port and 3080 the status port on every host.
tiproxy_servers:
  - host: xx.xx.xx.2
    ssh_port: 27615
    port: 6000
    status_port: 3080
    deploy_dir: /data/tidb/qdtest-tidb_test/deploy/tiproxy-6000

  - host: xx.xx.xx.3
    ssh_port: 27615
    port: 6000
    status_port: 3080
    deploy_dir: /data/tidb/qdtest-tidb_test/deploy/tiproxy-6000

  - host: xx.xx.xx.4
    ssh_port: 27615
    port: 6000
    status_port: 3080
    deploy_dir: /data/tidb/qdtest-tidb_test/deploy/tiproxy-6000
# PD members pd-1..pd-3. All use client port 2379 and peer port 2380.
# xx.xx.xx.4 (pd-3) is the host named in the reported start-up timeout.
pd_servers:
  - host: xx.xx.xx.2
    name: pd-1
    ssh_port: 27615
    # Client and peer ports.
    client_port: 2379
    peer_port: 2380
    # Instance directories.
    deploy_dir: /data/tidb/qdtest-tidb_test/deploy/pd-2379
    data_dir: /data/tidb/qdtest-tidb_test/data/pd-2379
    log_dir: /data/tidb/qdtest-tidb_test/deploy/pd-2379/log

  - host: xx.xx.xx.3
    name: pd-2
    ssh_port: 27615
    client_port: 2379
    peer_port: 2380
    deploy_dir: /data/tidb/qdtest-tidb_test/deploy/pd-2379
    data_dir: /data/tidb/qdtest-tidb_test/data/pd-2379
    log_dir: /data/tidb/qdtest-tidb_test/deploy/pd-2379/log

  - host: xx.xx.xx.4
    name: pd-3
    ssh_port: 27615
    client_port: 2379
    peer_port: 2380
    deploy_dir: /data/tidb/qdtest-tidb_test/deploy/pd-2379
    data_dir: /data/tidb/qdtest-tidb_test/data/pd-2379
    log_dir: /data/tidb/qdtest-tidb_test/deploy/pd-2379/log
# Single monitoring instance.
# NOTE(review): the directory names end in 8249 while the configured port is
# 9090 — the paths are kept as-is; confirm the mismatch is intentional.
monitoring_servers:
  - host: xx.xx.xx.2
    ssh_port: 27615
    # Listening ports.
    port: 9090
    ng_port: 12020
    # Instance directories.
    deploy_dir: /data/tidb/qdtest-tidb_test/deploy/prometheus-8249
    data_dir: /data/tidb/qdtest-tidb_test/data/prometheus-8249
    log_dir: /data/tidb/qdtest-tidb_test/deploy/prometheus-8249/log
    # No external Alertmanager endpoints; retention set to 30d.
    external_alertmanagers: []
    storage_retention: 30d
# Single Grafana instance on port 3000.
grafana_servers:
  - host: xx.xx.xx.2
    ssh_port: 27615
    port: 3000
    deploy_dir: /data/tidb/qdtest-tidb_test/deploy/grafana-3000
    # Login credentials are stored in plain text here.
    # NOTE(review): admin/admin is easy to guess — consider changing it.
    username: admin
    password: admin
    # Anonymous access is turned off.
    anonymous_enable: false
    # Left empty.
    root_url: ""
    domain: ""
# Single Alertmanager instance.
alertmanager_servers:
  - host: xx.xx.xx.2
    ssh_port: 27615
    # Web and cluster ports.
    web_port: 9093
    cluster_port: 9094
    # Instance directories.
    deploy_dir: /data/tidb/qdtest-tidb_test/deploy/alertmanager-9093
    data_dir: /data/tidb/qdtest-tidb_test/data/alertmanager-9093
    log_dir: /data/tidb/qdtest-tidb_test/deploy/alertmanager-9093/log