【SOP 系列 15】如何在 Kubernetes 上部署 TiDB-Operator (上)

1. 准备 Kubernetes 的网络环境

如果你使用类似于 Kubeadm 这样的工具部署,请确保以下组件可以正常运行:

  • kube-proxy
  • flannel
  • coredns

如果你使用了二进制的部署方式,可以参考以下步骤安装上述的组件。

1.1 部署 flannel

1.1.1 下载 flannel package

## create a folder to download the flannel package
[root@r21 soft]# mkdir -p /opt/soft/flannel && cd /opt/soft/flannel
## use the following link to download the flannel package
[root@r21 soft]# wget https://github.com/coreos/flannel/releases/download/v0.10.0/flannel-v0.10.0-linux-amd64.tar.gz
[root@r21 flannel]# tar vxzf flannel-v0.10.0-linux-amd64.tar.gz

1.1.2 创建 flannel 的 prepare 文件

## create the prepare file for flannel
[root@r21 flannel]# cd /opt/soft/flannel
cat << EOF> /opt/soft/flannel/remove-docker0.sh
#!/bin/bash
set -e

rc=0
ip link show docker0 >/dev/null 2>&1 || rc="\$?"
if [[ "\$rc" -eq "0" ]]; then
  ip link set dev docker0 down
  ip link delete docker0
fi
EOF

## grant 751 privileges for remove-docker0.sh
[root@r21 flannel]# chmod 751 /opt/soft/flannel/remove-docker0.sh

1.1.3 修改 flannel 的 post 脚本

[root@r21 ~]# cd /opt/soft/flannel
[root@r21 flannel]# sed -i "s/combined_opts_key=\"DOCKER_OPTS\"/combined_opts_key=\"DOCKER_NETWORK_OPTS\"/g" /opt/soft/flannel/mk-docker-opts.sh

1.1.4 创建 flannel CA 文件

如果在搭建 k8s 的时候使用了 CA 认证,通过以下方式创建 flannel 的 CA 认证文件

## if we want to use TLS, use the following commands to generate the CA certificate files for flannel
[root@r21 flannel]# cd /opt/soft/flannel
cat > /opt/soft/flannel/flanneld-csr.json <<EOF
{
  "CN": "flanneld",
  "hosts": [],
  "key": {
    "algo": "rsa",
    "size": 2048
  },
  "names": [
    {
      "C": "CN",
      "ST": "BeiJing",
      "L": "BeiJing",
      "O": "k8s",
      "OU": "System"
    }
  ]
}
EOF


cfssl gencert -ca=/opt/kubernetes/ssl/ca.pem \
   -ca-key=/opt/kubernetes/ssl/ca-key.pem \
   -config=/opt/kubernetes/ssl/ca-config.json \
   -profile=kubernetes flanneld-csr.json | cfssljson -bare flanneld

1.1.5 创建 flannel 的配置文件

[root@r21 flannel]# cd /opt/soft/flannel
cat > /opt/soft/flannel/flannel-cfg  <<EOF
FLANNEL_ETCD="-etcd-endpoints=https://192.168.10.21:2379,https://192.168.10.22:2379,https://192.168.10.23:2379"
FLANNEL_ETCD_KEY="-etcd-prefix=/opt/kubernetes/network"
FLANNEL_ETCD_CAFILE="--etcd-cafile=/opt/kubernetes/ssl/ca.pem"
FLANNEL_ETCD_CERTFILE="--etcd-certfile=/opt/kubernetes/ssl/flanneld.pem"
FLANNEL_ETCD_KEYFILE="--etcd-keyfile=/opt/kubernetes/ssl/flanneld-key.pem"
EOF

1.1.6 创建 flannel 的 service 文件

[root@r21 flannel]# cd /opt/soft/flannel
cat << EOF> /opt/soft/flannel/flannel.service
[Unit]
Description=Flanneld overlay address etcd agent
After=network.target
Before=docker.service

[Service]
EnvironmentFile=-/opt/kubernetes/cfg/flannel
ExecStartPre=/opt/kubernetes/bin/remove-docker0.sh
ExecStart=/opt/kubernetes/bin/flanneld \${FLANNEL_ETCD} \${FLANNEL_ETCD_KEY} \${FLANNEL_ETCD_CAFILE} \${FLANNEL_ETCD_CERTFILE} \${FLANNEL_ETCD_KEYFILE}
ExecStartPost=/opt/kubernetes/bin/mk-docker-opts.sh -d /run/flannel/docker

Type=notify

[Install]
WantedBy=multi-user.target
RequiredBy=docker.service
EOF

1.1.7 分发 flannel 的文件

[root@r21 flannel]# cd /opt/soft/flannel

## distribute the prepare, post script and binary file to all the k8s nodes
[root@r21 flannel]# for node_ip in k8s-master k8s-node1 k8s-node2; do scp flannel flanneld remove-docker0.sh mk-docker-opts.sh root@${node_ip}:/opt/kubernetes/bin/; done

## distribute the systemd service file for flannel to all k8s nodes
[root@r21 flannel]# for node_ip in k8s-master k8s-node1 k8s-node2; do scp flannel.service root@${node_ip}:/usr/lib/systemd/system/; done

## distribute the CA files for flannel to all k8s nodes
[root@r21 flannel]# for node_ip in k8s-master k8s-node1 k8s-node2; do scp flanneld.csr flanneld-csr.json flanneld-key.pem flanneld.pem   root@${node_ip}:/opt/kubernetes/ssl/; done

## distribute the configuration file for flannel
[root@r21 flannel]# for node_ip in k8s-master k8s-node1 k8s-node2; do scp flannel-cfg root@${node_ip}:/opt/kubernetes/cfg/flannel; done

1.1.8 创建 flannel 的 etcd key

## run the following command on one of the etcd servers to create the etcd key for flannel
/opt/kubernetes/bin/etcdctl \
--ca-file /opt/kubernetes/ssl/ca.pem \
--cert-file /opt/kubernetes/ssl/flanneld.pem \
--key-file /opt/kubernetes/ssl/flanneld-key.pem \
--no-sync -C https://192.168.10.21:2379,https://192.168.10.22:2379,https://192.168.10.23:2379 \
mk /opt/kubernetes/network/config '{ "Network": "10.0.0.0/16", "Backend": { "Type": "vxlan", "VNI": 1 }}'

## use the following command to check the etcd key for flannel
/opt/kubernetes/bin/etcdctl \
  --ca-file /opt/kubernetes/ssl/ca.pem \
  --cert-file /opt/kubernetes/ssl/flanneld.pem \
  --key-file /opt/kubernetes/ssl/flanneld-key.pem \
  --no-sync -C https://192.168.10.21:2379,https://192.168.10.22:2379,https://192.168.10.23:2379 \
get /opt/kubernetes/network/config
## the output ## { "Network": "10.0.0.0/16", "Backend": { "Type": "vxlan", "VNI": 1 }}

1.1.9 修改 docker service 并重启

## modify the docker service file on all k8s servers so that it contains the following two settings
[root@r21 docker]# cat /usr/lib/systemd/system/docker.service
EnvironmentFile=-/run/flannel/docker
ExecStart=/usr/bin/dockerd $DOCKER_NETWORK_OPTS -H fd:// --containerd=/run/containerd/containerd.sock

## restart all the docker service
[root@r21 flannel]# systemctl daemon-reload && systemctl restart docker

1.1.10 启动 flannel 服务

## start flannel service in all k8s server
[root@r21 flannel]# systemctl start flannel
[root@r21 flannel]# systemctl status flannel
● flannel.service - Flanneld overlay address etcd agent
   Loaded: loaded (/usr/lib/systemd/system/flannel.service; disabled; vendor preset: disabled)
   Active: active (running) since Thu 2020-12-24 04:07:07 EST; 35s ago
  Process: 4658 ExecStartPost=/opt/kubernetes/bin/mk-docker-opts.sh -d /run/flannel/docker (code=exited, status=0/SUCCESS)
  Process: 4637 ExecStartPre=/opt/kubernetes/bin/remove-docker0.sh (code=exited, status=0/SUCCESS)
 Main PID: 4644 (flanneld)
    Tasks: 7
   Memory: 6.4M
   CGroup: /system.slice/flannel.service
           └─4644 /opt/kubernetes/bin/flanneld -etcd-endpoints=https://192.168.10.21:2379,https://192.168.10.22:2379,https://192.168.10.23:2379 -etcd-prefix=/opt/kubernetes/network --etcd-cafile=/opt/kubernetes/ssl/ca.pem --etcd-certfile=/opt/kubernetes/ssl/flanneld.pem --etcd-keyfile=/opt/kubernetes/ssl/flanneld-key.pem

Dec 24 04:07:07 r21 flanneld[4644]: I1224 04:07:07.389605    4644 main.go:300] Wrote subnet file to /run/flannel/subnet.env
Dec 24 04:07:07 r21 flanneld[4644]: I1224 04:07:07.389621    4644 main.go:304] Running backend.
Dec 24 04:07:07 r21 flanneld[4644]: I1224 04:07:07.389887    4644 vxlan_network.go:60] watching for new subnet leases
Dec 24 04:07:07 r21 flanneld[4644]: I1224 04:07:07.391761    4644 iptables.go:115] Some iptables rules are missing; deleting and recreating rules
Dec 24 04:07:07 r21 flanneld[4644]: I1224 04:07:07.391769    4644 iptables.go:137] Deleting iptables rule: -s 10.0.0.0/16 -j ACCEPT
Dec 24 04:07:07 r21 flanneld[4644]: I1224 04:07:07.393906    4644 iptables.go:137] Deleting iptables rule: -d 10.0.0.0/16 -j ACCEPT
Dec 24 04:07:07 r21 flanneld[4644]: I1224 04:07:07.394377    4644 iptables.go:125] Adding iptables rule: -s 10.0.0.0/16 -j ACCEPT
Dec 24 04:07:07 r21 flanneld[4644]: I1224 04:07:07.395496    4644 main.go:396] Waiting for 22h59m59.992331731s to renew lease
Dec 24 04:07:07 r21 flanneld[4644]: I1224 04:07:07.395508    4644 iptables.go:125] Adding iptables rule: -d 10.0.0.0/16 -j ACCEPT
Dec 24 04:07:07 r21 systemd[1]: Started Flanneld overlay address etcd agent.

1.1.11 检查 docker 的 network interface

# check the network interface in all k8s servers
[root@r21 flannel]# ip addr
... ...
8: flannel.1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1450 qdisc noqueue state UNKNOWN group default
    link/ether 26:62:f5:f9:a8:05 brd ff:ff:ff:ff:ff:ff
    inet 10.0.62.0/32 scope global flannel.1
       valid_lft forever preferred_lft forever
    inet6 fe80::2462:f5ff:fef9:a805/64 scope link
       valid_lft forever preferred_lft forever
9: docker0: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc noqueue state DOWN group default
    link/ether 02:42:c4:f3:fa:1e brd ff:ff:ff:ff:ff:ff
    inet 172.17.0.1/16 brd 172.17.255.255 scope global docker0
       valid_lft forever preferred_lft forever

1.2 部署 kube-proxy

1.2.1 安装 kube-proxy 的依赖

## install the following packages on all k8s servers
[root@r21 ~]# yum install -y ipvsadm ipset conntrack

1.2.2 创建并分发 kube-proxy 的 CA 文件

如果在搭建 Kubernetes 的时候使用了 CA 认证,这里也需要创建 CA 认证

[root@r21 ~]# cd /opt/kubernetes/ssl
## generate the csr json file for kube-proxy
cat <<EOF > /opt/kubernetes/ssl/kube-proxy-csr.json
{
  "CN": "system:kube-proxy",
  "hosts": [],
  "key": {
    "algo": "rsa",
    "size": 2048
  },
  "names": [
    {
      "C": "CN",
      "ST": "BeiJing",
      "L": "BeiJing",
      "O": "k8s",
      "OU": "System"
    }
  ]
}
EOF

## generate the CA files for kube-proxy
[root@r21 ~]# cd /opt/kubernetes/ssl
cfssl gencert -ca=/opt/kubernetes/ssl/ca.pem \
   -ca-key=/opt/kubernetes/ssl/ca-key.pem \
   -config=/opt/kubernetes/ssl/ca-config.json \
   -profile=kubernetes  kube-proxy-csr.json | cfssljson -bare kube-proxy
   
## distribute the CA files for kube-proxy
[root@r21 ssl]# for node_ip in k8s-master k8s-node1 k8s-node2; do scp kube-proxy-key.pem kube-proxy.pem root@${node_ip}:/opt/kubernetes/ssl/; done

1.2.3 创建并分发 kube-proxy 的 kubeconfig 文件

## execute the following commands on the k8s master server to create the kube-proxy kubeconfig file
[root@r21 ~]# cd /opt/kubernetes/cfg
kubectl config set-cluster kubernetes \
  --certificate-authority=/opt/kubernetes/ssl/ca.pem \
  --embed-certs=true \
  --server=https://192.168.10.21:6443 \
  --kubeconfig=/opt/kubernetes/cfg/kube-proxy.kubeconfig

kubectl config set-credentials kube-proxy \
  --client-certificate=/opt/kubernetes/ssl/kube-proxy.pem \
  --client-key=/opt/kubernetes/ssl/kube-proxy-key.pem \
  --embed-certs=true \
  --kubeconfig=/opt/kubernetes/cfg/kube-proxy.kubeconfig

kubectl config set-context default \
  --cluster=kubernetes \
  --user=kube-proxy \
  --kubeconfig=/opt/kubernetes/cfg/kube-proxy.kubeconfig

kubectl config use-context default --kubeconfig=/opt/kubernetes/cfg/kube-proxy.kubeconfig

## distribute the kubeconfig file to all k8s servers
[root@r21 cfg]# for node_ip in k8s-master k8s-node1 k8s-node2; do scp /opt/kubernetes/cfg/kube-proxy.kubeconfig root@${node_ip}:/opt/kubernetes/cfg/; done

1.2.4 创建 kube-proxy 配置文件

## run the following commands on all k8s servers
## set the variable server_ip to the IP address of the server on which the commands are run
export server_ip=192.168.10.21
cat <<EOF > /opt/kubernetes/cfg/kube-proxy
KUBE_PROXY_OPTS="--logtostderr=false \\
--v=4 \\
--log-dir=/opt/kubernetes/log \\
--hostname-override=${server_ip} \\
--cluster-cidr=10.0.0.0/24 \\
--proxy-mode=ipvs \\
--ipvs-min-sync-period=5s \\
--ipvs-sync-period=5s \\
--ipvs-scheduler=rr \\
--masquerade-all=true \\
--kubeconfig=/opt/kubernetes/cfg/kube-proxy.kubeconfig"
EOF

1.2.5 创建 kube-proxy 的 service 文件

## create the kube-proxy service file on all k8s servers
cat <<EOF > /usr/lib/systemd/system/kube-proxy.service
[Unit]
Description=Kubernetes Proxy
After=network.target

[Service]
EnvironmentFile=-/opt/kubernetes/cfg/kube-proxy
ExecStart=/opt/kubernetes/bin/kube-proxy \$KUBE_PROXY_OPTS
Restart=on-failure
RestartSec=5
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target

EOF

1.2.6 启动 kube-proxy service

[root@r21 cfg]# systemctl start kube-proxy

[root@r21 cfg]# systemctl status kube-proxy
● kube-proxy.service - Kubernetes Proxy
   Loaded: loaded (/usr/lib/systemd/system/kube-proxy.service; disabled; vendor preset: disabled)
   Active: active (running) since Thu 2020-12-24 04:54:40 EST; 40s ago
 Main PID: 6190 (kube-proxy)
    Tasks: 0
   Memory: 6.4M
   CGroup: /system.slice/kube-proxy.service
           ‣ 6190 /opt/kubernetes/bin/kube-proxy --logtostderr=false --v=4 --log-dir=/opt/kubernetes/log --hostname-override=192.168.10.21 --cluster-cidr=10.0.0.0/24 --proxy-mode=ipvs --ipvs-min-sync-period=5s --ipvs-sync-period=5s --ipvs-scheduler=rr --masquerade-all=true --kubeconfig=/opt/kubernetes/cfg/kube-proxy.kubecon...

Dec 24 04:55:21 r21 kube-proxy[6190]: -A KUBE-LOAD-BALANCER -j KUBE-MARK-MASQ
Dec 24 04:55:21 r21 kube-proxy[6190]: -A KUBE-FIREWALL -j KUBE-MARK-DROP
Dec 24 04:55:21 r21 kube-proxy[6190]: -A KUBE-SERVICES -m set --match-set KUBE-CLUSTER-IP dst,dst -j ACCEPT
Dec 24 04:55:21 r21 kube-proxy[6190]: COMMIT
Dec 24 04:55:21 r21 kube-proxy[6190]: *filter
Dec 24 04:55:21 r21 kube-proxy[6190]: :KUBE-FORWARD - [0:0]
Dec 24 04:55:21 r21 kube-proxy[6190]: -A KUBE-FORWARD -m comment --comment "kubernetes forwarding rules" -m mark --mark 0x00004000/0x00004000 -j ACCEPT
Dec 24 04:55:21 r21 kube-proxy[6190]: -A KUBE-FORWARD -s 10.0.0.0/24 -m comment --comment "kubernetes forwarding conntrack pod source rule" -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
Dec 24 04:55:21 r21 kube-proxy[6190]: -A KUBE-FORWARD -m comment --comment "kubernetes forwarding conntrack pod destination rule" -d 10.0.0.0/24 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
Dec 24 04:55:21 r21 kube-proxy[6190]: COMMIT

1.2.7 检查 kube-proxy

## check the kubernetes service 
[root@r21 cfg]# kubectl get service
NAME         TYPE        CLUSTER-IP   EXTERNAL-IP   PORT(S)   AGE
kubernetes   ClusterIP   10.0.0.1     <none>        443/TCP   3h24m

## check the connection to kubernetes service in all k8s servers
[root@r21 cfg]# ping 10.0.0.1 -c 2
PING 10.0.0.1 (10.0.0.1) 56(84) bytes of data.
64 bytes from 10.0.0.1: icmp_seq=1 ttl=64 time=0.030 ms
64 bytes from 10.0.0.1: icmp_seq=2 ttl=64 time=0.035 ms

--- 10.0.0.1 ping statistics ---
2 packets transmitted, 2 received, 0% packet loss, time 1000ms
rtt min/avg/max/mdev = 0.030/0.032/0.035/0.006 ms

1.3 部署 coredns

1.3.1 下载 coredns yaml 文件

## use the following link to download the coredns yaml file
https://github.com/coredns/deployment/blob/master/kubernetes/coredns.yaml.sed

[root@k8s-master coredns]# cat coredns.yaml
# Warning: This is a file generated from the base underscore template file: coredns.yaml.base

apiVersion: v1
kind: ServiceAccount
metadata:
  name: coredns
  namespace: kube-system
  labels:
      kubernetes.io/cluster-service: "true"
      addonmanager.kubernetes.io/mode: Reconcile
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    kubernetes.io/bootstrapping: rbac-defaults
    addonmanager.kubernetes.io/mode: Reconcile
  name: system:coredns
rules:
- apiGroups:
  - ""
  resources:
  - endpoints
  - services
  - pods
  - namespaces
  verbs:
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - nodes
  verbs:
  - get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  annotations:
    rbac.authorization.kubernetes.io/autoupdate: "true"
  labels:
    kubernetes.io/bootstrapping: rbac-defaults
    addonmanager.kubernetes.io/mode: EnsureExists
  name: system:coredns
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:coredns
subjects:
- kind: ServiceAccount
  name: coredns
  namespace: kube-system
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: coredns
  namespace: kube-system
  labels:
      addonmanager.kubernetes.io/mode: EnsureExists
data:
  Corefile: |
    .:53 {
        errors
        health
        kubernetes cluster.local  in-addr.arpa ip6.arpa {  #cluster.local集群域名
            pods insecure
            upstream
            fallthrough in-addr.arpa ip6.arpa
            ttl 30
        }
        prometheus :9153
        forward . /etc/resolv.conf
        cache 30
        loop
        reload
        loadbalance
    }
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: coredns
  namespace: kube-system
  labels:
    k8s-app: kube-dns
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
    kubernetes.io/name: "CoreDNS"
spec:
  # replicas: not specified here:
  # 1. In order to make Addon Manager do not reconcile this replicas parameter.
  # 2. Default is 1.
  # 3. Will be tuned in real time if DNS horizontal auto-scaling is turned on.
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 1
  selector:
    matchLabels:
      k8s-app: kube-dns
  template:
    metadata:
      labels:
        k8s-app: kube-dns
      annotations:
        seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
    spec:
      priorityClassName: system-cluster-critical
      serviceAccountName: coredns
      tolerations:
        - key: "CriticalAddonsOnly"
          operator: "Exists"
      nodeSelector:
        beta.kubernetes.io/os: linux
      containers:
      - name: coredns
        image: k8s.gcr.io/coredns:1.3.1
        imagePullPolicy: IfNotPresent
        resources:
          limits:
            memory: 1024Mi
          requests:
            cpu: 100m
            memory: 70Mi
        args: [ "-conf", "/etc/coredns/Corefile" ]
        volumeMounts:
        - name: config-volume
          mountPath: /etc/coredns
          readOnly: true
        ports:
        - containerPort: 53
          name: dns
          protocol: UDP
        - containerPort: 53
          name: dns-tcp
          protocol: TCP
        - containerPort: 9153
          name: metrics
          protocol: TCP
        livenessProbe:
          httpGet:
            path: /health
            port: 8080
            scheme: HTTP
          initialDelaySeconds: 60
          timeoutSeconds: 5
          successThreshold: 1
          failureThreshold: 5
        readinessProbe:
          httpGet:
            path: /health
            port: 8080
            scheme: HTTP
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            add:
            - NET_BIND_SERVICE
            drop:
            - all
          readOnlyRootFilesystem: true
      dnsPolicy: Default
      volumes:
        - name: config-volume
          configMap:
            name: coredns
            items:
            - key: Corefile
              path: Corefile
---
apiVersion: v1
kind: Service
metadata:
  name: kube-dns
  namespace: kube-system
  annotations:
    prometheus.io/port: "9153"
    prometheus.io/scrape: "true"
  labels:
    k8s-app: kube-dns
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
    kubernetes.io/name: "CoreDNS"
spec:
  selector:
    k8s-app: kube-dns
  clusterIP: 10.0.0.2   ## modify your cluster ip
  ports:
  - name: dns
    port: 53
    protocol: UDP
  - name: dns-tcp
    port: 53
    protocol: TCP
  - name: metrics
    port: 9153
    protocol: TCP

1.3.2 创建 coredns

[root@r21 soft]# kubectl apply -f coredns.yaml
[root@r21 soft]# kubectl get pods -n kube-system
NAME                       READY   STATUS    RESTARTS   AGE
coredns-74dff7f48d-2j7s2   1/1     Running   0          66m

1赞