Contents
I. Kubeadm deployment
1. Basic operations
2. Supplement
II. Common etcd operations
1. kubernetes auto-completion:
2. Copy the etcdctl command-line tool:
3. Common etcdctl operations:
1 List the etcd cluster members:
2 Check etcd cluster node status:
3 Set key values:
4 etcd snapshot and restore
5 Production-grade etcd backup
https://segmentfault.com/a/1190000019465098
The basic steps aren't covered in detail here; there are plenty of write-ups online. The flannel manifest URL is often unreachable, so I've pasted it directly below. I used to be unsure whether a resource's apiVersion should be v1 or v1beta1; kubectl explain pod, or simply -o yaml on a live object, settles it.
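For instance (standard kubectl commands, usable on any cluster):
# Print the KIND/VERSION header for the Pod resource
$ kubectl explain pod | head -n 2
# List every group/version the apiserver serves, e.g. for the policy group
$ kubectl api-versions | grep policy
# Or read the apiVersion straight off a live object
$ kubectl -n kube-system get pod etcd-k8s-master -o yaml | head -n 2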
---
apiVersion: policy/v1beta1
kind: PodSecurityPolicy
metadata:
  name: psp.flannel.unprivileged
  annotations:
    seccomp.security.alpha.kubernetes.io/allowedProfileNames: docker/default
    seccomp.security.alpha.kubernetes.io/defaultProfileName: docker/default
    apparmor.security.beta.kubernetes.io/allowedProfileNames: runtime/default
    apparmor.security.beta.kubernetes.io/defaultProfileName: runtime/default
spec:
  privileged: false
  volumes:
  - configMap
  - secret
  - emptyDir
  - hostPath
  allowedHostPaths:
  - pathPrefix: "/etc/cni/net.d"
  - pathPrefix: "/etc/kube-flannel"
  - pathPrefix: "/run/flannel"
  readOnlyRootFilesystem: false
  runAsUser:
    rule: RunAsAny
  supplementalGroups:
    rule: RunAsAny
  fsGroup:
    rule: RunAsAny
  allowPrivilegeEscalation: false
  defaultAllowPrivilegeEscalation: false
  # Capabilities
  allowedCapabilities: ['NET_ADMIN', 'NET_RAW']
  defaultAddCapabilities: []
  requiredDropCapabilities: []
  # Host namespaces
  hostPID: false
  hostIPC: false
  hostNetwork: true
  hostPorts:
  - min: 0
    max: 65535
  # SELinux
  seLinux:
    # SELinux is unused in CaaSP
    rule: 'RunAsAny'
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: flannel
rules:
- apiGroups: ['extensions']
  resources: ['podsecuritypolicies']
  verbs: ['use']
  resourceNames: ['psp.flannel.unprivileged']
- apiGroups:
  - ""
  resources:
  - pods
  verbs:
  - get
- apiGroups:
  - ""
  resources:
  - nodes
  verbs:
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - nodes/status
  verbs:
  - patch
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: flannel
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: flannel
subjects:
- kind: ServiceAccount
  name: flannel
  namespace: kube-system
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: flannel
  namespace: kube-system
---
kind: ConfigMap
apiVersion: v1
metadata:
  name: kube-flannel-cfg
  namespace: kube-system
  labels:
    tier: node
    app: flannel
data:
  cni-conf.json: |
    {
      "name": "cbr0",
      "cniVersion": "0.3.1",
      "plugins": [
        {
          "type": "flannel",
          "delegate": {
            "hairpinMode": true,
            "isDefaultGateway": true
          }
        },
        {
          "type": "portmap",
          "capabilities": {
            "portMappings": true
          }
        }
      ]
    }
  net-conf.json: |
    {
      "Network": "10.244.0.0/16",
      "Backend": {
        "Type": "vxlan"
      }
    }
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kube-flannel-ds
  namespace: kube-system
  labels:
    tier: node
    app: flannel
spec:
  selector:
    matchLabels:
      app: flannel
  template:
    metadata:
      labels:
        tier: node
        app: flannel
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/os
                operator: In
                values:
                - linux
      hostNetwork: true
      priorityClassName: system-node-critical
      tolerations:
      - operator: Exists
        effect: NoSchedule
      serviceAccountName: flannel
      initContainers:
      - name: install-cni-plugin
        image: rancher/mirrored-flannelcni-flannel-cni-plugin:v1.2
        command:
        - cp
        args:
        - -f
        - /flannel
        - /opt/cni/bin/flannel
        volumeMounts:
        - name: cni-plugin
          mountPath: /opt/cni/bin
      - name: install-cni
        image: quay.io/coreos/flannel:v0.15.0
        command:
        - cp
        args:
        - -f
        - /etc/kube-flannel/cni-conf.json
        - /etc/cni/net.d/10-flannel.conflist
        volumeMounts:
        - name: cni
          mountPath: /etc/cni/net.d
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
      containers:
      - name: kube-flannel
        image: quay.io/coreos/flannel:v0.15.0
        command:
        - /opt/bin/flanneld
        args:
        - --ip-masq
        - --kube-subnet-mgr
        resources:
          requests:
            cpu: "100m"
            memory: "50Mi"
          limits:
            cpu: "100m"
            memory: "50Mi"
        securityContext:
          privileged: false
          capabilities:
            add: ["NET_ADMIN", "NET_RAW"]
        env:
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        volumeMounts:
        - name: run
          mountPath: /run/flannel
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
      volumes:
      - name: run
        hostPath:
          path: /run/flannel
      - name: cni-plugin
        hostPath:
          path: /opt/cni/bin
      - name: cni
        hostPath:
          path: /etc/cni/net.d
      - name: flannel-cfg
        configMap:
          name: kube-flannel-cfg
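Assuming the manifest above is saved as kube-flannel.yml (the file name is up to you), apply it and watch the DaemonSet pods come up; nodes turn Ready once the CNI is in place:
$ kubectl apply -f kube-flannel.yml
$ kubectl -n kube-system get pods -l app=flannel -o wide
$ kubectl get nodes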
Auto-complete common k8s resource names and flags to work faster:
yum install -y bash-completion
source /usr/share/bash-completion/bash_completion
source <(kubectl completion bash)
echo "source <(kubectl completion bash)" >> ~/.bashrc
etcdctl feels a lot like the redis CLI; both are key-value stores, and many commands look alike.
$ kubectl -n kube-system exec etcd-k8s-master -- which etcdctl
$ kubectl -n kube-system cp etcd-k8s-master:/usr/local/bin/etcdctl /usr/bin/etcdctl
# The warning below shows up at first; switch the API version to v3
WARNING:
Environment variable ETCDCTL_API is not set; defaults to etcdctl v2.
Set environment variable ETCDCTL_API=3 to use v3 API or ETCDCTL_API=2 to use v2 API.
$ export ETCDCTL_API=3
# Every etcdctl call needs the TLS certs attached, so wrap it in an alias
$ alias etcdctl='etcdctl --endpoints=https://[127.0.0.1]:2379 --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt --key=/etc/kubernetes/pki/etcd/healthcheck-client.key'
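The exported variable and the alias only last for the current shell; to keep them across logins, append both to ~/.bashrc (same kubeadm cert paths as above):
$ cat >> ~/.bashrc <<'EOF'
export ETCDCTL_API=3
alias etcdctl='etcdctl --endpoints=https://[127.0.0.1]:2379 --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt --key=/etc/kubernetes/pki/etcd/healthcheck-client.key'
EOF
$ source ~/.bashrc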
$ etcdctl member list -w table
+------------------+---------+------------+----------------------------+----------------------------+
| ID | STATUS | NAME | PEER ADDRS | CLIENT ADDRS |
+------------------+---------+------------+----------------------------+----------------------------+
| 49c374033081590d | started | k8s-master | https://192.168.0.121:2380 | https://192.168.0.121:2379 |
+------------------+---------+------------+----------------------------+----------------------------+
$ etcdctl endpoint status -w table
$ etcdctl endpoint health -w table
# Like redis, set a key/value pair by hand
$ etcdctl put luffy 1
$ etcdctl get luffy
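To read back just the value, or to clean the test key up afterwards:
$ etcdctl get luffy --print-value-only
$ etcdctl del luffy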
List all keys:
$ etcdctl get / --prefix --keys-only
# etcd is always listening, so any change in the cluster is synced to it immediately; in effect etcd tracks every resource, keyed as /registry/<resource-type>/<namespace>/<object-name>. For example, to watch a pod's state, run etcdctl watch against the directory or object key:
$ etcdctl watch /registry/pods/kube-system/coredns-5644d7b6d9-7gw6t
$ etcdctl get /registry/pods/kube-system/coredns-5644d7b6d9-7gw6t --prefix
Look at the data stored under a specific key:
$ etcdctl get /registry/pods/jenkins/sonar-postgres-7fc5d748b6-gtmsb
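The values come back mostly unreadable because Kubernetes persists objects to etcd as binary protobuf; -w json at least shows the base64-encoded payload (fully decoding it takes a third-party helper such as auger):
$ etcdctl get /registry/pods/jenkins/sonar-postgres-7fc5d748b6-gtmsb -w json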
Take a data snapshot (wire it to a scheduled task for regular backups):
$ etcdctl snapshot save `hostname`-etcd_`date +%Y%m%d%H%M`.db
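To sanity-check a snapshot and put the save on a schedule, something like the following works. A sketch only: the snapshot file name and the /backup directory are examples, cron does not expand shell aliases (so the cert flags are spelled out in full), and % must be escaped in a crontab:
$ etcdctl snapshot status k8s-master-etcd_202111020200.db -w table
$ crontab -e
0 2 * * * ETCDCTL_API=3 /usr/bin/etcdctl --endpoints=https://[127.0.0.1]:2379 --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt --key=/etc/kubernetes/pki/etcd/healthcheck-client.key snapshot save /backup/$(hostname)-etcd_$(date +\%Y\%m\%d\%H\%M).db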
Restore a snapshot:
Stop etcd and the apiserver
Move the current data directory out of the way
$ mv /var/lib/etcd/ /tmp
Restore the snapshot (give the actual snapshot file name; re-evaluating the backticked date from the save command would produce the current time, not the snapshot's timestamp)
$ etcdctl snapshot restore <snapshot-file>.db --data-dir=/var/lib/etcd/
Cluster recovery:
https://github.com/etcd-io/etcd/blob/master/Documentation/op-guide/recovery.md
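On a kubeadm cluster both etcd and the apiserver run as static pods, so "stopping" them means moving their manifests out of the kubelet's watched directory. A minimal single-node sketch, assuming the default /etc/kubernetes/manifests path and an example snapshot name:
# stop the static pods by moving their manifests away
$ mv /etc/kubernetes/manifests/etcd.yaml /etc/kubernetes/manifests/kube-apiserver.yaml /tmp/
# move the old data directory aside, then restore into a fresh one
$ mv /var/lib/etcd /tmp/etcd.bak
$ etcdctl snapshot restore /backup/k8s-master-etcd_202111020200.db --data-dir=/var/lib/etcd
# put the manifests back; the kubelet restarts etcd and the apiserver from the restored data
$ mv /tmp/etcd.yaml /tmp/kube-apiserver.yaml /etc/kubernetes/manifests/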
# Production-grade scheduled backups via CronJobs
# etcd-db-bak: /var/lib/etcd_backup
# etcd-cert: /etc/etcd/pki
# etcd-bin: pod-name /usr/local/bin/etcd
# firewalld: /usr/lib/firewalld/services/etcd-client.xml
# yaml: /home/install/k8s-self/template/master/k8s-etcd-backup.yaml
# shell: /home/install/k8s-self/scripts/etcd/afterInstall.sh 36-zhu
# The CronJobs below back up the etcd data on a schedule
---
apiVersion: batch/v1beta1
kind: CronJob
metadata:
  name: k8s-etcd-backup-0
  namespace: kube-system
spec:
  # timezone is same as controller manager, default is UTC
  # 18:00 UTC is 02:00 Beijing time the next day
  schedule: "12 18 * * *"
  concurrencyPolicy: Replace # Allow: concurrent runs permitted; Forbid: concurrent runs not allowed; Replace: a new run replaces the previous one
  failedJobsHistoryLimit: 2 # how many failed runs to keep, default 1
  successfulJobsHistoryLimit: 2 # how many successful runs to keep, default 3; with 2 kept per CronJob the three jobs leave up to 6 completed pods visible
  startingDeadlineSeconds: 3600 # deadline in seconds for starting a job that missed its scheduled time for whatever reason; a miss past this is recorded as a failure
  jobTemplate: # Job template from which the CronJob controller generates Job objects
    spec:
      template:
        metadata:
          labels:
            app: k8s-etcd-backup
        spec:
          # Taints are declared on nodes; tolerations, declared on pods, state which taints a pod accepts.
          # This toleration lets the pod run on master nodes; check a node's taints with
          # kubectl describe node <nodename> | grep Taints (here it returned none)
          tolerations:
          - key: node-role.kubernetes.io/master
            operator: Exists
            effect: NoSchedule
          affinity:
            nodeAffinity:
              requiredDuringSchedulingIgnoredDuringExecution: # hard affinity: a mandatory rule the scheduler must satisfy, otherwise the Pod stays Pending
                nodeSelectorTerms: # multiple nodeSelectorTerms are ORed; satisfying one is enough
                - matchExpressions: # multiple matchExpressions are ANDed; all must be satisfied
                  - key: kubernetes.io/hostname # pin to the node whose kubernetes.io/hostname label is k8s-hostname-node1
                    operator: In # In: the label value must appear in the list
                    values:
                    - k8s-hostname-node1 # each job is pinned one-to-one to a node
          containers:
          - name: k8s-etcd-backup
            image: harborIP/kubernetes/etcd:3.4.3-0
            imagePullPolicy: IfNotPresent
            resources:
              requests:
                cpu: "0"
                memory: "0"
              limits:
                cpu: 1000m
                memory: 1Gi
            env:
            - name: ENDPOINTS
              value: "https://k8s-node1:2379"
            command:
            - /bin/sh
            - -c
            - |
              set -ex # -e: abort as soon as a command fails; -x: echo commands as they run (debug)
              rm -rf /data/backup/tmp
              mkdir -p /data/backup/tmp && test -d /data/backup/tmp || exit 1; # make sure the temp dir exists
              export backupfilename=`date +"%Y%m%d%H%M%S"`; # timestamp used in the archive name
              # bail out (cleaning up the temp dir) unless all three cert files exist
              test -f /certs/ca.pem || (rm -rf /data/backup/tmp && exit 1);test -f /certs/client.pem || (rm -rf /data/backup/tmp && exit 1);test -f /certs/client-key.pem || (rm -rf /data/backup/tmp && exit 1);
              # take the etcd snapshot, then pack it into a timestamped archive
              ETCDCTL_API=3 /usr/local/bin/etcdctl \
              --endpoints=$ENDPOINTS \
              --cacert=/certs/ca.pem \
              --cert=/certs/client.pem \
              --key=/certs/client-key.pem \
              --command-timeout=1800s \
              snapshot save /data/backup/tmp/etcd-snapshot.db && \
              cd /data/backup/tmp; tar -czf /data/backup/etcd-snapshot-${backupfilename}.tar.gz * && \
              cd -; rm -rf /data/backup/tmp
              if [ $? -ne 0 ]; then # exit 1 if the run failed
                exit 1
              fi
              # delete old files, keeping only the 7 most recent archives
              count=0;
              for file in `ls -t /data/backup/*tar.gz`
              do
                count=`expr $count + 1`
                if [ $count -gt 7 ]; then
                  rm -rf $file
                fi
              done
            volumeMounts: # paths inside the container
            - name: master-backup
              mountPath: /data/backup
            - name: etcd-certs
              mountPath: /certs
            - name: timezone
              mountPath: /etc/localtime
              readOnly: true
          volumes: # directories mapped in from the host
          - name: master-backup # backup destination
            hostPath:
              path: /var/lib/etcd_backup
          - name: etcd-certs
            hostPath:
              path: /etc/etcd/pki # cert directory
          - name: timezone
            hostPath:
              path: /etc/localtime # host timezone file
          restartPolicy: Never # the job exits when finished; no restart needed
          hostNetwork: true
---
apiVersion: batch/v1beta1
kind: CronJob
metadata:
  name: k8s-etcd-backup-1
  namespace: kube-system
spec:
  # timezone is same as controller manager, default is UTC
  schedule: "12 19 * * *"
  concurrencyPolicy: Replace
  failedJobsHistoryLimit: 2
  successfulJobsHistoryLimit: 2
  startingDeadlineSeconds: 3600
  jobTemplate:
    spec:
      template:
        metadata:
          labels:
            app: k8s-etcd-backup
        spec:
          tolerations:
          - key: node-role.kubernetes.io/master
            operator: Exists
            effect: NoSchedule
          affinity:
            nodeAffinity:
              requiredDuringSchedulingIgnoredDuringExecution:
                nodeSelectorTerms:
                - matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: In
                    values:
                    - k8s-hostname-master
          containers:
          - name: k8s-etcd-backup
            image: harborIP/kubernetes/etcd:3.4.3-0
            imagePullPolicy: IfNotPresent
            resources:
              requests:
                cpu: "0"
                memory: "0"
              limits:
                cpu: 1000m
                memory: 1Gi
            env:
            - name: ENDPOINTS
              value: "https://k8s-master:2379"
            command:
            - /bin/sh
            - -c
            - |
              set -ex
              rm -rf /data/backup/tmp
              mkdir -p /data/backup/tmp && test -d /data/backup/tmp || exit 1;
              export backupfilename=`date +"%Y%m%d%H%M%S"`;
              test -f /certs/ca.pem || (rm -rf /data/backup/tmp && exit 1);test -f /certs/client.pem || (rm -rf /data/backup/tmp && exit 1);test -f /certs/client-key.pem || (rm -rf /data/backup/tmp && exit 1);\
              ETCDCTL_API=3 /usr/local/bin/etcdctl \
              --endpoints=$ENDPOINTS \
              --cacert=/certs/ca.pem \
              --cert=/certs/client.pem \
              --key=/certs/client-key.pem \
              --command-timeout=1800s \
              snapshot save /data/backup/tmp/etcd-snapshot.db && \
              cd /data/backup/tmp; tar -czf /data/backup/etcd-snapshot-${backupfilename}.tar.gz * && \
              cd -; rm -rf /data/backup/tmp
              if [ $? -ne 0 ]; then
                exit 1
              fi
              # delete old file more than 7
              count=0;
              for file in `ls -t /data/backup/*tar.gz`
              do
                count=`expr $count + 1`
                if [ $count -gt 7 ]; then
                  rm -rf $file
                fi
              done
            volumeMounts:
            - name: master-backup
              mountPath: /data/backup
            - name: etcd-certs
              mountPath: /certs
            - name: timezone
              mountPath: /etc/localtime
              readOnly: true
          volumes:
          - name: master-backup
            hostPath:
              path: /var/lib/etcd_backup
          - name: etcd-certs
            hostPath:
              path: /etc/etcd/pki
          - name: timezone
            hostPath:
              path: /etc/localtime
          restartPolicy: Never
          hostNetwork: true
---
apiVersion: batch/v1beta1
kind: CronJob
metadata:
  name: k8s-etcd-backup-2
  namespace: kube-system
spec:
  # timezone is same as controller manager, default is UTC
  schedule: "12 20 * * *"
  concurrencyPolicy: Replace
  failedJobsHistoryLimit: 2
  successfulJobsHistoryLimit: 2
  startingDeadlineSeconds: 3600
  jobTemplate:
    spec:
      template:
        metadata:
          labels:
            app: k8s-etcd-backup
        spec:
          tolerations:
          - key: node-role.kubernetes.io/master
            operator: Exists
            effect: NoSchedule
          affinity:
            nodeAffinity:
              requiredDuringSchedulingIgnoredDuringExecution:
                nodeSelectorTerms:
                - matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: In
                    values:
                    - k8s-hostname-node2
          containers:
          - name: k8s-etcd-backup
            image: harborIP/kubernetes/etcd:3.4.3-0
            imagePullPolicy: IfNotPresent
            resources:
              requests:
                cpu: "0"
                memory: "0"
              limits:
                cpu: 1000m
                memory: 1Gi
            env:
            - name: ENDPOINTS
              value: "https://k8s-node2:2379"
            command:
            - /bin/sh
            - -c
            - |
              set -ex
              rm -rf /data/backup/tmp
              mkdir -p /data/backup/tmp && test -d /data/backup/tmp || exit 1;
              export backupfilename=`date +"%Y%m%d%H%M%S"`;
              test -f /certs/ca.pem || (rm -rf /data/backup/tmp && exit 1);test -f /certs/client.pem || (rm -rf /data/backup/tmp && exit 1);test -f /certs/client-key.pem || (rm -rf /data/backup/tmp && exit 1);\
              ETCDCTL_API=3 /usr/local/bin/etcdctl \
              --endpoints=$ENDPOINTS \
              --cacert=/certs/ca.pem \
              --cert=/certs/client.pem \
              --key=/certs/client-key.pem \
              --command-timeout=1800s \
              snapshot save /data/backup/tmp/etcd-snapshot.db && \
              cd /data/backup/tmp; tar -czf /data/backup/etcd-snapshot-${backupfilename}.tar.gz * && \
              cd -; rm -rf /data/backup/tmp
              if [ $? -ne 0 ]; then
                exit 1
              fi
              # delete old file more than 7
              count=0;
              for file in `ls -t /data/backup/*tar.gz`
              do
                count=`expr $count + 1`
                if [ $count -gt 7 ]; then
                  rm -rf $file
                fi
              done
            volumeMounts:
            - name: master-backup
              mountPath: /data/backup
            - name: etcd-certs
              mountPath: /certs
            - name: timezone
              mountPath: /etc/localtime
              readOnly: true
          volumes:
          - name: master-backup
            hostPath:
              path: /var/lib/etcd_backup
          - name: etcd-certs
            hostPath:
              path: /etc/etcd/pki
          - name: timezone
            hostPath:
              path: /etc/localtime
          restartPolicy: Never
          hostNetwork: true
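To test one of these jobs without waiting for its schedule, create a Job from the CronJob on demand (standard kubectl; the job name is arbitrary), then check the archive on the pinned node:
$ kubectl -n kube-system create job --from=cronjob/k8s-etcd-backup-0 etcd-backup-manual
$ kubectl -n kube-system get pods -l app=k8s-etcd-backup
$ ls -lh /var/lib/etcd_backup/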