From 370554cad7b1013f0ac2bbc9901ca9f5b7d8f677 Mon Sep 17 00:00:00 2001
From: yangyuliufeng
Date: Wed, 23 Jun 2021 14:26:25 +0800
Subject: [PATCH] fix etcd dir bug

Signed-off-by: yangyuliufeng
---
Summary: replace the hard-coded /var/lib/etcd path with the ETCD_DATA_DIR
variable in the restore role, the etcd role, the etcd clean-up playbook and
the etcd unit template (the clean-up list also gains ETCD_WAL_DIR), and
refresh the backup/restore documentation.

Review notes (this section is not part of the commit message):
* roles/etcd/clean-etcd.yml: the two new with_items entries are quoted,
  because a YAML value that starts with "{{" is otherwise read as a flow
  mapping instead of a Jinja2 expression.
* docs/op/cluster_restore.md: the restore example now runs
  playbooks/95.restore.yml; the earlier text fused the backup and restore
  playbook names into a single invalid path.
* NOTE(review): clean-etcd.yml hands ETCD_WAL_DIR straight to the file
  module; confirm the variable is never empty where this playbook runs.
* NOTE(review): line breaks, blank context lines and indentation were
  reconstructed from the hunk line counts; check them against the target
  tree before applying. The post-image blob ids on the "index" lines
  predate the two fixes above.

 docs/op/cluster_restore.md           | 28 ++++++++++++++++------------
 roles/cluster-restore/tasks/main.yml |  4 ++--
 roles/etcd/clean-etcd.yml            |  3 ++-
 roles/etcd/tasks/main.yml            |  2 +-
 roles/etcd/templates/etcd.service.j2 |  2 +-
 5 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/docs/op/cluster_restore.md b/docs/op/cluster_restore.md
index 0b69a9f..294a7c6 100644
--- a/docs/op/cluster_restore.md
+++ b/docs/op/cluster_restore.md
@@ -10,19 +10,18 @@
 - 1.首先搭建一个测试集群,部署几个测试deployment,验证集群各项正常后,进行一次备份:
 
 ``` bash
-$ ansible-playbook /etc/ansible/23.backup.yml
+$ ansible-playbook -i clusters/k8s-01/hosts -e @clusters/k8s-01/config.yml playbooks/94.backup.yml
 ```
 
-执行完毕可以在备份目录下检查备份情况,示例如下:
+执行完毕可以在部署主机的备份目录下检查备份情况,示例如下:
 
 ```
-/etc/ansible/.cluster/backup/
-├── hosts
-├── hosts-201907030954
-├── snapshot-201907030954.db
-├── snapshot-201907031048.db
+/etc/kubeasz/clusters/k8s-01/backup/
+├── snapshot_202106201205.db
+├── snapshot_202106211406.db
 └── snapshot.db
 ```
+其中,snapshot.db始终为最近一次备份文件
 
 - 2.模拟误删除操作(略)
 
@@ -31,16 +30,21 @@ $ ansible-playbook /etc/ansible/23.backup.yml
 可以在 `roles/cluster-restore/defaults/main.yml` 文件中配置需要恢复的 etcd备份版本(从上述备份目录中选取),默认使用最近一次备份;执行恢复后,需要一定时间等待 pod/svc 等资源恢复重建。
 
 ``` bash
-$ ansible-playbook /etc/ansible/24.restore.yml
+$ ansible-playbook -i clusters/k8s-01/hosts -e @clusters/k8s-01/config.yml playbooks/95.restore.yml
 ```
 如果集群主要组件(master/etcd/node)等出现不可恢复问题,可以尝试使用如下步骤 [清理]() --> [创建]() --> [恢复]()
 
 ``` bash
-$ ansible-playbook /etc/ansible/99.clean.yml
-$ ansible-playbook /etc/ansible/90.setup.yml
-$ ansible-playbook /etc/ansible/24.restore.yml
+$ ansible-playbook -i clusters/k8s-01/hosts -e @clusters/k8s-01/config.yml playbooks/99.clean.yml
+$ ezctl setup k8s-01 01
+$ ezctl setup k8s-01 02
+$ ezctl setup k8s-01 03
+$ ezctl setup k8s-01 04
+$ ezctl setup k8s-01 05
+...
+$ ansible-playbook -i clusters/k8s-01/hosts -e @clusters/k8s-01/config.yml playbooks/95.restore.yml
 ```
 
 ## 参考
 
-- https://github.com/coreos/etcd/blob/master/Documentation/op-guide/recovery.md
+- https://etcd.io/docs/v3.4/op-guide/recovery/
diff --git a/roles/cluster-restore/tasks/main.yml b/roles/cluster-restore/tasks/main.yml
index 06fbb56..ddce863 100644
--- a/roles/cluster-restore/tasks/main.yml
+++ b/roles/cluster-restore/tasks/main.yml
@@ -2,7 +2,7 @@
   service: name=etcd state=stopped
 
 - name: 清除etcd 数据目录
-  file: name=/var/lib/etcd/member state=absent
+  file: name={{ ETCD_DATA_DIR }}/member state=absent
 
 - name: 生成备份目录
   file: name=/etcd_backup state=directory
@@ -24,7 +24,7 @@
         --initial-advertise-peer-urls https://{{ inventory_hostname }}:2380"
 
 - name: 恢复数据至etcd 数据目录
-  shell: "cp -rf /etcd_backup/etcd-{{ inventory_hostname }}.etcd/member /var/lib/etcd/"
+  shell: "cp -rf /etcd_backup/etcd-{{ inventory_hostname }}.etcd/member {{ ETCD_DATA_DIR }}/"
 
 - name: 重启etcd 服务
   service: name=etcd state=restarted
diff --git a/roles/etcd/clean-etcd.yml b/roles/etcd/clean-etcd.yml
index c7331a7..d633fa8 100644
--- a/roles/etcd/clean-etcd.yml
+++ b/roles/etcd/clean-etcd.yml
@@ -12,6 +12,7 @@
   - name: remove files and dirs
     file: name={{ item }} state=absent
     with_items:
-    - "/var/lib/etcd"
+    - "{{ ETCD_DATA_DIR }}"
+    - "{{ ETCD_WAL_DIR }}"
     - "/backup/k8s"
     - "/etc/systemd/system/etcd.service"
diff --git a/roles/etcd/tasks/main.yml b/roles/etcd/tasks/main.yml
index cc9d16c..20ed7d0 100644
--- a/roles/etcd/tasks/main.yml
+++ b/roles/etcd/tasks/main.yml
@@ -1,5 +1,5 @@
 - name: prepare some dirs
-  file: name=/var/lib/etcd state=directory mode=0700
+  file: name={{ ETCD_DATA_DIR }} state=directory mode=0700
 
 - name: 下载etcd二进制文件
   copy: src={{ base_dir }}/bin/{{ item }} dest={{ bin_dir }}/{{ item }} mode=0755
diff --git a/roles/etcd/templates/etcd.service.j2 b/roles/etcd/templates/etcd.service.j2
index d602f0e..c1bbac3 100644
--- a/roles/etcd/templates/etcd.service.j2
+++ b/roles/etcd/templates/etcd.service.j2
@@ -7,7 +7,7 @@ Documentation=https://github.com/coreos
 
 [Service]
 Type=notify
-WorkingDirectory=/var/lib/etcd/
+WorkingDirectory={{ ETCD_DATA_DIR }}
 ExecStart={{ bin_dir }}/etcd \
   --name=etcd-{{ inventory_hostname }} \
   --cert-file={{ ca_dir }}/etcd.pem \