From 155e2a23d54ea29ccbf5414cb93cdc748c516e79 Mon Sep 17 00:00:00 2001 From: Dimitri Savineau Date: Tue, 21 Jul 2020 14:51:20 -0400 Subject: [PATCH] rolling_update: stop/start instead of restart During the daemon upgrade we're - stopping the service when it's not containerized - running the daemon role - starting the service when it's not containerized - restarting the service when it's containerized This implementation has multiple issues. 1/ We don't use the same service workflow when using containers or baremetal. 2/ The explicit daemon start isn't required since we're already doing this in the daemon role. 3/ Any non-backward-compatible changes in the systemd unit template (for containerized deployment) won't work due to the restart usage. This patch refactors the rolling_update playbook by using the same service stop task for both containerized and baremetal deployment at the start of the upgrade play. It removes the explicit service start task because it's already included in the dedicated role. The service restart tasks for containerized deployment are also removed. Finally, this adds the missing service stop task for the ceph crash upgrade workflow. 
Closes: https://bugzilla.redhat.com/show_bug.cgi?id=1859173 Signed-off-by: Dimitri Savineau --- infrastructure-playbooks/rolling_update.yml | 139 +++----------------- 1 file changed, 19 insertions(+), 120 deletions(-) diff --git a/infrastructure-playbooks/rolling_update.yml b/infrastructure-playbooks/rolling_update.yml index 9f2c37307..54ff1e893 100644 --- a/infrastructure-playbooks/rolling_update.yml +++ b/infrastructure-playbooks/rolling_update.yml @@ -191,7 +191,6 @@ enabled: no masked: yes ignore_errors: True - when: not containerized_deployment | bool # NOTE: we mask the service so the RPM can't restart it # after the package gets upgraded @@ -202,7 +201,6 @@ enabled: no masked: yes ignore_errors: True - when: not containerized_deployment | bool # only mask the service for mgr because it must be upgraded # after ALL monitors, even when collocated @@ -226,28 +224,12 @@ - import_role: name: ceph-mon - - name: start ceph mon - systemd: - name: ceph-mon@{{ monitor_name }} - state: started - enabled: yes - when: not containerized_deployment | bool - - name: start ceph mgr systemd: name: ceph-mgr@{{ ansible_hostname }} state: started enabled: yes ignore_errors: True # if no mgr collocated with mons - when: not containerized_deployment | bool - - - name: restart containerized ceph mon - systemd: - name: ceph-mon@{{ monitor_name }} - state: restarted - enabled: yes - daemon_reload: yes - when: containerized_deployment | bool - name: non container | waiting for the monitor to join the quorum... 
command: ceph --cluster "{{ cluster }}" -m "{{ hostvars[groups[mon_group_name][0]]['_current_monitor_address'] }}" -s --format json @@ -392,18 +374,10 @@ shell: "if [ -d /var/lib/ceph/osd ] ; then ls /var/lib/ceph/osd | sed 's/.*-//' ; fi" register: osd_ids changed_when: false - when: not containerized_deployment | bool - - name: get osd unit names - container - shell: systemctl list-units | grep -E "loaded * active" | grep -oE "ceph-osd@([0-9]+).service" - register: osd_names - changed_when: false - when: containerized_deployment | bool - - - name: set num_osds for container + - name: set num_osds set_fact: - num_osds: "{{ osd_names.stdout_lines|default([])|length }}" - when: containerized_deployment | bool + num_osds: "{{ osd_ids.stdout_lines|default([])|length }}" - name: set_fact container_exec_cmd_osd set_fact: @@ -417,12 +391,6 @@ enabled: no masked: yes with_items: "{{ osd_ids.stdout_lines }}" - when: not containerized_deployment | bool - - - name: set num_osds for non container - set_fact: - num_osds: "{{ osd_ids.stdout_lines|default([])|length }}" - when: not containerized_deployment | bool - import_role: name: ceph-handler @@ -437,25 +405,6 @@ - import_role: name: ceph-osd - - name: start ceph osd - systemd: - name: ceph-osd@{{ item }} - state: started - enabled: yes - masked: no - with_items: "{{ osd_ids.stdout_lines }}" - when: not containerized_deployment | bool - - - name: restart containerized ceph osd - systemd: - name: "{{ item }}" - state: restarted - enabled: yes - masked: no - daemon_reload: yes - with_items: "{{ osd_names.stdout_lines }}" - when: containerized_deployment | bool - - name: scan ceph-disk osds with ceph-volume if deploying nautilus command: "ceph-volume --cluster={{ cluster }} simple scan --force" environment: @@ -614,7 +563,6 @@ name: ceph-mds@{{ ansible_hostname }} enabled: no masked: yes - when: not containerized_deployment | bool - import_role: name: ceph-handler @@ -629,14 +577,6 @@ - import_role: name: ceph-mds - - name: 
restart ceph mds - systemd: - name: ceph-mds@{{ ansible_hostname }} - state: restarted - enabled: yes - masked: no - daemon_reload: yes - - name: upgrade standbys ceph mdss cluster vars: @@ -656,7 +596,6 @@ name: ceph-mds@{{ ansible_hostname }} enabled: no masked: yes - when: not containerized_deployment | bool - import_role: name: ceph-handler @@ -671,14 +610,6 @@ - import_role: name: ceph-mds - - name: restart ceph mds - systemd: - name: ceph-mds@{{ ansible_hostname }} - state: restarted - enabled: yes - masked: no - daemon_reload: yes - - name: set max_mds command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} fs set {{ cephfs }} max_mds {{ mds_max_mds }}" changed_when: false @@ -714,7 +645,6 @@ enabled: no masked: yes with_items: "{{ rgw_instances }}" - when: not containerized_deployment | bool - import_role: name: ceph-handler @@ -729,16 +659,6 @@ - import_role: name: ceph-rgw - - name: restart containerized ceph rgw - systemd: - name: ceph-radosgw@rgw.{{ ansible_hostname }}.{{ item.instance_name }} - state: restarted - enabled: yes - masked: no - daemon_reload: yes - with_items: "{{ rgw_instances }}" - when: containerized_deployment | bool - - name: upgrade ceph rbd mirror node vars: @@ -771,23 +691,6 @@ - import_role: name: ceph-rbd-mirror - - name: start ceph rbd mirror - systemd: - name: "ceph-rbd-mirror@rbd-mirror.{{ ansible_hostname }}" - state: started - enabled: yes - masked: no - when: not containerized_deployment | bool - - - name: restart containerized ceph rbd mirror - systemd: - name: ceph-rbd-mirror@rbd-mirror.{{ ansible_hostname }} - state: restarted - enabled: yes - masked: no - daemon_reload: yes - when: containerized_deployment | bool - - name: upgrade ceph nfs node vars: @@ -808,6 +711,17 @@ failed_when: false when: not containerized_deployment | bool + - name: systemd stop nfs container + systemd: + name: ceph-nfs@{{ ceph_nfs_service_suffix | default(ansible_hostname) }} + state: stopped + enabled: no + masked: yes + 
failed_when: false + when: + - ceph_nfs_enable_service | bool + - containerized_deployment | bool + - import_role: name: ceph-defaults - import_role: @@ -825,27 +739,6 @@ - import_role: name: ceph-nfs - - name: start nfs gateway - systemd: - name: nfs-ganesha - state: started - enabled: yes - masked: no - when: - - not containerized_deployment | bool - - ceph_nfs_enable_service | bool - - - name: systemd restart nfs container - systemd: - name: ceph-nfs@{{ ceph_nfs_service_suffix | default(ansible_hostname) }} - state: restarted - enabled: yes - masked: no - daemon_reload: yes - when: - - ceph_nfs_enable_service | bool - - containerized_deployment | bool - - name: upgrade ceph iscsi gateway node vars: @@ -923,6 +816,12 @@ gather_facts: false become: true tasks: + - name: stop the ceph-crash service + systemd: + name: "{{ 'ceph-crash@' + ansible_hostname if containerized_deployment | bool else 'ceph-crash.service' }}" + state: stopped + enabled: no + masked: yes - import_role: name: ceph-defaults - import_role: