# ceph-ansible/infrastructure-playbooks/rolling_update.yml

---
# This playbook does a rolling update for all the Ceph services
# Change the value of 'serial:' to adjust the number of servers to be updated.
#
# The roles that apply to the ceph hosts will be applied: ceph-common,
# ceph-mon, ceph-osd, ceph-mds and ceph-rgw. So any changes to configuration,
# package updates, etc, will be applied as part of the rolling update process.
#

# /!\ DO NOT FORGET TO CHANGE THE RELEASE VERSION FIRST! /!\

# Play: safety gate. Prompts the operator for confirmation on localhost and
# aborts the run unless the answer is exactly 'yes'.
- name: confirm whether user really meant to upgrade the cluster
  hosts: localhost

  vars_prompt:
    # Defaults to 'no' so that just pressing Enter never starts an upgrade.
    - name: ireallymeanit
      prompt: Are you sure you want to upgrade the cluster?
      default: 'no'
      private: false

  tasks:
    # Anything other than the literal string 'yes' stops the playbook here.
    - name: exit playbook, if user did not mean to upgrade cluster
      fail:
        msg: >
          "Exiting rolling_update.yml playbook, cluster was NOT upgraded.
           To upgrade the cluster, either say 'yes' on the prompt or
           use `-e ireallymeanit=yes` on the command line when
           invoking the playbook"
      when: ireallymeanit != 'yes'

# Play: runs once against every mon/osd/mds/rgw host and registers three
# probe results (is_sysvinit, is_upstart, is_systemd) that the upgrade plays
# below read to pick the matching service restart task.
- name: gather facts and check the init system
  vars:
    mon_group_name: mons
    osd_group_name: osds
    mds_group_name: mdss
    rgw_group_name: rgws

  hosts:
    - "{{ mon_group_name }}"
    - "{{ osd_group_name }}"
    - "{{ mds_group_name }}"
    - "{{ rgw_group_name }}"

  become: true
  tasks:
    - debug:
        msg: "gather facts on all Ceph hosts for following reference"

    # NOTE(review): the path contains shell-style wildcards ('?'); confirm
    # that the stat module expands them, otherwise this probe can never
    # report the file as existing.
    - name: check if sysvinit
      stat:
        path: /etc/rc?.d/S??ceph
        follow: true
      register: is_sysvinit

    # NOTE(review): this marker lives under the monitor data directory and
    # uses the literal cluster name 'ceph' - verify it is a valid upstart
    # indicator on non-monitor hosts and for custom cluster names.
    - name: check if upstart
      stat:
        path: /var/lib/ceph/mon/ceph-{{ ansible_hostname }}/upstart
      register: is_upstart

    # grep exits 0 when PID 1 is systemd and 1 when it is not. Exit code 1 is
    # a valid answer, not an error, so only codes above 1 fail the task;
    # otherwise every non-systemd host would be dropped from the run here.
    # Consumers should test `is_systemd.rc == 0`.
    - name: check if systemd
      command: grep -sq systemd /proc/1/comm
      register: is_systemd
      changed_when: false
      failed_when: is_systemd.rc > 1


# Play: upgrade the monitors one host at a time (serial: 1). For each host
# the ceph-common and ceph-mon roles are applied, the monitor daemon is
# restarted through whichever init system was detected earlier, and the play
# then waits until the monitor shows up in the quorum again before moving on.
- name: upgrade ceph mon cluster

  vars:
    mon_group_name: mons
    restapi_group_name: restapis
    # Quorum check: number of attempts and seconds between attempts.
    health_mon_check_retries: 5
    health_mon_check_delay: 10

  hosts:
    - "{{ mon_group_name }}"

  serial: 1
  become: true

  roles:
    - ceph-common
    - ceph-mon

  post_tasks:
    # Load role defaults and (optional) group_vars so that variables such as
    # `cluster` are available to the tasks below. The group_vars files may be
    # absent, hence failed_when: false on those.
    - include_vars: roles/ceph-common/defaults/main.yml
    - include_vars: roles/ceph-mon/defaults/main.yml
    - include_vars: roles/ceph-restapi/defaults/main.yml
    - include_vars: group_vars/all
      failed_when: false
    - include_vars: group_vars/{{ mon_group_name }}
      failed_when: false
    - include_vars: group_vars/{{ restapi_group_name }}
      failed_when: false

    - name: restart ceph mons with upstart
      service:
        name: ceph-mon
        state: restarted
        args: id={{ ansible_hostname }}
      when: is_upstart.stat.exists

    - name: restart ceph mons with sysvinit
      service:
        name: ceph
        state: restarted
      when: is_sysvinit.stat.exists

    # Test the grep exit code explicitly: the bare registered result is a
    # mapping and does not by itself say whether systemd was found.
    - name: restart ceph mons with systemd
      service:
        name: ceph-mon@{{ ansible_hostname }}
        state: restarted
        enabled: true
      when: is_systemd.rc == 0

    # Pick a monitor other than the one just restarted to run the quorum
    # query on. The loop keeps overwriting mon_host, so the last matching
    # group member wins. With a single monitor nothing matches and mon_host
    # stays undefined.
    - name: select a running monitor
      set_fact:
        mon_host: "{{ item }}"
      with_items: "{{ groups[mon_group_name] }}"
      when: item != inventory_hostname

    # Succeeds once this host's short name appears after the word "quorum"
    # on the monmap line of `ceph -s`. Falls back to querying the current
    # host when no other monitor exists (single-monitor cluster).
    - name: waiting for the monitor to join the quorum...
      shell: |
        ceph -s  --cluster {{ cluster }} | grep monmap | sed 's/.*quorum//' | egrep -sq {{ ansible_hostname }}
      register: result
      until: result.rc == 0
      retries: "{{ health_mon_check_retries }}"
      delay: "{{ health_mon_check_delay }}"
      delegate_to: "{{ mon_host | default(inventory_hostname) }}"


# Play: upgrade the OSD hosts one at a time (serial: 1). For each host the
# noout/noscrub/nodeep-scrub flags are set, the ceph-common and ceph-osd
# roles are applied, the OSD daemons are restarted, and the play waits for
# all placement groups to be active+clean before the flags are cleared.
- name: upgrade ceph osds cluster

  vars:
    osd_group_name: osds
    # PG health check: number of attempts and seconds between attempts.
    health_osd_check_retries: 10
    health_osd_check_delay: 10

  hosts:
    - "{{ osd_group_name }}"

  serial: 1
  become: true

  pre_tasks:
    # Variables (notably `cluster`) are needed before the roles run, so the
    # defaults and optional group_vars are loaded here as well.
    - include_vars: roles/ceph-common/defaults/main.yml
    - include_vars: roles/ceph-osd/defaults/main.yml
    - include_vars: group_vars/all
      failed_when: false
    - include_vars: group_vars/{{ osd_group_name }}
      failed_when: false

    # Flags are set via the first monitor in the inventory.
    - name: set osd flags
      command: ceph osd set {{ item }} --cluster {{ cluster }}
      with_items:
        - noout
        - noscrub
        - nodeep-scrub
      delegate_to: "{{ groups.mons[0] }}"

  roles:
    - ceph-common
    - ceph-osd

  post_tasks:
    - include_vars: roles/ceph-common/defaults/main.yml
    - include_vars: roles/ceph-osd/defaults/main.yml
    - include_vars: group_vars/all
      failed_when: false
    - include_vars: group_vars/{{ osd_group_name }}
      failed_when: false

    # Directory entries look like <cluster>-<id>. Strip everything up to the
    # last dash so a cluster name that itself contains a dash still yields
    # the id. Produces no output when the directory does not exist.
    - name: get osd numbers
      shell: "if [ -d /var/lib/ceph/osd ] ; then ls /var/lib/ceph/osd | sed 's/.*-//' ; fi"
      register: osd_ids
      changed_when: false

    - name: restart ceph osds (upstart)
      service:
        name: ceph-osd-all
        state: restarted
      when: is_upstart.stat.exists

    - name: restart ceph osds (sysvinit)
      service:
        name: ceph
        state: restarted
      when: is_sysvinit.stat.exists

    # One unit per OSD id found above. Test the grep exit code explicitly:
    # the bare registered result does not say whether systemd was found.
    - name: restart ceph osds (systemd)
      service:
        name: ceph-osd@{{ item }}
        state: restarted
        enabled: true
      with_items: "{{ osd_ids.stdout_lines }}"
      when: is_systemd.rc == 0

    # Compares the active+clean PG count with the total PG count parsed from
    # `ceph pg stat`, and additionally requires HEALTH_OK or HEALTH_WARN.
    - name: waiting for clean pgs...
      shell: |
        test "$(ceph pg stat --cluster {{ cluster }} | sed 's/^.*pgs://;s/active+clean.*//;s/ //')" -eq "$(ceph pg stat --cluster {{ cluster }}  | sed 's/pgs.*//;s/^.*://;s/ //')" && ceph health --cluster {{ cluster }}  | egrep -sq "HEALTH_OK|HEALTH_WARN"
      register: result
      until: result.rc == 0
      retries: "{{ health_osd_check_retries }}"
      delay: "{{ health_osd_check_delay }}"
      delegate_to: "{{ groups.mons[0] }}"

    - name: unset osd flags
      command: ceph osd unset {{ item }} --cluster {{ cluster }}
      with_items:
        - noout
        - noscrub
        - nodeep-scrub
      delegate_to: "{{ groups.mons[0] }}"


# Play: upgrade the metadata servers one host at a time (serial: 1). Applies
# the ceph-common and ceph-mds roles, then restarts the MDS daemon through
# whichever init system was detected earlier.
- name: upgrade ceph mdss cluster

  vars:
    mds_group_name: mdss

  hosts:
    - "{{ mds_group_name }}"

  serial: 1
  become: true

  roles:
    - ceph-common
    - ceph-mds

  post_tasks:
    # Role defaults plus optional group_vars; the group_vars files may be
    # absent, hence failed_when: false on those.
    - include_vars: roles/ceph-common/defaults/main.yml
    - include_vars: roles/ceph-mds/defaults/main.yml
    - include_vars: group_vars/all
      failed_when: false
    - include_vars: group_vars/{{ mds_group_name }}
      failed_when: false

    - name: restart ceph mdss with upstart
      service:
        name: ceph-mds
        state: restarted
        args: id={{ ansible_hostname }}
      when: is_upstart.stat.exists

    - name: restart ceph mdss with sysvinit
      service:
        name: ceph
        state: restarted
        args: mds
      when: is_sysvinit.stat.exists

    # Test the grep exit code explicitly: the bare registered result is a
    # mapping and does not by itself say whether systemd was found.
    - name: restart ceph mdss with systemd
      service:
        name: ceph-mds@{{ ansible_hostname }}
        state: restarted
        enabled: true
      when: is_systemd.rc == 0


# Play: upgrade the RADOS gateways one host at a time (serial: 1). Applies
# the ceph-common and ceph-rgw roles, then restarts the gateway service.
# Exactly one of the three restart tasks is meant to run per host.
- name: upgrade ceph rgws cluster

  vars:
    rgw_group_name: rgws

  hosts:
    - "{{ rgw_group_name }}"

  serial: 1
  become: true

  roles:
    - ceph-common
    - ceph-rgw

  post_tasks:
    # Role defaults plus optional group_vars; the group_vars files may be
    # absent, hence failed_when: false on those.
    - include_vars: roles/ceph-common/defaults/main.yml
    - include_vars: roles/ceph-rgw/defaults/main.yml
    - include_vars: group_vars/all
      failed_when: false
    - include_vars: group_vars/{{ rgw_group_name }}
      failed_when: false

    # Test the grep exit code explicitly: the bare registered result is a
    # mapping and does not by itself say whether systemd was found.
    - name: restart ceph rgws with systemd
      service:
        name: ceph-radosgw@rgw.{{ ansible_hostname }}
        state: restarted
        enabled: true
      when: is_systemd.rc == 0

    # Non-systemd hosts: the service name depends on the OS family. The two
    # conditions below are mutually exclusive so that only one name is tried.
    # NOTE(review): assumes 'radosgw' is the non-RedHat init script name and
    # 'ceph-radosgw' the RedHat one - confirm against the packages in use.
    - name: restart ceph rgws with sysvinit
      service:
        name: radosgw
        state: restarted
      when:
        - is_systemd.rc != 0
        - ansible_os_family != 'RedHat'

    - name: restart rados gateway server(s)
      service:
        name: ceph-radosgw
        state: restarted
      when:
        - is_systemd.rc != 0
        - ansible_os_family == 'RedHat'