diff --git a/operations/cluster-maintenance.yml b/cluster-maintenance.yml similarity index 100% rename from operations/cluster-maintenance.yml rename to cluster-maintenance.yml diff --git a/cluster-os-migration.yml b/cluster-os-migration.yml new file mode 100644 index 000000000..278c0aa40 --- /dev/null +++ b/cluster-os-migration.yml @@ -0,0 +1,432 @@ +--- +# This playbook was meant to upgrade a node from Ubuntu to RHEL. +# We are performing a set of actions prior to reboot the node. +# The node reboots via PXE and gets its new operating system. +# This playbook only works for monitors and OSDs. +# Note that some of the checks are ugly: +# ie: the when migration_completed.stat.exists +# can be improved with includes, however I wanted to keep a single file... +# + +- hosts: mons + serial: 1 + sudo: True + + vars: + backup_dir: /tmp/ + + tasks: + + - name: Check if the node has be migrated already + stat: > + path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/migration_completed + register: migration_completed + ignore_errors: True + + - name: Check for failed run + stat: > + path=/var/lib/ceph/{{ ansible_hostname }}.tar + register: mon_archive_leftover + + - fail: msg="Looks like an archive is already there, please remove it!" 
+ when: migration_completed.stat.exists == False and mon_archive_leftover.stat.exists == True + + - name: Compress the store as much as possible + command: ceph tell mon.{{ ansible_hostname }} compact + when: migration_completed.stat.exists == False + + - name: Check if sysvinit + stat: > + path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/sysvinit + register: monsysvinit + changed_when: False + + - name: Check if upstart + stat: > + path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/upstart + register: monupstart + changed_when: False + + - name: Check if init does what it is supposed to do (Sysvinit) + shell: > + ps faux|grep -sq [c]eph-mon && service ceph status mon >> /dev/null + register: ceph_status_sysvinit + changed_when: False + + # can't complete the condition since the previous tasks never ran... + - fail: msg="Something is terribly wrong here, sysvinit is configured, the service is started BUT the init script does not return 0, GO FIX YOUR SETUP!" + when: ceph_status_sysvinit.rc != 0 and migration_completed.stat.exists == False and monsysvinit.stat.exists == True + + - name: Check if init does what it is supposed to do (upstart) + shell: > + ps faux|grep -sq [c]eph-mon && status ceph-mon-all >> /dev/null + register: ceph_status_upstart + changed_when: False + + - fail: msg="Something is terribly wrong here, upstart is configured, the service is started BUT the init script does not return 0, GO FIX YOUR SETUP!" 
+ when: ceph_status_upstart.rc != 0 and migration_completed.stat.exists == False and monupstart.stat.exists == True + + - name: Restart the Monitor after compaction (Upstart) + service: > + name=ceph-mon + state=restarted + args=id={{ ansible_hostname }} + when: monupstart.stat.exists == True and migration_completed.stat.exists == False + + - name: Restart the Monitor after compaction (Sysvinit) + service: > + name=ceph + state=restarted + args=mon + when: monsysvinit.stat.exists == True and migration_completed.stat.exists == False + + - name: Wait for the monitor to be up again + local_action: > + wait_for + host={{ ansible_ssh_host | default(inventory_hostname) }} + port=6789 + timeout=10 + when: migration_completed.stat.exists == False + + - name: Stop the monitor (Upstart) + service: > + name=ceph-mon + state=stopped + args=id={{ ansible_hostname }} + when: monupstart.stat.exists == True and migration_completed.stat.exists == False + + - name: Stop the monitor (Sysvinit) + service: > + name=ceph + state=stopped + args=mon + when: monsysvinit.stat.exists == True and migration_completed.stat.exists == False + + - name: Wait for the monitor to be down + local_action: > + wait_for + host={{ ansible_ssh_host | default(inventory_hostname) }} + port=6789 + timeout=10 + state=stopped + when: migration_completed.stat.exists == False + + - name: Create a backup directory + file: > + path={{ backup_dir }}/monitors-backups + state=directory + owner=root + group=root + mode=0644 + delegate_to: "{{ item }}" + with_items: groups.backup[0] + when: migration_completed.stat.exists == False + + # NOTE (leseb): should we convert upstart to sysvinit here already? + - name: Archive monitor stores + shell: > + tar -cpvzf - --one-file-system . 
/etc/ceph/* | cat > {{ ansible_hostname }}.tar + chdir=/var/lib/ceph/ + creates={{ ansible_hostname }}.tar + when: migration_completed.stat.exists == False + + - name: Scp the Monitor store + fetch: > + src=/var/lib/ceph/{{ ansible_hostname }}.tar + dest={{ backup_dir }}/monitors-backups/{{ ansible_hostname }}.tar + flat=yes + when: migration_completed.stat.exists == False + + - name: Reboot the server + command: reboot + when: migration_completed.stat.exists == False + + - name: Wait for the server to come up # host= is required: this runs as a local action, so without it the control machine's own port 22 would be polled + local_action: > + wait_for + host={{ ansible_ssh_host | default(inventory_hostname) }} port=22 + delay=10 + timeout=3600 + when: migration_completed.stat.exists == False + + - name: Wait a bit more to be sure that the server is ready + pause: seconds=20 + when: migration_completed.stat.exists == False + + - name: Check if sysvinit + stat: > + path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/sysvinit + register: monsysvinit + changed_when: False + + - name: Check if upstart + stat: > + path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/upstart + register: monupstart + changed_when: False + + - name: Make sure the monitor is stopped (Upstart) + service: > + name=ceph-mon + state=stopped + args=id={{ ansible_hostname }} + when: monupstart.stat.exists == True and migration_completed.stat.exists == False + + - name: Make sure the monitor is stopped (Sysvinit) + service: > + name=ceph + state=stopped + args=mon + when: monsysvinit.stat.exists == True and migration_completed.stat.exists == False + + # NOTE (leseb): 'creates' was added in Ansible 1.6; it is given as an absolute path (dest + the archived etc/ceph/ceph.conf member) + - name: Copy and unarchive the monitor store + unarchive: > + src={{ backup_dir }}/monitors-backups/{{ ansible_hostname }}.tar + dest=/var/lib/ceph/ + copy=yes + mode=0600 + creates=/var/lib/ceph/etc/ceph/ceph.conf + when: migration_completed.stat.exists == False + + - name: Copy keys and configs + shell: > + cp etc/ceph/* /etc/ceph/ + chdir=/var/lib/ceph/ + when: migration_completed.stat.exists == False + + - name: Configure RHEL7 for sysvinit + shell: find -L /var/lib/ceph/mon/ 
-mindepth 1 -maxdepth 1 -regextype posix-egrep -regex '.*/[A-Za-z0-9]+-[A-Za-z0-9._-]+' -exec touch {}/sysvinit \; -exec rm {}/upstart \; + when: migration_completed.stat.exists == False + + # NOTE (leseb): at this point the upstart and sysvinit checks are not necessary + # so we directly call sysvinit + - name: Start the monitor + service: > + name=ceph + state=started + args=mon + when: migration_completed.stat.exists == False + + - name: Wait for the Monitor to be up again + local_action: > + wait_for + host={{ ansible_ssh_host | default(inventory_hostname) }} + port=6789 + timeout=10 + when: migration_completed.stat.exists == False + + - name: Waiting for the monitor to join the quorum... + shell: > + ceph -s | grep monmap | sed 's/.*quorum//' | egrep -q {{ ansible_hostname }} + register: result + until: result.rc == 0 + retries: 5 + delay: 10 + delegate_to: "{{ item }}" + with_items: groups.backup[0] + when: migration_completed.stat.exists == False + + - name: Done moving to the next monitor + file: > + path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/migration_completed + state=touch + owner=root + group=root + mode=0600 + when: migration_completed.stat.exists == False + +- hosts: osds + serial: 1 + sudo: True + + vars: + backup_dir: /tmp/ + + tasks: + - name: Check if the node has be migrated already + stat: > + path=/var/lib/ceph/migration_completed + register: migration_completed + ignore_errors: True + + - name: Check for failed run + stat: > + path=/var/lib/ceph/{{ ansible_hostname }}.tar + register: osd_archive_leftover + + - fail: msg="Looks like an archive is already there, please remove it!" 
+ when: migration_completed.stat.exists == False and osd_archive_leftover.stat.exists == True + + - name: Check if sysvinit + shell: stat /var/lib/ceph/osd/ceph-*/sysvinit + register: osdsysvinit + ignore_errors: True + changed_when: False + + - name: Check if upstart + shell: stat /var/lib/ceph/osd/ceph-*/upstart + register: osdupstart + ignore_errors: True + changed_when: False + + - name: Check if init does what it is supposed to do (Sysvinit) + shell: > + ps faux|grep -sq [c]eph-osd && service ceph status osd >> /dev/null + register: ceph_status_sysvinit + changed_when: False + + # the init-marker probes above must run first: the conditions below read osdsysvinit/osdupstart, which this play registers for the OSD host + - fail: msg="Something is terribly wrong here, sysvinit is configured, the services are started BUT the init script does not return 0, GO FIX YOUR SETUP!" + when: ceph_status_sysvinit.rc != 0 and migration_completed.stat.exists == False and osdsysvinit.rc == 0 + + - name: Check if init does what it is supposed to do (upstart) + shell: > + ps faux|grep -sq [c]eph-osd && initctl list|egrep -sq "ceph-osd \(ceph/.\) start/running, process [0-9][0-9][0-9][0-9]" + register: ceph_status_upstart + changed_when: False + + - fail: msg="Something is terribly wrong here, upstart is configured, the services are started BUT the init script does not return 0, GO FIX YOUR SETUP!" + when: ceph_status_upstart.rc != 0 and migration_completed.stat.exists == False and osdupstart.rc == 0 + + - name: Set the noout flag + command: ceph osd set noout + delegate_to: "{{ item }}" + with_items: groups.mons[0] + when: migration_completed.stat.exists == False + + - name: Archive ceph configs + shell: > + tar -cpvzf - --one-file-system . 
/etc/ceph/ceph.conf | cat > {{ ansible_hostname }}.tar + chdir=/var/lib/ceph/ + creates={{ ansible_hostname }}.tar + when: migration_completed.stat.exists == False + + - name: Create backup directory + file: > + path={{ backup_dir }}/osds-backups + state=directory + owner=root + group=root + mode=0644 + delegate_to: "{{ item }}" + with_items: groups.backup[0] + when: migration_completed.stat.exists == False + + - name: Scp OSDs dirs and configs + fetch: > + src=/var/lib/ceph/{{ ansible_hostname }}.tar + dest={{ backup_dir }}/osds-backups/ + flat=yes + when: migration_completed.stat.exists == False + + - name: Collect OSD ports + shell: netstat -tlpn | awk -F ":" '/ceph-osd/ { sub (" .*", "", $2); print $2 }' | uniq + register: osd_ports + when: migration_completed.stat.exists == False + + - name: Gracefully stop the OSDs (Upstart) + service: > + name=ceph-osd-all + state=stopped + when: osdupstart.rc == 0 and migration_completed.stat.exists == False + + - name: Gracefully stop the OSDs (Sysvinit) + service: > + name=ceph + state=stopped + args=osd + when: osdsysvinit.rc == 0 and migration_completed.stat.exists == False + + - name: Wait for the OSDs to be down + local_action: > + wait_for + host={{ ansible_ssh_host | default(inventory_hostname) }} + port={{ item }} + timeout=10 + state=stopped + with_items: + - "{{ osd_ports.stdout_lines }}" + when: migration_completed.stat.exists == False + + - name: Configure RHEL with sysvinit + shell: find -L /var/lib/ceph/osd/ -mindepth 1 -maxdepth 1 -regextype posix-egrep -regex '.*/[A-Za-z0-9]+-[A-Za-z0-9._-]+' -exec touch {}/sysvinit \; -exec rm {}/upstart \; + when: migration_completed.stat.exists == False + + - name: Reboot the server + command: reboot + when: migration_completed.stat.exists == False + + - name: Wait for the server to come up # host= is required: this runs as a local action, so without it the control machine's own port 22 would be polled + local_action: > + wait_for + host={{ ansible_ssh_host | default(inventory_hostname) }} port=22 + delay=10 + timeout=3600 + when: migration_completed.stat.exists == False + + - name: Wait a bit to be sure that the server is ready for scp + 
pause: seconds=20 + when: migration_completed.stat.exists == False + + # NOTE (leseb): 'creates' was added in Ansible 1.6; it is given as an absolute path (dest + the archived etc/ceph/ceph.conf member) + - name: Copy and unarchive the OSD configs + unarchive: > + src={{ backup_dir }}/osds-backups/{{ ansible_hostname }}.tar + dest=/var/lib/ceph/ + copy=yes + mode=0600 + creates=/var/lib/ceph/etc/ceph/ceph.conf + when: migration_completed.stat.exists == False + + - name: Copy keys and configs + shell: > + cp etc/ceph/* /etc/ceph/ + chdir=/var/lib/ceph/ + when: migration_completed.stat.exists == False + + # NOTE (leseb): at this point the upstart and sysvinit checks are not necessary + # so we directly call sysvinit (the 'ceph' service with args=osd, mirroring the monitor play) + - name: Start all the OSDs + service: > + name=ceph + state=started + args=osd + when: migration_completed.stat.exists == False + + # NOTE (leseb): this is tricky: unless they are set in ceph.conf, the + # listening ports cannot be predicted, as they change after each restart +# - name: Wait for the OSDs to be up again +# local_action: > +# wait_for +# host={{ ansible_ssh_host | default(inventory_hostname) }} +# port={{ item }} +# timeout=30 +# with_items: +# - "{{ osd_ports.stdout_lines }}" + + - name: Waiting for clean PGs... 
+ shell: > + test "$(ceph pg stat | sed 's/^.*pgs://' | sed 's/active+clean.*//' |sed 's/ //')" -eq "$(ceph pg stat | sed 's/pgs.*//' | sed 's/^.*://' | sed 's/ //')" && ceph health | egrep -q "HEALTH_OK|HEALTH_WARN" + register: result + until: result.rc == 0 + retries: 10 + delay: 10 + delegate_to: "{{ item }}" + with_items: groups.backup[0] + when: migration_completed.stat.exists == False + + - name: Done moving to the next OSD + file: > + path=/var/lib/ceph/migration_completed + state=touch + owner=root + group=root + mode=0600 + when: migration_completed.stat.exists == False + + - name: Unset the noout flag + command: ceph osd unset noout + delegate_to: "{{ item }}" + with_items: groups.mons[0] + when: migration_completed.stat.exists == False diff --git a/operations/cluster-operating-system-migration.yml b/operations/cluster-operating-system-migration.yml deleted file mode 100644 index b09798b96..000000000 --- a/operations/cluster-operating-system-migration.yml +++ /dev/null @@ -1,249 +0,0 @@ ---- -# This playbook was meant to upgrade a node from Ubuntu to RHEL. -# We are performing a set of actions prior to reboot the node. -# The node reboots via PXE and gets its new operating system. -# This playbook only works for monitors and OSDs. 
- -- hosts: mons - serial: 1 - sudo: True - - vars: - backup_dir: /tmp/ - - pre_tasks: - - name: Compress the store as much as possible - command: ceph tell mon.{{ ansible_hostname }} compact - - - name: Check if sysvinit - stat: > - path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/sysvinit - register: sysvinit - - - name: Check if upstart - stat: > - path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/upstart - register: upstart - - - name: Restart the Monitor after compaction (Upstart) - service: name=ceph-mon-all state=restarted - when: upstart.stat.exists == True - - - name: Restart the Monitor after compaction (Sysvinit) - service: name=ceph state=restarted args=mon - when: sysvinit.stat.exists == True - - - name: Wait for the monitor to be up again - local_action: > - wait_for - host={{ ansible_ssh_host | default(inventory_hostname) }} - port=6789 - timeout=10 - - - name: Stop the monitor (Upstart) - service: name=ceph-mon-all state=started - when: upstart.stat.exists == True - - - name: Stop the monitor (Sysvinit) - service: name=ceph state=started args=mon - when: sysvinit.stat.exists == True - - - name: Wait for the monitor to be down - local_action: > - wait_for - host={{ ansible_ssh_host | default(inventory_hostname) }} - port=6789 - timeout=10 - state=stopped - - - name: Create a backup directory - file: > - path={{ backup_dir }}/monitors-backups - state=directory - owner=root - group=root - mode=0644 - delegate_to: "{{ item }}" - with_items: groups.backup[0] - - - name: Archive monitor stores - shell: > - tar -cpvzf - --one-file-system . 
/etc/ceph/ceph.conf | cat > {{ ansible_hostname }}.tar - chdir=/var/lib/ceph/ - creates={{ ansible_hostname }}.tar - - - name: Scp the Monitor store - fetch: > - src=/var/lib/ceph/{{ ansible_hostname }}.tar - dest={{ backup_dir }}/monitors-backups/{{ ansible_hostname }}.tar - flat=yes - - tasks: - - name: Reboot the server - command: reboot - - - name: Wait for the server to come up - local_action: > - wait_for - port=22 - delay=10 - timeout=3600 - - - name: Wait a bit more to be sure that the server is ready - pause: seconds=20 - - - name: Check if sysvinit - stat: > - path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/sysvinit - register: sysvinit - - - name: Check if upstart - stat: > - path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/upstart - register: upstart - - - name: Make sure the monitor is stopped (Upstart) - service: name=ceph-mon-all state=started - when: upstart.stat.exists == True - - - name: Make sure the monitor is stopped (Sysvinit) - service: name=ceph state=started args=mon - when: sysvinit.stat.exists == True - - - name: Scp back monitor store - copy: > - src={{ backup_dir }}/monitors-backups/{{ ansible_hostname }}.tar - dest=/var/lib/ceph/{{ ansible_hostname }}.tar - - - name: Untar the monitor store - shell: > - tar -xzvf {{ ansible_hostname }}.tar --overwrite --overwrite-dir - chdir=/var/lib/ceph/ - creates=etc/ceph/ceph.conf - - - name: Configure RHEL7 for sysvinit - shell: find -L /var/lib/ceph/mon/ -mindepth 1 -maxdepth 1 -regextype posix-egrep -regex '.*/[A-Za-z0-9]+-[A-Za-z0-9._-]+' -exec touch {}/sysvinit \; -exec rm {}/upstart \; - - - name: Start the monitor - service: > - name=ceph - state=started - pattern=/usr/bin/ceph-mon - args=mon - - - name: Wait for the Monitor to be up again - local_action: > - wait_for - host={{ ansible_ssh_host | default(inventory_hostname) }} - port=6789 - timeout=10 - - - name: Waiting for a quorum... 
- shell: > - ceph -s | grep monmap | sed 's/.*quorum//' | egrep -q {{ ansible_hostname }} - register: result - until: result.rc == 0 - retries: 5 - delay: 10 - delegate_to: "{{ item }}" - with_items: groups.backup[0] - -- hosts: osds - serial: 1 - sudo: True - - vars: - backup_dir: /tmp/ - - pre_tasks: - - name: Set the noout flag - command: ceph osd set noout - delegate_to: "{{ item }}" - with_items: groups.mons[0] - - tasks: - - name: Archive ceph configs - shell: > - tar -cpvzf - --one-file-system . /etc/ceph/ceph.conf | cat > {{ ansible_hostname }}.tar - chdir=/var/lib/ceph/ - creates={{ ansible_hostname }}.tar - - - name: Create backup directory - file: > - path={{ backup_dir }}/osds-backups - state=directory - owner=root - group=root - mode=0644 - delegate_to: "{{ item }}" - with_items: groups.backup[0] - - - name: Scp OSDs dirs and configs - fetch: > - src=/var/lib/ceph/{{ ansible_hostname }}.tar - dest={{ backup_dir }}/osds-backups/ - flat=yes - - - name: Reboot the server - command: reboot - - - name: Wait for the server to come up - local_action: > - wait_for - port=22 - delay=10 - timeout=3600 - - - name: Wait a bit to be sure that the server is ready for scp - pause: seconds=20 - - - name: Scp back OSDs dirs and configs - copy: > - src={{ backup_dir }}/osds-backups/{{ ansible_hostname }}.tar - dest=/var/lib/ceph/{{ ansible_hostname }}.tar - - - name: Untar the OSD config - shell: > - tar -xzvf {{ ansible_hostname }}.tar --overwrite --overwrite-dir - chdir=/var/lib/ceph/ - creates=etc/ceph/ceph.conf - - - name: Configure RHEL with sysvinit - shell: find -L /var/lib/ceph/osd/ -mindepth 1 -maxdepth 1 -regextype posix-egrep -regex '.*/[A-Za-z0-9]+-[A-Za-z0-9._-]+' -exec touch {}/sysvinit \; -exec rm {}/upstart \; - - - name: Copy ceph.conf - command: > - cp etc/ceph/ceph.conf /etc/ceph/ceph.conf - chdir=/var/lib/ceph/ - - - name: Start all the OSDs - service: > - name=ceph - state=started - pattern=/usr/bin/ceph-osd - args=osd - - - name: Wait for the OSDs 
to be up again - local_action: > - wait_for - host={{ ansible_ssh_host | default(inventory_hostname) }} - port={{ item }} - timeout=10 - with_items: - - 6800 - - - name: Waiting for clean PGs... - shell: > - test "$(ceph pg stat | sed 's/^.*pgs://' | sed 's/active+clean.*//' |sed 's/ //')" -eq "$(ceph pg stat | sed 's/pgs.*//' | sed 's/^.*://' | sed 's/ //')" && ceph -s | egrep -q "HEALTH_OK|HEALTH_WARN" - register: result - until: result.rc == 0 - retries: 10 - delay: 10 - delegate_to: "{{ item }}" - with_items: groups.backup[0] - -# post_tasks: - - name: Unset the noout flag - command: ceph osd unset noout - delegate_to: "{{ item }}" - with_items: groups.mons[0] diff --git a/operations/rolling_update.yml b/operations/rolling_update.yml deleted file mode 100644 index 3c606271c..000000000 --- a/operations/rolling_update.yml +++ /dev/null @@ -1,56 +0,0 @@ ---- -# This playbook does a rolling update for all the Ceph services -# Change the value of serial: to adjust the number of server to be updated. -# -# The four roles that apply to the ceph hosts will be applied: ceph-common, -# ceph-mon, ceph-osd and ceph-mds. So any changes to configuration, package updates, etc, -# will be applied as part of the rolling update process. -# - -# /!\ DO NOT FORGET TO CHANGE THE RELEASE VERSION FIRST! 
/!\ - -- hosts: - - mons - - osds - - mdss - - rgws - sudo: True - roles: - - ceph-common - -- hosts: mons - serial: 1 - sudo: True - roles: - - ceph-mon - post_tasks: - - name: restart monitor(s) - service: > - name=ceph - state=restarted - args=mon - -- hosts: osds - serial: 1 - sudo: True - roles: - - ceph-osd - post_tasks: - - name: restart object storage daemon(s) - command: service ceph-osd-all restart - when: ansible_distribution == "Ubuntu" - - name: restart object storage daemon(s) - service: name=ceph state=restarted args=osd - when: ansible_distribution == "Debian" - -- hosts: mdss - serial: 1 - sudo: True - roles: - - ceph-mds - post_tasks: - - name: restart metadata server(s) - service: > - name=ceph - state=restarted - args=mds diff --git a/operations/purge-cluster.yml b/purge-cluster.yml similarity index 100% rename from operations/purge-cluster.yml rename to purge-cluster.yml diff --git a/roles/ceph-mon/tasks/deploy_monitors.yml b/roles/ceph-mon/tasks/deploy_monitors.yml index 6f8fcfc07..06d954a57 100644 --- a/roles/ceph-mon/tasks/deploy_monitors.yml +++ b/roles/ceph-mon/tasks/deploy_monitors.yml @@ -23,14 +23,3 @@ command: > ceph-mon --mkfs -i {{ ansible_hostname }} --fsid {{ fsid }} --keyring /var/lib/ceph/tmp/keyring.mon.{{ ansible_hostname }} creates=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/keyring - -- name: Start and add that the monitor service to the init sequence - service: > - name=ceph - state=started - enabled=yes - args=mon - -- name: Get Ceph monitor version - shell: ceph daemon mon."{{ ansible_hostname }}" version | cut -d '"' -f 4 | cut -f 1,2 -d '.' 
- register: ceph_version diff --git a/roles/ceph-mon/tasks/main.yml b/roles/ceph-mon/tasks/main.yml index 079768a5c..0f544efd1 100644 --- a/roles/ceph-mon/tasks/main.yml +++ b/roles/ceph-mon/tasks/main.yml @@ -2,6 +2,9 @@ - include: deploy_monitors.yml when: not ceph_containerized_deployment +- include: start_monitor.yml + when: not ceph_containerized_deployment + - include: ceph_keys.yml when: not ceph_containerized_deployment diff --git a/roles/ceph-mon/tasks/start_monitor.yml b/roles/ceph-mon/tasks/start_monitor.yml new file mode 100644 index 000000000..94d94f764 --- /dev/null +++ b/roles/ceph-mon/tasks/start_monitor.yml @@ -0,0 +1,44 @@ +--- +- name: Activate monitor with upstart + file: > + path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/{{ item }} + state=touch + owner=root + group=root + mode=0600 + with_items: + - done + - upstart + when: ansible_distribution == "Ubuntu" + +- name: Activate monitor with sysvinit + file: > + path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/{{ item }} + state=touch + owner=root + group=root + mode=0600 + with_items: + - done + - sysvinit + when: ansible_distribution != "Ubuntu" + +- name: Start and add that the monitor service to the init sequence (Ubuntu) + service: > + name=ceph-mon + state=started + enabled=yes + args="id={{ ansible_hostname }}" + when: ansible_distribution == "Ubuntu" + +- name: Start and add that the monitor service to the init sequence + service: > + name=ceph + state=started + enabled=yes + args=mon + when: ansible_distribution != "Ubuntu" + +- name: Get Ceph monitor version + shell: ceph daemon mon."{{ ansible_hostname }}" version | cut -d '"' -f 4 | cut -f 1,2 -d '.' 
+ register: ceph_version diff --git a/roles/ceph-restapi/tasks/main.yml b/roles/ceph-restapi/tasks/main.yml index 4671a87ee..cc3bf9edd 100644 --- a/roles/ceph-restapi/tasks/main.yml +++ b/roles/ceph-restapi/tasks/main.yml @@ -7,5 +7,5 @@ ignore_errors: True - name: Start Ceph REST API - shell: "nohup ceph-rest-api -n client.restapi &" + shell: "nohup ceph-rest-api &" when: restapi_status.rc != 0 diff --git a/rolling_update.yml b/rolling_update.yml new file mode 100644 index 000000000..607096a22 --- /dev/null +++ b/rolling_update.yml @@ -0,0 +1,176 @@ +--- +# This playbook does a rolling update for all the Ceph services +# Change the value of serial: to adjust the number of server to be updated. +# +# The four roles that apply to the ceph hosts will be applied: ceph-common, +# ceph-mon, ceph-osd and ceph-mds. So any changes to configuration, package updates, etc, +# will be applied as part of the rolling update process. +# + +# /!\ DO NOT FORGET TO CHANGE THE RELEASE VERSION FIRST! /!\ + +- hosts: mons + serial: 1 + sudo: True + + pre_tasks: + - name: Compress the store as much as possible + command: ceph tell mon.{{ ansible_hostname }} compact + + roles: + - ceph-common + - ceph-mon + + post_tasks: + - name: Check if sysvinit + stat: > + path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/sysvinit + register: monsysvinit + + - name: Check if upstart + stat: > + path=/var/lib/ceph/mon/ceph-{{ ansible_hostname }}/upstart + register: monupstart + + - name: Restart the monitor after compaction (Upstart) + service: > + name=ceph-mon + state=restarted + args=id={{ ansible_hostname }} + when: monupstart.stat.exists == True + + - name: Restart the monitor after compaction (Sysvinit) + service: > + name=ceph + state=restarted + args=mon + when: monsysvinit.stat.exists == True + + - name: restart monitor(s) + service: > + name=ceph + state=restarted + args=mon + + - name: Waiting for the monitor to join the quorum... 
+ shell: > + ceph -s | grep monmap | sed 's/.*quorum//' | egrep -q {{ ansible_hostname }} + register: result + until: result.rc == 0 + retries: 5 + delay: 10 + delegate_to: 127.0.0.1 + + +- hosts: osds + serial: 1 + sudo: True + + pre_tasks: + - name: Set the noout flag + command: ceph osd set noout + delegate_to: "{{ item }}" + with_items: groups.mons[0] + + roles: + - ceph-common + - ceph-osd + + post_tasks: + - name: Check if sysvinit + shell: stat /var/lib/ceph/osd/ceph-*/sysvinit + register: osdsysvinit + ignore_errors: True + + - name: Check if upstart + shell: stat /var/lib/ceph/osd/ceph-*/upstart + register: osdupstart + ignore_errors: True + + - name: Restart the OSDs (Upstart) + service: > + name=ceph-osd-all + state=restarted + when: osdupstart.rc == 0 + + - name: Restart the OSDs (Sysvinit) + service: > + name=ceph + state=restarted + args=osd + when: osdsysvinit.rc == 0 + + - name: Waiting for clean PGs... + shell: > + test "$(ceph pg stat | sed 's/^.*pgs://' | sed 's/active+clean.*//' |sed 's/ //')" -eq "$(ceph pg stat | sed 's/pgs.*//' | sed 's/^.*://' | sed 's/ //')" && ceph health | egrep -q "HEALTH_OK|HEALTH_WARN" + register: result + until: result.rc == 0 + retries: 10 + delay: 10 + delegate_to: 127.0.0.1 + + - name: Unset the noout flag + command: ceph osd unset noout + delegate_to: "{{ item }}" + with_items: groups.mons[0] + + +- hosts: mdss + serial: 1 + sudo: True + + roles: + - ceph-common + - ceph-mds + + post_tasks: + - name: Check if sysvinit # probe the MDS data directory of this host, not the monitor one + stat: > + path=/var/lib/ceph/mds/ceph-{{ ansible_hostname }}/sysvinit + register: mdssysvinit + + - name: Check if upstart + stat: > + path=/var/lib/ceph/mds/ceph-{{ ansible_hostname }}/upstart + register: mdsupstart + + - name: Restart the metadata server (Upstart) + service: > + name=ceph-mds + state=restarted + args=id={{ ansible_hostname }} + when: mdsupstart.stat.exists == True + + - name: Restart the metadata server (Sysvinit) + service: > + name=ceph + state=restarted + 
args=mds + when: mdssysvinit.stat.exists == True + + +- hosts: rgws + serial: 1 + sudo: True + + roles: + - ceph-common + - ceph-radosgw + + post_tasks: + - name: restart rados gateway server(s) + service: > + name={{ item }} + state=restarted + with_items: + - radosgw + when: radosgw_frontend == 'civetweb' + + - name: restart rados gateway server(s) + service: > + name={{ item }} + state=restarted + with_items: + - apache2 + - radosgw + when: radosgw_frontend == 'apache'