diff --git a/roles/ceph-common/handlers/main.yml b/roles/ceph-common/handlers/main.yml index 9602a0ce5..1fc4a0ea4 100644 --- a/roles/ceph-common/handlers/main.yml +++ b/roles/ceph-common/handlers/main.yml @@ -19,6 +19,9 @@ listen: "restart ceph mons" when: +# We do not want to run these checks on initial deployment (`socket.rc == 0`) + - socket.rc == 0 + - ceph_current_fsid.rc == 0 - mon_group_name in group_names # This does not just restart OSDs but everything else too. Unfortunately @@ -37,10 +40,15 @@ - name: restart ceph osds daemon(s) command: /tmp/restart_osd_daemon.sh listen: "restart ceph osds" - when: - - handler_health_osd_check + when: +# We do not want to run these checks on initial deployment (`socket.rc == 0`) + - socket.rc == 0 + - ceph_current_fsid.rc == 0 + - handler_health_osd_check - osd_group_name in group_names +# See https://github.com/ceph/ceph-ansible/issues/1457 for the condition below + - inventory_hostname in play_hosts - name: restart ceph mdss service: diff --git a/roles/ceph-common/tasks/checks/check_socket.yml b/roles/ceph-common/tasks/checks/check_socket.yml new file mode 100644 index 000000000..79b512c8e --- /dev/null +++ b/roles/ceph-common/tasks/checks/check_socket.yml @@ -0,0 +1,15 @@ +--- +# These checks are used to avoid running handlers at initial deployment.
+- name: check for a ceph socket + shell: "stat /var/run/ceph/*.asok > /dev/null 2>&1" + changed_when: false + failed_when: false + always_run: true + register: socket + +- name: check for a rados gateway socket + shell: "stat {{ rbd_client_admin_socket_path }}*.asok > /dev/null 2>&1" + changed_when: false + failed_when: false + always_run: true + register: socketrgw diff --git a/roles/ceph-common/tasks/main.yml b/roles/ceph-common/tasks/main.yml index ca25812f7..daf8e3ff9 100644 --- a/roles/ceph-common/tasks/main.yml +++ b/roles/ceph-common/tasks/main.yml @@ -87,6 +87,7 @@ static: False - include: facts.yml +- include: ./checks/check_socket.yml - include: create_ceph_initial_dirs.yml - include: generate_cluster_fsid.yml - include: generate_ceph_conf.yml diff --git a/roles/ceph-common/templates/restart_mon_daemon.sh.j2 b/roles/ceph-common/templates/restart_mon_daemon.sh.j2 index d918b0198..4424ccacb 100644 --- a/roles/ceph-common/templates/restart_mon_daemon.sh.j2 +++ b/roles/ceph-common/templates/restart_mon_daemon.sh.j2 @@ -15,6 +15,9 @@ while [ $RETRIES -ne 0 ]; do let RETRIES=RETRIES-1 done # If we reach this point, it means there is a problem with the quorum +echo "Error with quorum." +echo "cluster status:" +ceph --cluster ${CLUSTER} -s exit 1 } @@ -29,5 +32,5 @@ while [ $COUNT -ne 0 ]; do let COUNT=COUNT-1 done # If we reach this point, it means the socket is not present. -echo "Error while restarting mon daemon" +echo "Socket file ${SOCKET} could not be found, which means the monitor is not running." 
exit 1 diff --git a/roles/ceph-common/templates/restart_osd_daemon.sh.j2 b/roles/ceph-common/templates/restart_osd_daemon.sh.j2 index 8b0b7d1de..0f8411161 100644 --- a/roles/ceph-common/templates/restart_osd_daemon.sh.j2 +++ b/roles/ceph-common/templates/restart_osd_daemon.sh.j2 @@ -17,7 +17,6 @@ check_pgs() { exit 1 } - for id in $(ls /var/lib/ceph/osd/ | sed 's/.*-//'); do # First, restart daemon(s) systemctl restart ceph-osd@${id} @@ -31,6 +30,6 @@ for id in $(ls /var/lib/ceph/osd/ | sed 's/.*-//'); do let COUNT=COUNT-1 done # If we reach this point, it means the socket is not present. - echo "Error while restarting mon daemon" + echo "Socket file ${SOCKET} could not be found, which means the osd daemon is not running." exit 1 done diff --git a/tests/functional/centos/7/journal-collocation/group_vars/all b/tests/functional/centos/7/journal-collocation/group_vars/all index a2f0b06dc..81857e670 100644 --- a/tests/functional/centos/7/journal-collocation/group_vars/all +++ b/tests/functional/centos/7/journal-collocation/group_vars/all @@ -12,7 +12,3 @@ journal_collocation: True os_tuning_params: - { name: kernel.pid_max, value: 4194303 } - { name: fs.file-max, value: 26234859 } -ceph_conf_overrides: - global: - osd_pool_default_pg_num: 8 - osd_pool_default_size: 1