Merge pull request #1455 from ceph/restart_daemons

Common: Restore check_socket
pull/1429/merge
Andrew Schoen 2017-04-24 06:54:07 -07:00 committed by GitHub
commit b28424334a
6 changed files with 31 additions and 9 deletions

View File

@ -19,6 +19,9 @@
listen: "restart ceph mons"
when:
# We do not want to run these checks on initial deployment (`socket.rc == 0`)
- socket.rc == 0
- ceph_current_fsid.rc == 0
- mon_group_name in group_names
# This does not just restart OSDs but everything else too. Unfortunately
@ -37,10 +40,15 @@
- name: restart ceph osds daemon(s)
command: /tmp/restart_osd_daemon.sh
listen: "restart ceph osds"
when:
- handler_health_osd_check
when: handler_health_osd_check
when:
# We do not want to run these checks on initial deployment (`socket.rc == 0`)
- socket.rc == 0
- ceph_current_fsid.rc == 0
- osd_group_name in group_names
# See https://github.com/ceph/ceph-ansible/issues/1457 for the condition below
- item in play_hosts
- name: restart ceph mdss
service:

View File

@ -0,0 +1,15 @@
---
# These checks are used to avoid running handlers at initial deployment.
#
# Each task probes for admin socket files with `stat` and registers the
# result. The probes are read-only: they never report a change and never
# fail, so callers must inspect the registered `rc` themselves
# (the restart handlers test `socket.rc == 0`).

- name: check for a ceph socket
  # `shell` rather than `command`: the `*.asok` glob and the redirection
  # both need a shell to be interpreted.
  shell: "stat /var/run/ceph/*.asok > /dev/null 2>&1"
  changed_when: false
  # A missing socket is an expected outcome here, not an error.
  failed_when: false
  # Run even under `--check` so that `socket` is always registered.
  # `check_mode` replaces `always_run`, which is deprecated since Ansible 2.2.
  check_mode: false
  register: socket

- name: check for a rados gateway socket
  # NOTE(review): there is no `/` between the variable and `*.asok`. If
  # `rbd_client_admin_socket_path` is a directory without a trailing slash,
  # this glob will not match any file inside it - confirm against the
  # variable's default before relying on `socketrgw.rc`.
  shell: "stat {{ rbd_client_admin_socket_path }}*.asok > /dev/null 2>&1"
  changed_when: false
  # A missing socket is an expected outcome here, not an error.
  failed_when: false
  # Run even under `--check` so that `socketrgw` is always registered.
  check_mode: false
  register: socketrgw

View File

@ -87,6 +87,7 @@
static: False
- include: facts.yml
- include: ./checks/check_socket.yml
- include: create_ceph_initial_dirs.yml
- include: generate_cluster_fsid.yml
- include: generate_ceph_conf.yml

View File

@ -15,6 +15,9 @@ while [ $RETRIES -ne 0 ]; do
let RETRIES=RETRIES-1
done
# If we reach this point, it means there is a problem with the quorum
echo "Error with quorum."
echo "cluster status:"
ceph --cluster ${CLUSTER} -s
exit 1
}
@ -29,5 +32,5 @@ while [ $COUNT -ne 0 ]; do
let COUNT=COUNT-1
done
# If we reach this point, it means the socket is not present.
echo "Error while restarting mon daemon"
echo "Socket file ${SOCKET} could not be found, which means the monitor is not running."
exit 1

View File

@ -17,7 +17,6 @@ check_pgs() {
exit 1
}
for id in $(ls /var/lib/ceph/osd/ | sed 's/.*-//'); do
# First, restart daemon(s)
systemctl restart ceph-osd@${id}
@ -31,6 +30,6 @@ for id in $(ls /var/lib/ceph/osd/ | sed 's/.*-//'); do
let COUNT=COUNT-1
done
# If we reach this point, it means the socket is not present.
echo "Error while restarting mon daemon"
echo "Socket file ${SOCKET} could not be found, which means the osd daemon is not running."
exit 1
done

View File

@ -12,7 +12,3 @@ journal_collocation: True
os_tuning_params:
- { name: kernel.pid_max, value: 4194303 }
- { name: fs.file-max, value: 26234859 }
ceph_conf_overrides:
global:
osd_pool_default_pg_num: 8
osd_pool_default_size: 1