mirror of https://github.com/ceph/ceph-ansible.git
Add handlers for containerized deployment
Until now, there were no handlers for containerized deployments.

Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com>

pull/1727/head
parent fc6b6e9859
commit 7a333d05ce
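The change below leans on Ansible's notify/listen mechanism: configuration tasks notify a topic such as "restart ceph osds", and every handler that declares listen: for that topic runs once at the end of the play, on hosts where a notifying task reported a change. A minimal illustrative sketch of the pattern (not part of the commit; the template name is hypothetical):

    - hosts: osds
      tasks:
        - name: push cluster configuration
          template:
            src: ceph.conf.j2          # hypothetical template, for illustration only
            dest: /etc/ceph/ceph.conf
          notify: restart ceph osds    # notify the topic, not a specific handler name

      handlers:
        - name: restart ceph osds daemon(s)
          command: /tmp/restart_osd_daemon.sh
          listen: restart ceph osds    # fires for any task that notified the topic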
@@ -97,13 +97,21 @@
 
   tasks:
 
-  - name: disable ceph rgw service
+  # For backward compatibility
+  - name: disable ceph rgw service (old unit name, for backward compatibility)
     service:
       name: "ceph-rgw@{{ ansible_hostname }}"
       state: stopped
       enabled: no
     ignore_errors: true
 
+  - name: disable ceph rgw service (new unit name)
+    service:
+      name: "ceph-radosgw@{{ ansible_hostname }}"
+      state: stopped
+      enabled: no
+    ignore_errors: true
+
   - name: remove ceph rgw container
     docker:
       image: "{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}"
@@ -113,8 +121,12 @@
 
   - name: remove ceph rgw service
     file:
-      path: /etc/systemd/system/ceph-rgw@.service
+      path: "{{ item }}"
       state: absent
+    with_items:
+      # For backward compatibility
+      - /etc/systemd/system/ceph-rgw@.service
+      - /etc/systemd/system/ceph-radosgw@.service
 
   - name: remove ceph rgw image
     docker_image:
@@ -450,7 +450,7 @@
 
     - name: restart containerized ceph rgws with systemd
       service:
-        name: ceph-rgw@{{ ansible_hostname }}
+        name: ceph-radosgw@{{ ansible_hostname }}
        state: restarted
        enabled: yes
      when:
@@ -1,15 +0,0 @@
---
# These checks are used to avoid running handlers at initial deployment.
- name: check for a ceph socket
  shell: "stat /var/run/ceph/*.asok > /dev/null 2>&1"
  changed_when: false
  failed_when: false
  always_run: true
  register: socket

- name: check for a rados gateway socket
  shell: "stat {{ rbd_client_admin_socket_path }}*.asok > /dev/null 2>&1"
  changed_when: false
  failed_when: false
  always_run: true
  register: socketrgw
@@ -105,7 +105,6 @@
     - ceph_current_fsid.rc == 0
     - mon_group_name in group_names
 
-- include: ./checks/check_socket.yml
 - include: create_ceph_initial_dirs.yml
 - include: generate_ceph_conf.yml
 - include: create_rbd_client_dir.yml
@@ -1,38 +0,0 @@
#!/bin/bash

RETRIES="{{ handler_health_osd_check_retries }}"
DELAY="{{ handler_health_osd_check_delay }}"
CEPH_CLI="--name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/{{ cluster }}.keyring --cluster {{ cluster }}"

check_pgs() {
  while [ $RETRIES -ne 0 ]; do
    test "[""$(ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')""]" = "$(ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print [ i["count"] for i in json.load(sys.stdin)["pgmap"]["pgs_by_state"] if i["state_name"] == "active+clean"]')"
    RET=$?
    test $RET -eq 0 && return 0
    sleep $DELAY
    let RETRIES=RETRIES-1
  done
  # PGs not clean, exiting with return code 1
  echo "Error while running 'ceph $CEPH_CLI -s', PGs were not reported as active+clean"
  echo "It is possible that the cluster has less OSDs than the replica configuration"
  echo "Will refuse to continue"
  ceph $CEPH_CLI -s
  exit 1
}

for id in $(ls /var/lib/ceph/osd/ | sed 's/.*-//'); do
  # First, restart daemon(s)
  systemctl restart ceph-osd@${id}
  # We need to wait because it may take some time for the socket to actually exists
  COUNT=10
  # Wait and ensure the socket exists after restarting the daemon
  SOCKET=/var/run/ceph/{{ cluster }}-osd.${id}.asok
  while [ $COUNT -ne 0 ]; do
    test -S $SOCKET && check_pgs && continue 2
    sleep 1
    let COUNT=COUNT-1
  done
  # If we reach this point, it means the socket is not present.
  echo "Socket file ${SOCKET} could not be found, which means the osd daemon is not running."
  exit 1
done
@@ -367,7 +367,7 @@ os_tuning_params:
 ##########
 # DOCKER #
 ##########
-
+docker_exec_cmd:
 docker: false
 ceph_docker_image: "ceph/daemon"
 ceph_docker_image_tag: latest
@@ -17,39 +17,51 @@
- name: restart ceph mon daemon(s)
  command: /tmp/restart_mon_daemon.sh
  listen: "restart ceph mons"

  when:
    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
    - socket.rc == 0
    - ceph_current_fsid.rc == 0
    - mon_group_name in group_names

# This does not just restart OSDs but everything else too. Unfortunately
# at this time the ansible role does not have an OSD id list to use
# for restarting them specifically.
- block:
    - name: copy osd restart script
      template:
        src: restart_osd_daemon.sh.j2
        dest: /tmp/restart_osd_daemon.sh
        owner: root
        group: root
        mode: 0750
      listen: "restart ceph osds"

    - name: restart ceph osds daemon(s)
      command: /tmp/restart_osd_daemon.sh
      listen: "restart ceph osds"
      when: handler_health_osd_check

- name: copy osd restart script
  template:
    src: restart_osd_daemon.sh.j2
    dest: /tmp/restart_osd_daemon.sh
    owner: root
    group: root
    mode: 0750
  listen: "restart ceph osds"
  when:
    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
    # except when a crush location is specified. ceph-disk will start the osds before the osd crush location is specified
    - inventory_hostname in play_hosts
    - osd_group_name in group_names

- name: restart containerized ceph osds daemon(s)
  command: /tmp/restart_osd_daemon.sh
  listen: "restart ceph osds"
  with_items: "{{ socket_osd_container.results }}"
  when:
    # We do not want to run these checks on initial deployment (`socket_osd_container.results[n].rc == 0`)
    # except when a crush location is specified. ceph-disk will start the osds before the osd crush location is specified
    - ((crush_location is defined and crush_location) or item.get('rc') == 0)
    - handler_health_osd_check
    # See https://github.com/ceph/ceph-ansible/issues/1457 for the condition below
    - inventory_hostname in play_hosts
    - osd_group_name in group_names

- name: restart non-containerized ceph osds daemon(s)
  command: /tmp/restart_osd_daemon.sh
  listen: "restart ceph osds"
  when:
    # We do not want to run these checks on initial deployment (`socket_osd_container.results[n].rc == 0`)
    # except when a crush location is specified. ceph-disk will start the osds before the osd crush location is specified
    - ((crush_location is defined and crush_location) or socket.rc == 0)
    - ceph_current_fsid.rc == 0
    - osd_group_name in group_names
    # See https://github.com/ceph/ceph-ansible/issues/1457 for the condition below
    - handler_health_osd_check
    # See https://github.com/ceph/ceph-ansible/issues/1457 for the condition below
    - inventory_hostname in play_hosts
    - osd_group_name in group_names

- name: restart ceph mdss
  service:
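The socket checks exist so these handlers stay inert on a first deployment: stat only returns rc == 0 when a daemon admin socket is already present, so the registered socket (or socket_osd_container) result gates the restarts. A condensed sketch of the guard, reusing the variable names from the handlers above (illustrative only):

    - name: check for a ceph socket
      shell: stat /var/run/ceph/*.asok > /dev/null 2>&1
      changed_when: false
      failed_when: false
      register: socket

    - name: restart ceph osds daemon(s)
      command: /tmp/restart_osd_daemon.sh
      listen: restart ceph osds
      when:
        - socket.rc == 0              # a daemon was already running before this play
        - handler_health_osd_check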
@@ -0,0 +1,21 @@
---
# These checks are used to avoid running handlers at initial deployment.
- name: check for a ceph socket
  shell: |
    {{ docker_exec_cmd }} bash -c 'stat {{ rbd_client_admin_socket_path }}/*.asok > /dev/null 2>&1'
  changed_when: false
  failed_when: false
  always_run: true
  register: socket

- name: check for a ceph socket in containerized deployment (osds)
  shell: |
    docker exec ceph-osd-"{{ ansible_hostname }}"-"{{ item | replace('/', '') }}" bash -c 'stat /var/run/ceph/*.asok > /dev/null 2>&1'
  changed_when: false
  failed_when: false
  always_run: true
  register: socket_osd_container
  with_items: "{{ devices }}"
  when:
    - containerized_deployment
    - inventory_hostname in groups.get(osd_group_name)
@@ -1,2 +1,3 @@
 ---
 - include: facts.yml
+- include: check_socket.yml
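The monitor-side check above is prefixed with {{ docker_exec_cmd }}, so the same stat runs either directly on the host (when the variable is empty) or inside the monitor container. The variable is expected to be populated by the facts.yml included just before check_socket.yml; a hypothetical sketch of such a fact task (the container naming scheme is an assumption, not taken from this commit):

    - name: set docker_exec_cmd for containerized deployments
      set_fact:
        docker_exec_cmd: "docker exec ceph-mon-{{ ansible_hostname }}"   # assumed container name
      when: containerized_deployment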
@@ -3,13 +3,12 @@
 RETRIES="{{ handler_health_mon_check_retries }}"
 DELAY="{{ handler_health_mon_check_delay }}"
 MONITOR_NAME="{{ monitor_name }}"
-CLUSTER="{{ cluster }}"
-SOCKET=/var/run/ceph/${CLUSTER}-mon.${MONITOR_NAME}.asok
+SOCKET=/var/run/ceph/{{ cluster }}-mon.${MONITOR_NAME}.asok
 
 
 check_quorum() {
 while [ $RETRIES -ne 0 ]; do
-  MEMBERS=$(ceph --cluster ${CLUSTER} -s --format json | sed -r 's/.*"quorum_names":(\[[^]]+\]).*/\1/')
+  MEMBERS=$({{ docker_exec_cmd }} ceph --cluster {{ cluster }} -s --format json | sed -r 's/.*"quorum_names":(\[[^]]+\]).*/\1/')
   test "${MEMBERS/$MONITOR_NAME}" != "$MEMBERS" && exit 0
   sleep $DELAY
   let RETRIES=RETRIES-1
@@ -17,7 +16,7 @@ done
 # If we reach this point, it means there is a problem with the quorum
 echo "Error with quorum."
 echo "cluster status:"
-ceph --cluster ${CLUSTER} -s
+{{ docker_exec_cmd }} ceph --cluster {{ cluster }} -s
 exit 1
 }
 
@@ -27,7 +26,7 @@ systemctl restart ceph-mon@${MONITOR_NAME}
 COUNT=10
 # Wait and ensure the socket exists after restarting the daemon
 while [ $COUNT -ne 0 ]; do
-  test -S $SOCKET && check_quorum
+  {{ docker_exec_cmd }} test -S $SOCKET && check_quorum
   sleep 1
   let COUNT=COUNT-1
 done
@@ -0,0 +1,78 @@
#!/bin/bash

RETRIES="{{ handler_health_osd_check_retries }}"
DELAY="{{ handler_health_osd_check_delay }}"
CEPH_CLI="--name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/{{ cluster }}.keyring --cluster {{ cluster }}"

check_pgs() {
  while [ $RETRIES -ne 0 ]; do
    test "[""$($docker_exec ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')""]" = "$($docker_exec ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print [ i["count"] for i in json.load(sys.stdin)["pgmap"]["pgs_by_state"] if i["state_name"] == "active+clean"]')"
    RET=$?
    test $RET -eq 0 && return 0
    sleep $DELAY
    let RETRIES=RETRIES-1
  done
  # PGs not clean, exiting with return code 1
  echo "Error while running 'ceph $CEPH_CLI -s', PGs were not reported as active+clean"
  echo "It is possible that the cluster has less OSDs than the replica configuration"
  echo "Will refuse to continue"
  $docker_exec ceph "$CEPH_CLI" -s
  exit 1
}

wait_for_socket_in_docker() {
  if ! docker exec "$1" timeout 10 bash -c "while [ ! -e /var/run/ceph/*.asok ]; do sleep 1 ; done"; then
    log "Timed out while trying to look for a Ceph OSD socket."
    log "Abort mission!"
    exit 1
  fi
}

get_dev_name() {
  echo $1 | sed -r 's/ceph-osd@([a-z]{1,4})\.service/\1/'
}

get_docker_id_from_dev_name() {
  local id
  local count
  count=10
  while [ $count -ne 0 ]; do
    id=$(docker ps -q -f "name=$1")
    test "$id" != "" && break
    sleep 1
    let count=count-1
  done
  echo "$id"
}

get_docker_osd_id() {
  wait_for_socket_in_docker $1
  docker exec "$1" ls /var/run/ceph | cut -d'.' -f2
}

# For containerized deployments, the unit file looks like: ceph-osd@sda.service
# For non-containerized deployments, the unit file looks like: ceph-osd@0.service
for unit in $(systemctl list-units | grep -oE "ceph-osd@([0-9]{1,2}|[a-z]+).service"); do
  # First, restart daemon(s)
  systemctl restart "${unit}"
  # We need to wait because it may take some time for the socket to actually exists
  COUNT=10
  # Wait and ensure the socket exists after restarting the daemon
  {% if containerized_deployment -%}
  id=$(get_dev_name "$unit")
  container_id=$(get_docker_id_from_dev_name "$id")
  osd_id=$(get_docker_osd_id "$container_id")
  docker_exec="docker exec $container_id"
  {% else %}
  osd_id=$(echo ${unit#ceph-osd@} | grep -oE '[0-9]{1,2}')
  {% endif %}
  SOCKET=/var/run/ceph/test-osd.${osd_id}.asok
  while [ $COUNT -ne 0 ]; do
    $docker_exec test -S "$SOCKET" && check_pgs && continue 2
    sleep 1
    let COUNT=COUNT-1
  done
  # If we reach this point, it means the socket is not present.
  echo "Socket file ${SOCKET} could not be found, which means the osd daemon is not running."
  exit 1
done
@@ -44,10 +44,15 @@
     config_type: ini
   when:
     - (not mon_containerized_default_ceph_conf_with_kv and
-        (inventory_hostname in groups.get(mon_group_name, []))) or
+        (inventory_hostname in groups.get(mon_group_name, []) or inventory_hostname in groups.get(osd_group_name, []))) or
       (not mon_containerized_default_ceph_conf_with_kv and
       ((groups.get(nfs_group_name, []) | length > 0)
       and (inventory_hostname == groups.get(nfs_group_name, [])[0])))
+  notify:
+    - restart ceph mons
+    - restart ceph osds
+    - restart ceph mdss
+    - restart ceph rgws
 
 - name: set fsid fact when generate_fsid = true
   set_fact:
@@ -105,7 +105,6 @@ openstack_keys:
 ##########
 # DOCKER #
 ##########
-docker_exec_cmd:
 ceph_mon_docker_subnet: "{{ public_network }}"# subnet of the monitor_interface
 
 # ceph_mon_docker_extra_env:
@@ -2,14 +2,21 @@
 - name: generate systemd unit file
   become: true
   template:
-    src: "{{ role_path }}/templates/ceph-rgw.service.j2"
-    dest: /etc/systemd/system/ceph-rgw@.service
+    src: "{{ role_path }}/templates/ceph-radosgw.service.j2"
+    dest: /etc/systemd/system/ceph-radosgw@.service
     owner: "root"
     group: "root"
     mode: "0644"
 
+# For backward compatibility
+- name: disable old systemd unit ('ceph-rgw@') if present
+  service:
+    name: ceph-rgw@{{ ansible_hostname }}
+    state: disable
+  ignore_errors: true
+
 - name: enable systemd unit file for rgw instance
-  shell: systemctl enable ceph-rgw@{{ ansible_hostname }}.service
+  shell: systemctl enable ceph-radosgw@{{ ansible_hostname }}.service
   failed_when: false
   changed_when: false
 
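A side note on the disable task above: Ansible's service module expresses disabling through enabled: no, and its state parameter accepts started, stopped, restarted, and reloaded (there is no disable state). Written strictly against the module's documented parameters, an equivalent task would look roughly like this (illustrative sketch, not part of the commit):

    - name: disable old systemd unit ('ceph-rgw@') if present
      service:
        name: ceph-rgw@{{ ansible_hostname }}
        state: stopped
        enabled: no
      ignore_errors: true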
@@ -20,7 +27,7 @@
 
 - name: systemd start rgw container
   service:
-    name: ceph-rgw@{{ ansible_hostname }}
+    name: ceph-radosgw@{{ ansible_hostname }}
     state: started
     enabled: yes
   changed_when: false