mirror of https://github.com/ceph/ceph-ansible.git
Add handlers for containerized deployment
Until now, there have been no handlers for containerized deployments. Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com> pull/1727/head
parent
fc6b6e9859
commit
7a333d05ce
|
@ -97,13 +97,21 @@
|
||||||
|
|
||||||
tasks:
|
tasks:
|
||||||
|
|
||||||
- name: disable ceph rgw service
|
# For backward compatibility
|
||||||
|
- name: disable ceph rgw service (old unit name, for backward compatibility)
|
||||||
service:
|
service:
|
||||||
name: "ceph-rgw@{{ ansible_hostname }}"
|
name: "ceph-rgw@{{ ansible_hostname }}"
|
||||||
state: stopped
|
state: stopped
|
||||||
enabled: no
|
enabled: no
|
||||||
ignore_errors: true
|
ignore_errors: true
|
||||||
|
|
||||||
|
- name: disable ceph rgw service (new unit name)
|
||||||
|
service:
|
||||||
|
name: "ceph-radosgw@{{ ansible_hostname }}"
|
||||||
|
state: stopped
|
||||||
|
enabled: no
|
||||||
|
ignore_errors: true
|
||||||
|
|
||||||
- name: remove ceph rgw container
|
- name: remove ceph rgw container
|
||||||
docker:
|
docker:
|
||||||
image: "{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}"
|
image: "{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}"
|
||||||
|
@ -113,8 +121,12 @@
|
||||||
|
|
||||||
- name: remove ceph rgw service
|
- name: remove ceph rgw service
|
||||||
file:
|
file:
|
||||||
path: /etc/systemd/system/ceph-rgw@.service
|
path: "{{ item }}"
|
||||||
state: absent
|
state: absent
|
||||||
|
with_items:
|
||||||
|
# For backward compatibility
|
||||||
|
- /etc/systemd/system/ceph-rgw@.service
|
||||||
|
- /etc/systemd/system/ceph-radosgw@.service
|
||||||
|
|
||||||
- name: remove ceph rgw image
|
- name: remove ceph rgw image
|
||||||
docker_image:
|
docker_image:
|
||||||
|
|
|
@ -450,7 +450,7 @@
|
||||||
|
|
||||||
- name: restart containerized ceph rgws with systemd
|
- name: restart containerized ceph rgws with systemd
|
||||||
service:
|
service:
|
||||||
name: ceph-rgw@{{ ansible_hostname }}
|
name: ceph-radosgw@{{ ansible_hostname }}
|
||||||
state: restarted
|
state: restarted
|
||||||
enabled: yes
|
enabled: yes
|
||||||
when:
|
when:
|
||||||
|
|
|
@ -1,15 +0,0 @@
|
||||||
---
|
|
||||||
# These checks are used to avoid running handlers at initial deployment.
|
|
||||||
- name: check for a ceph socket
|
|
||||||
shell: "stat /var/run/ceph/*.asok > /dev/null 2>&1"
|
|
||||||
changed_when: false
|
|
||||||
failed_when: false
|
|
||||||
always_run: true
|
|
||||||
register: socket
|
|
||||||
|
|
||||||
- name: check for a rados gateway socket
|
|
||||||
shell: "stat {{ rbd_client_admin_socket_path }}*.asok > /dev/null 2>&1"
|
|
||||||
changed_when: false
|
|
||||||
failed_when: false
|
|
||||||
always_run: true
|
|
||||||
register: socketrgw
|
|
|
@ -105,7 +105,6 @@
|
||||||
- ceph_current_fsid.rc == 0
|
- ceph_current_fsid.rc == 0
|
||||||
- mon_group_name in group_names
|
- mon_group_name in group_names
|
||||||
|
|
||||||
- include: ./checks/check_socket.yml
|
|
||||||
- include: create_ceph_initial_dirs.yml
|
- include: create_ceph_initial_dirs.yml
|
||||||
- include: generate_ceph_conf.yml
|
- include: generate_ceph_conf.yml
|
||||||
- include: create_rbd_client_dir.yml
|
- include: create_rbd_client_dir.yml
|
||||||
|
|
|
@ -1,38 +0,0 @@
|
||||||
#!/bin/bash
|
|
||||||
|
|
||||||
RETRIES="{{ handler_health_osd_check_retries }}"
|
|
||||||
DELAY="{{ handler_health_osd_check_delay }}"
|
|
||||||
CEPH_CLI="--name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/{{ cluster }}.keyring --cluster {{ cluster }}"
|
|
||||||
|
|
||||||
check_pgs() {
|
|
||||||
while [ $RETRIES -ne 0 ]; do
|
|
||||||
test "[""$(ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')""]" = "$(ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print [ i["count"] for i in json.load(sys.stdin)["pgmap"]["pgs_by_state"] if i["state_name"] == "active+clean"]')"
|
|
||||||
RET=$?
|
|
||||||
test $RET -eq 0 && return 0
|
|
||||||
sleep $DELAY
|
|
||||||
let RETRIES=RETRIES-1
|
|
||||||
done
|
|
||||||
# PGs not clean, exiting with return code 1
|
|
||||||
echo "Error while running 'ceph $CEPH_CLI -s', PGs were not reported as active+clean"
|
|
||||||
echo "It is possible that the cluster has less OSDs than the replica configuration"
|
|
||||||
echo "Will refuse to continue"
|
|
||||||
ceph $CEPH_CLI -s
|
|
||||||
exit 1
|
|
||||||
}
|
|
||||||
|
|
||||||
for id in $(ls /var/lib/ceph/osd/ | sed 's/.*-//'); do
|
|
||||||
# First, restart daemon(s)
|
|
||||||
systemctl restart ceph-osd@${id}
|
|
||||||
# We need to wait because it may take some time for the socket to actually exists
|
|
||||||
COUNT=10
|
|
||||||
# Wait and ensure the socket exists after restarting the daemon
|
|
||||||
SOCKET=/var/run/ceph/{{ cluster }}-osd.${id}.asok
|
|
||||||
while [ $COUNT -ne 0 ]; do
|
|
||||||
test -S $SOCKET && check_pgs && continue 2
|
|
||||||
sleep 1
|
|
||||||
let COUNT=COUNT-1
|
|
||||||
done
|
|
||||||
# If we reach this point, it means the socket is not present.
|
|
||||||
echo "Socket file ${SOCKET} could not be found, which means the osd daemon is not running."
|
|
||||||
exit 1
|
|
||||||
done
|
|
|
@ -367,7 +367,7 @@ os_tuning_params:
|
||||||
##########
|
##########
|
||||||
# DOCKER #
|
# DOCKER #
|
||||||
##########
|
##########
|
||||||
|
docker_exec_cmd:
|
||||||
docker: false
|
docker: false
|
||||||
ceph_docker_image: "ceph/daemon"
|
ceph_docker_image: "ceph/daemon"
|
||||||
ceph_docker_image_tag: latest
|
ceph_docker_image_tag: latest
|
||||||
|
|
|
@ -17,17 +17,14 @@
|
||||||
- name: restart ceph mon daemon(s)
|
- name: restart ceph mon daemon(s)
|
||||||
command: /tmp/restart_mon_daemon.sh
|
command: /tmp/restart_mon_daemon.sh
|
||||||
listen: "restart ceph mons"
|
listen: "restart ceph mons"
|
||||||
|
|
||||||
when:
|
when:
|
||||||
# We do not want to run these checks on initial deployment (`socket.rc == 0`)
|
# We do not want to run these checks on initial deployment (`socket.rc == 0`)
|
||||||
- socket.rc == 0
|
- socket.rc == 0
|
||||||
- ceph_current_fsid.rc == 0
|
|
||||||
- mon_group_name in group_names
|
- mon_group_name in group_names
|
||||||
|
|
||||||
# This does not just restart OSDs but everything else too. Unfortunately
|
# This does not just restart OSDs but everything else too. Unfortunately
|
||||||
# at this time the ansible role does not have an OSD id list to use
|
# at this time the ansible role does not have an OSD id list to use
|
||||||
# for restarting them specifically.
|
# for restarting them specifically.
|
||||||
- block:
|
|
||||||
- name: copy osd restart script
|
- name: copy osd restart script
|
||||||
template:
|
template:
|
||||||
src: restart_osd_daemon.sh.j2
|
src: restart_osd_daemon.sh.j2
|
||||||
|
@ -36,20 +33,35 @@
|
||||||
group: root
|
group: root
|
||||||
mode: 0750
|
mode: 0750
|
||||||
listen: "restart ceph osds"
|
listen: "restart ceph osds"
|
||||||
|
when:
|
||||||
|
- inventory_hostname in play_hosts
|
||||||
|
- osd_group_name in group_names
|
||||||
|
|
||||||
- name: restart ceph osds daemon(s)
|
- name: restart containerized ceph osds daemon(s)
|
||||||
command: /tmp/restart_osd_daemon.sh
|
command: /tmp/restart_osd_daemon.sh
|
||||||
listen: "restart ceph osds"
|
listen: "restart ceph osds"
|
||||||
when: handler_health_osd_check
|
with_items: "{{ socket_osd_container.results }}"
|
||||||
|
|
||||||
when:
|
when:
|
||||||
# We do not want to run these checks on initial deployment (`socket.rc == 0`)
|
# We do not want to run these checks on initial deployment (`socket_osd_container.results[n].rc == 0`)
|
||||||
|
# except when a crush location is specified. ceph-disk will start the osds before the osd crush location is specified
|
||||||
|
- ((crush_location is defined and crush_location) or item.get('rc') == 0)
|
||||||
|
- handler_health_osd_check
|
||||||
|
# See https://github.com/ceph/ceph-ansible/issues/1457 for the condition below
|
||||||
|
- inventory_hostname in play_hosts
|
||||||
|
- osd_group_name in group_names
|
||||||
|
|
||||||
|
- name: restart non-containerized ceph osds daemon(s)
|
||||||
|
command: /tmp/restart_osd_daemon.sh
|
||||||
|
listen: "restart ceph osds"
|
||||||
|
when:
|
||||||
|
# We do not want to run these checks on initial deployment (`socket_osd_container.results[n].rc == 0`)
|
||||||
# except when a crush location is specified. ceph-disk will start the osds before the osd crush location is specified
|
# except when a crush location is specified. ceph-disk will start the osds before the osd crush location is specified
|
||||||
- ((crush_location is defined and crush_location) or socket.rc == 0)
|
- ((crush_location is defined and crush_location) or socket.rc == 0)
|
||||||
- ceph_current_fsid.rc == 0
|
- ceph_current_fsid.rc == 0
|
||||||
- osd_group_name in group_names
|
- handler_health_osd_check
|
||||||
# See https://github.com/ceph/ceph-ansible/issues/1457 for the condition below
|
# See https://github.com/ceph/ceph-ansible/issues/1457 for the condition below
|
||||||
- inventory_hostname in play_hosts
|
- inventory_hostname in play_hosts
|
||||||
|
- osd_group_name in group_names
|
||||||
|
|
||||||
- name: restart ceph mdss
|
- name: restart ceph mdss
|
||||||
service:
|
service:
|
|
@ -0,0 +1,21 @@
|
||||||
|
---
|
||||||
|
# These checks are used to avoid running handlers at initial deployment.
|
||||||
|
- name: check for a ceph socket
|
||||||
|
shell: |
|
||||||
|
{{ docker_exec_cmd }} bash -c 'stat {{ rbd_client_admin_socket_path }}/*.asok > /dev/null 2>&1'
|
||||||
|
changed_when: false
|
||||||
|
failed_when: false
|
||||||
|
always_run: true
|
||||||
|
register: socket
|
||||||
|
|
||||||
|
- name: check for a ceph socket in containerized deployment (osds)
|
||||||
|
shell: |
|
||||||
|
docker exec ceph-osd-"{{ ansible_hostname }}"-"{{ item | replace('/', '') }}" bash -c 'stat /var/run/ceph/*.asok > /dev/null 2>&1'
|
||||||
|
changed_when: false
|
||||||
|
failed_when: false
|
||||||
|
always_run: true
|
||||||
|
register: socket_osd_container
|
||||||
|
with_items: "{{ devices }}"
|
||||||
|
when:
|
||||||
|
- containerized_deployment
|
||||||
|
- inventory_hostname in groups.get(osd_group_name, [])  # default to [] so a missing OSD group is not tested as `in None`
|
|
@ -1,2 +1,3 @@
|
||||||
---
|
---
|
||||||
- include: facts.yml
|
- include: facts.yml
|
||||||
|
- include: check_socket.yml
|
||||||
|
|
|
@ -3,13 +3,12 @@
|
||||||
RETRIES="{{ handler_health_mon_check_retries }}"
|
RETRIES="{{ handler_health_mon_check_retries }}"
|
||||||
DELAY="{{ handler_health_mon_check_delay }}"
|
DELAY="{{ handler_health_mon_check_delay }}"
|
||||||
MONITOR_NAME="{{ monitor_name }}"
|
MONITOR_NAME="{{ monitor_name }}"
|
||||||
CLUSTER="{{ cluster }}"
|
SOCKET=/var/run/ceph/{{ cluster }}-mon.${MONITOR_NAME}.asok
|
||||||
SOCKET=/var/run/ceph/${CLUSTER}-mon.${MONITOR_NAME}.asok
|
|
||||||
|
|
||||||
|
|
||||||
check_quorum() {
|
check_quorum() {
|
||||||
while [ $RETRIES -ne 0 ]; do
|
while [ $RETRIES -ne 0 ]; do
|
||||||
MEMBERS=$(ceph --cluster ${CLUSTER} -s --format json | sed -r 's/.*"quorum_names":(\[[^]]+\]).*/\1/')
|
MEMBERS=$({{ docker_exec_cmd }} ceph --cluster {{ cluster }} -s --format json | sed -r 's/.*"quorum_names":(\[[^]]+\]).*/\1/')
|
||||||
test "${MEMBERS/$MONITOR_NAME}" != "$MEMBERS" && exit 0
|
test "${MEMBERS/$MONITOR_NAME}" != "$MEMBERS" && exit 0
|
||||||
sleep $DELAY
|
sleep $DELAY
|
||||||
let RETRIES=RETRIES-1
|
let RETRIES=RETRIES-1
|
||||||
|
@ -17,7 +16,7 @@ done
|
||||||
# If we reach this point, it means there is a problem with the quorum
|
# If we reach this point, it means there is a problem with the quorum
|
||||||
echo "Error with quorum."
|
echo "Error with quorum."
|
||||||
echo "cluster status:"
|
echo "cluster status:"
|
||||||
ceph --cluster ${CLUSTER} -s
|
{{ docker_exec_cmd }} ceph --cluster {{ cluster }} -s
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -27,7 +26,7 @@ systemctl restart ceph-mon@${MONITOR_NAME}
|
||||||
COUNT=10
|
COUNT=10
|
||||||
# Wait and ensure the socket exists after restarting the daemon
|
# Wait and ensure the socket exists after restarting the daemon
|
||||||
while [ $COUNT -ne 0 ]; do
|
while [ $COUNT -ne 0 ]; do
|
||||||
test -S $SOCKET && check_quorum
|
{{ docker_exec_cmd }} test -S $SOCKET && check_quorum
|
||||||
sleep 1
|
sleep 1
|
||||||
let COUNT=COUNT-1
|
let COUNT=COUNT-1
|
||||||
done
|
done
|
|
@ -0,0 +1,78 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
RETRIES="{{ handler_health_osd_check_retries }}"
|
||||||
|
DELAY="{{ handler_health_osd_check_delay }}"
|
||||||
|
CEPH_CLI="--name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/{{ cluster }}.keyring --cluster {{ cluster }}"
|
||||||
|
|
||||||
|
check_pgs() {  # poll until every PG is active+clean; exits 1 once RETRIES is exhausted
|
||||||
|
while [ $RETRIES -ne 0 ]; do
|
||||||
|
test "[""$($docker_exec ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')""]" = "$($docker_exec ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print [ i["count"] for i in json.load(sys.stdin)["pgmap"]["pgs_by_state"] if i["state_name"] == "active+clean"]')"
|
||||||
|
RET=$?
|
||||||
|
test $RET -eq 0 && return 0
|
||||||
|
sleep $DELAY
|
||||||
|
let RETRIES=RETRIES-1
|
||||||
|
done
|
||||||
|
# PGs not clean, exiting with return code 1
|
||||||
|
echo "Error while running 'ceph $CEPH_CLI -s', PGs were not reported as active+clean"
|
||||||
|
echo "It is possible that the cluster has less OSDs than the replica configuration"
|
||||||
|
echo "Will refuse to continue"
|
||||||
|
$docker_exec ceph $CEPH_CLI -s  # CEPH_CLI must stay unquoted: it holds several options that have to be word-split
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
wait_for_socket_in_docker() {  # $1: container id; waits up to 10s for an admin socket inside it
|
||||||
|
if ! docker exec "$1" timeout 10 bash -c "while [ ! -e /var/run/ceph/*.asok ]; do sleep 1 ; done"; then
|
||||||
|
echo "Timed out while trying to look for a Ceph OSD socket."  # echo, not log: no log function exists in this script
|
||||||
|
echo "Abort mission!"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
get_dev_name() {
|
||||||
|
echo $1 | sed -r 's/ceph-osd@([a-z]{1,4})\.service/\1/'
|
||||||
|
}
|
||||||
|
|
||||||
|
get_docker_id_from_dev_name() {
|
||||||
|
local id
|
||||||
|
local count
|
||||||
|
count=10
|
||||||
|
while [ $count -ne 0 ]; do
|
||||||
|
id=$(docker ps -q -f "name=$1")
|
||||||
|
test "$id" != "" && break
|
||||||
|
sleep 1
|
||||||
|
let count=count-1
|
||||||
|
done
|
||||||
|
echo "$id"
|
||||||
|
}
|
||||||
|
|
||||||
|
get_docker_osd_id() {
|
||||||
|
wait_for_socket_in_docker $1
|
||||||
|
docker exec "$1" ls /var/run/ceph | cut -d'.' -f2
|
||||||
|
}
|
||||||
|
|
||||||
|
# For containerized deployments, the unit file looks like: ceph-osd@sda.service
|
||||||
|
# For non-containerized deployments, the unit file looks like: ceph-osd@0.service
|
||||||
|
for unit in $(systemctl list-units | grep -oE "ceph-osd@([0-9]+|[a-z]+)\.service"); do  # [0-9]+ so OSD ids >= 100 are not skipped
|
||||||
|
# First, restart daemon(s)
|
||||||
|
systemctl restart "${unit}"
|
||||||
|
# We need to wait because it may take some time for the socket to actually exists
|
||||||
|
COUNT=10
|
||||||
|
# Wait and ensure the socket exists after restarting the daemon
|
||||||
|
{% if containerized_deployment -%}
|
||||||
|
id=$(get_dev_name "$unit")
|
||||||
|
container_id=$(get_docker_id_from_dev_name "$id")
|
||||||
|
osd_id=$(get_docker_osd_id "$container_id")
|
||||||
|
docker_exec="docker exec $container_id"
|
||||||
|
{% else %}
|
||||||
|
osd_id=$(echo ${unit#ceph-osd@} | grep -oE '[0-9]+')  # whole numeric id, not just its first two digits
|
||||||
|
{% endif %}
|
||||||
|
SOCKET=/var/run/ceph/{{ cluster }}-osd.${osd_id}.asok  # socket is named after the configured cluster, not a fixed "test"
|
||||||
|
while [ $COUNT -ne 0 ]; do
|
||||||
|
$docker_exec test -S "$SOCKET" && check_pgs && continue 2
|
||||||
|
sleep 1
|
||||||
|
let COUNT=COUNT-1
|
||||||
|
done
|
||||||
|
# If we reach this point, it means the socket is not present.
|
||||||
|
echo "Socket file ${SOCKET} could not be found, which means the osd daemon is not running."
|
||||||
|
exit 1
|
||||||
|
done
|
|
@ -44,10 +44,15 @@
|
||||||
config_type: ini
|
config_type: ini
|
||||||
when:
|
when:
|
||||||
- (not mon_containerized_default_ceph_conf_with_kv and
|
- (not mon_containerized_default_ceph_conf_with_kv and
|
||||||
(inventory_hostname in groups.get(mon_group_name, []))) or
|
(inventory_hostname in groups.get(mon_group_name, []) or inventory_hostname in groups.get(osd_group_name, []))) or
|
||||||
(not mon_containerized_default_ceph_conf_with_kv and
|
(not mon_containerized_default_ceph_conf_with_kv and
|
||||||
((groups.get(nfs_group_name, []) | length > 0)
|
((groups.get(nfs_group_name, []) | length > 0)
|
||||||
and (inventory_hostname == groups.get(nfs_group_name, [])[0])))
|
and (inventory_hostname == groups.get(nfs_group_name, [])[0])))
|
||||||
|
notify:
|
||||||
|
- restart ceph mons
|
||||||
|
- restart ceph osds
|
||||||
|
- restart ceph mdss
|
||||||
|
- restart ceph rgws
|
||||||
|
|
||||||
- name: set fsid fact when generate_fsid = true
|
- name: set fsid fact when generate_fsid = true
|
||||||
set_fact:
|
set_fact:
|
||||||
|
|
|
@ -105,7 +105,6 @@ openstack_keys:
|
||||||
##########
|
##########
|
||||||
# DOCKER #
|
# DOCKER #
|
||||||
##########
|
##########
|
||||||
docker_exec_cmd:
|
|
||||||
ceph_mon_docker_subnet: "{{ public_network }}"# subnet of the monitor_interface
|
ceph_mon_docker_subnet: "{{ public_network }}"# subnet of the monitor_interface
|
||||||
|
|
||||||
# ceph_mon_docker_extra_env:
|
# ceph_mon_docker_extra_env:
|
||||||
|
|
|
@ -2,14 +2,21 @@
|
||||||
- name: generate systemd unit file
|
- name: generate systemd unit file
|
||||||
become: true
|
become: true
|
||||||
template:
|
template:
|
||||||
src: "{{ role_path }}/templates/ceph-rgw.service.j2"
|
src: "{{ role_path }}/templates/ceph-radosgw.service.j2"
|
||||||
dest: /etc/systemd/system/ceph-rgw@.service
|
dest: /etc/systemd/system/ceph-radosgw@.service
|
||||||
owner: "root"
|
owner: "root"
|
||||||
group: "root"
|
group: "root"
|
||||||
mode: "0644"
|
mode: "0644"
|
||||||
|
|
||||||
|
# For backward compatibility
|
||||||
|
- name: disable old systemd unit ('ceph-rgw@') if present
|
||||||
|
service:
|
||||||
|
name: ceph-rgw@{{ ansible_hostname }}
|
||||||
|
state: stopped
|
||||||
|
||||||
|
enabled: no  # "disable" is not a valid service state; stop the unit and turn off its autostart
|
||||||
|
ignore_errors: true
|
||||||
|
|
||||||
- name: enable systemd unit file for rgw instance
|
- name: enable systemd unit file for rgw instance
|
||||||
shell: systemctl enable ceph-rgw@{{ ansible_hostname }}.service
|
shell: systemctl enable ceph-radosgw@{{ ansible_hostname }}.service
|
||||||
failed_when: false
|
failed_when: false
|
||||||
changed_when: false
|
changed_when: false
|
||||||
|
|
||||||
|
@ -20,7 +27,7 @@
|
||||||
|
|
||||||
- name: systemd start rgw container
|
- name: systemd start rgw container
|
||||||
service:
|
service:
|
||||||
name: ceph-rgw@{{ ansible_hostname }}
|
name: ceph-radosgw@{{ ansible_hostname }}
|
||||||
state: started
|
state: started
|
||||||
enabled: yes
|
enabled: yes
|
||||||
changed_when: false
|
changed_when: false
|
||||||
|
|
Loading…
Reference in New Issue