shrink mon and osd

Rework the playbooks that shrink a monitor and an OSD, and add test
scenarios for both.

Closes: https://bugzilla.redhat.com/show_bug.cgi?id=1366807
Signed-off-by: Sébastien Han <seb@redhat.com>
pull/1836/head
Sébastien Han 2017-08-31 00:07:28 +02:00
parent 6ae8219018
commit 298a63c437
8 changed files with 261 additions and 254 deletions

Vagrantfile

@@ -187,7 +187,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
   (0..MGRS - 1).each do |i|
     config.vm.define "#{LABEL_PREFIX}mgr#{i}" do |mgr|
-      mgr.vm.hostname = "#{LABEL_PREFIX}ceph-mgr#{i}"
+      mgr.vm.hostname = "#{LABEL_PREFIX}mgr#{i}"
       if ASSIGN_STATIC_IP
         mgr.vm.network :private_network,
           ip: "#{PUBLIC_SUBNET}.3#{i}"
@@ -223,7 +223,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
   (0..CLIENTS - 1).each do |i|
     config.vm.define "#{LABEL_PREFIX}client#{i}" do |client|
       client.vm.box = CLIENT_BOX
-      client.vm.hostname = "#{LABEL_PREFIX}ceph-client#{i}"
+      client.vm.hostname = "#{LABEL_PREFIX}client#{i}"
       if ASSIGN_STATIC_IP
         client.vm.network :private_network,
           ip: "#{PUBLIC_SUBNET}.4#{i}"
@@ -258,7 +258,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
   (0..NRGWS - 1).each do |i|
     config.vm.define "#{LABEL_PREFIX}rgw#{i}" do |rgw|
-      rgw.vm.hostname = "#{LABEL_PREFIX}ceph-rgw#{i}"
+      rgw.vm.hostname = "#{LABEL_PREFIX}rgw#{i}"
       if ASSIGN_STATIC_IP
         rgw.vm.network :private_network,
           ip: "#{PUBLIC_SUBNET}.5#{i}"
@@ -294,7 +294,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
   (0..NNFSS - 1).each do |i|
     config.vm.define "nfs#{i}" do |nfs|
-      nfs.vm.hostname = "ceph-nfs#{i}"
+      nfs.vm.hostname = "#{LABEL_PREFIX}ceph-nfs#{i}"
       if ASSIGN_STATIC_IP
         nfs.vm.network :private_network,
           ip: "#{PUBLIC_SUBNET}.6#{i}"
@@ -330,7 +330,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
   (0..NMDSS - 1).each do |i|
     config.vm.define "#{LABEL_PREFIX}mds#{i}" do |mds|
-      mds.vm.hostname = "#{LABEL_PREFIX}ceph-mds#{i}"
+      mds.vm.hostname = "#{LABEL_PREFIX}mds#{i}"
       if ASSIGN_STATIC_IP
         mds.vm.network :private_network,
           ip: "#{PUBLIC_SUBNET}.7#{i}"
@@ -364,7 +364,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
   (0..NRBD_MIRRORS - 1).each do |i|
     config.vm.define "#{LABEL_PREFIX}rbd_mirror#{i}" do |rbd_mirror|
-      rbd_mirror.vm.hostname = "#{LABEL_PREFIX}ceph-rbd-mirror#{i}"
+      rbd_mirror.vm.hostname = "#{LABEL_PREFIX}rbd-mirror#{i}"
       if ASSIGN_STATIC_IP
         rbd_mirror.vm.network :private_network,
           ip: "#{PUBLIC_SUBNET}.8#{i}"
@@ -398,7 +398,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
   (0..NISCSI_GWS - 1).each do |i|
     config.vm.define "#{LABEL_PREFIX}iscsi_gw#{i}" do |iscsi_gw|
-      iscsi_gw.vm.hostname = "#{LABEL_PREFIX}ceph-iscsi-gw#{i}"
+      iscsi_gw.vm.hostname = "#{LABEL_PREFIX}iscsi-gw#{i}"
       if ASSIGN_STATIC_IP
         iscsi_gw.vm.network :private_network,
           ip: "#{PUBLIC_SUBNET}.9#{i}"
@@ -432,7 +432,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
   (0..NMONS - 1).each do |i|
     config.vm.define "#{LABEL_PREFIX}mon#{i}" do |mon|
-      mon.vm.hostname = "#{LABEL_PREFIX}ceph-mon#{i}"
+      mon.vm.hostname = "#{LABEL_PREFIX}mon#{i}"
       if ASSIGN_STATIC_IP
         mon.vm.network :private_network,
           ip: "#{PUBLIC_SUBNET}.1#{i}"
@@ -467,7 +467,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
   (0..NOSDS - 1).each do |i|
     config.vm.define "#{LABEL_PREFIX}osd#{i}" do |osd|
-      osd.vm.hostname = "#{LABEL_PREFIX}ceph-osd#{i}"
+      osd.vm.hostname = "#{LABEL_PREFIX}osd#{i}"
       if ASSIGN_STATIC_IP
         osd.vm.network :private_network,
           ip: "#{PUBLIC_SUBNET}.10#{i}"

infrastructure-playbooks/shrink-mon.yml

@@ -1,9 +1,9 @@
 ---
 # This playbook shrinks the Ceph monitors from your cluster
-# It can remove any number of monitor(s) from the cluster and ALL THEIR DATA
+# It can remove a Ceph monitor from the cluster and ALL ITS DATA
 #
 # Use it like this:
-# ansible-playbook shrink-mon.yml -e mon_host=ceph-mon01,ceph-mon02
+# ansible-playbook shrink-mon.yml -e mon_to_kill=ceph-mon01
 # Prompts for confirmation to shrink, defaults to no and
 # doesn't shrink the cluster. yes shrinks the cluster.
 #
@@ -12,12 +12,21 @@
 # automation scripts to avoid interactive prompt.
 
-- name: confirm whether user really meant to remove monitor(s) from the ceph cluster
+- name: gather facts and check the init system
+  hosts:
+    - "{{ mon_group_name|default('mons') }}"
+  become: true
+  tasks:
+    - debug: msg="gather facts on all Ceph hosts for following reference"
+
+- name: confirm whether user really meant to remove monitor from the ceph cluster
   hosts:
     - localhost
+  gather_facts: false
   become: true
   vars_prompt:
@@ -26,123 +35,110 @@
       default: 'no'
       private: no
 
-  tasks:
-    - include_vars: roles/ceph-defaults/defaults/main.yml
-    - include_vars: group_vars/all.yml
+  vars:
+    mon_group_name: mons
+
+  pre_tasks:
     - name: exit playbook, if only one monitor is present in cluster
       fail:
         msg: "You are about to shrink the only monitor present in the cluster.
               If you really want to do that, please use the purge-cluster playbook."
-      when: "{{ groups[mon_group_name] | length | int == 1 }}"
+      when:
+        - groups[mon_group_name] | length | int == 1
+
+    - name: exit playbook, if no monitor was given
+      fail:
+        msg: "mon_to_kill must be declared
+              Exiting shrink-cluster playbook, no monitor was removed.
+              On the command line when invoking the playbook, you can use
+              -e mon_to_kill=ceph-mon01 argument. You can only remove a single monitor each time the playbook runs."
+      when:
+        - mon_to_kill is not defined
+
+    - name: exit playbook, if the monitor is not part of the inventory
+      fail:
+        msg: "It seems that the host given is not part of your inventory, please make sure it is."
+      when:
+        - mon_to_kill not in groups[mon_group_name]
 
     - name: exit playbook, if user did not mean to shrink cluster
       fail:
-        msg: "Exiting shrink-mon playbook, no monitor(s) was/were removed.
+        msg: "Exiting shrink-mon playbook, no monitor was removed.
               To shrink the cluster, either say 'yes' on the prompt or
               or use `-e ireallymeanit=yes` on the command line when
               invoking the playbook"
-      when: ireallymeanit != 'yes'
-
-    - name: exit playbook, if no monitor(s) was/were given
-      fail:
-        msg: "mon_host must be declared
-              Exiting shrink-cluster playbook, no monitor(s) was/were removed.
-              On the command line when invoking the playbook, you can use
-              -e mon_host=ceph-mon01,ceph-mon02 argument."
-      when: mon_host is not defined
-
-    - name: test if ceph command exist
-      command: command -v ceph
-      changed_when: false
-      failed_when: false
-      register: ceph_command
-
-    - name: exit playbook, if ceph command does not exist
-      debug:
-        msg: "The ceph command is not available, please install it :("
-      run_once: true
-      when:
-        - ceph_command.rc != 0
-
-    - name: exit playbook, if cluster files do not exist
-      stat:
-        path: "{{ item }}"
-      register: ceph_conf_key
-      with_items:
-        - /etc/ceph/{{ cluster }}.conf
-        - /etc/ceph/{{ cluster }}.client.admin.keyring
-      failed_when: false
-
-    - fail:
-        msg: "Ceph's configuration file is not present in /etc/ceph"
-      with_items: "{{ceph_conf_key.results}}"
-      when:
-        - item.stat.exists == false
+      when:
+        - ireallymeanit != 'yes'
+
+  roles:
+    - ceph-defaults
+
+  post_tasks:
+    - name: pick a monitor different than the one we want to remove
+      set_fact: mon_host={{ item }}
+      with_items: "{{ groups[mon_group_name] }}"
+      when:
+        - item != mon_to_kill
 
     - name: exit playbook, if can not connect to the cluster
      command: timeout 5 ceph --cluster {{ cluster }} health
       register: ceph_health
       until: ceph_health.stdout.find("HEALTH") > -1
+      delegate_to: "{{ mon_host }}"
       retries: 5
       delay: 2
 
-    - name: verify given monitors are reachable
-      command: ping -c 1 {{ item }}
-      with_items: "{{mon_host.split(',')}}"
-      register: mon_reachable
-      failed_when: false
-
-    - fail:
-        msg: "One or more monitors are not reachable, please check your /etc/hosts or your DNS"
-      with_items: "{{mon_reachable.results}}"
-      when:
-        - item.rc != 0
-
-    - name: stop monitor service
+    - set_fact:
+        mon_to_kill_hostname: "{{ hostvars[mon_to_kill]['ansible_hostname'] }}"
+
+    - name: stop monitor service(s)
       service:
-        name: ceph-mon@{{ item }}
+        name: ceph-mon@{{ mon_to_kill_hostname }}
         state: stopped
         enabled: no
-      with_items: "{{mon_host.split(',')}}"
-      delegate_to: "{{item}}"
+      delegate_to: "{{ mon_to_kill }}"
      failed_when: false
 
     - name: purge monitor store
       file:
-        path: /var/lib/ceph/mon/{{ cluster }}-{{ item }}
+        path: /var/lib/ceph/mon/{{ cluster }}-{{ mon_to_kill_hostname }}
         state: absent
-      with_items: "{{mon_host.split(',')}}"
-      delegate_to: "{{item}}"
+      delegate_to: "{{ mon_to_kill }}"
 
     - name: remove monitor from the quorum
-      command: ceph --cluster {{ cluster }} mon remove {{ item }}
+      command: ceph --cluster {{ cluster }} mon remove {{ mon_to_kill_hostname }}
       failed_when: false
-      with_items: "{{mon_host.split(',')}}"
+      delegate_to: "{{ mon_host }}"
 
     # NOTE (leseb): sorry for the 'sleep' command
     # but it will take a couple of seconds for other monitors
     # to notice that one member has left.
     # 'sleep 5' is not that bad and should be sufficient
     - name: verify the monitor is out of the cluster
-      shell: "sleep 5 && ceph --cluster {{ cluster }} -s | grep monmap | sed 's/.*quorum//' | egrep -sq {{ item }}"
-      with_items: "{{mon_host.split(',')}}"
-      failed_when: false
-      register: ceph_health_mon
+      shell: |
+        ceph --cluster {{ cluster }} -s -f json | python -c 'import sys, json; print(json.load(sys.stdin)["quorum_names"])'
+      delegate_to: "{{ mon_host }}"
+      register: result
 
     - name: please remove the monitor from your ceph configuration file
       debug:
-        msg: "The monitor(s) has/have been successfully removed from the cluster.
-              Please remove the monitor(s) entry(ies) from the rest of your ceph configuration files, cluster wide."
+        msg: "The monitor has been successfully removed from the cluster.
+              Please remove the monitor entry from the rest of your ceph configuration files, cluster wide."
       run_once: true
-      with_items: "{{ceph_health_mon.results}}"
       when:
-        - item.rc != 0
+        - mon_to_kill_hostname not in result.stdout
 
-    - name: please remove the monitor from your ceph configuration file
+    - name: fail if monitor is still part of the cluster
       fail:
-        msg: "Monitor(s) appear(s) to still be part of the cluster, please check what happened."
+        msg: "Monitor appears to still be part of the cluster, please check what happened."
       run_once: true
-      with_items: "{{ceph_health_mon.results}}"
       when:
-        - item.rc == 0
+        - mon_to_kill_hostname in result.stdout
+
+    - name: show ceph health
+      command: ceph --cluster {{ cluster }} -s
+      delegate_to: "{{ mon_host }}"
+
+    - name: show ceph mon status
+      command: ceph --cluster {{ cluster }} mon stat
+      delegate_to: "{{ mon_host }}"
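
For readers skimming the diff: the new verification step boils down to a JSON membership test against the cluster's quorum list. The following standalone Python sketch (not part of the commit) mirrors what the shell task and the two conditional tasks above do, assuming a cluster named "ceph" and a hypothetical ceph-mon2 being removed:

import json
import subprocess

cluster = "ceph"                        # assumed cluster name
mon_to_kill_hostname = "ceph-mon2"      # hypothetical monitor being removed

# Dump the cluster status as JSON, like the playbook's shell task does,
# and read the list of monitors currently in quorum.
status = subprocess.check_output(
    ["ceph", "--cluster", cluster, "-s", "-f", "json"]
)
quorum_names = json.loads(status)["quorum_names"]

# The two conditional tasks that follow the shell task reduce to this test.
if mon_to_kill_hostname in quorum_names:
    raise SystemExit("Monitor appears to still be part of the cluster")
print("The monitor has been successfully removed from the cluster")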

infrastructure-playbooks/shrink-osd.yml

@@ -3,7 +3,7 @@
 # It can remove any number of OSD(s) from the cluster and ALL THEIR DATA
 #
 # Use it like this:
-# ansible-playbook shrink-osd.yml -e osd_id=0,2,6
+# ansible-playbook shrink-osd.yml -e osd_to_kill=0,2,6
 # Prompts for confirmation to shrink, defaults to no and
 # doesn't shrink the cluster. yes shrinks the cluster.
 #
@@ -11,13 +11,21 @@
 # Overrides the prompt using -e option. Can be used in
 # automation scripts to avoid interactive prompt.
 
+- name: gather facts and check the init system
+  hosts:
+    - "{{ mon_group_name|default('mons') }}"
+    - "{{ osd_group_name|default('osds') }}"
+  become: True
+  tasks:
+    - debug: msg="gather facts on all Ceph hosts for following reference"
+
 - name: confirm whether user really meant to remove osd(s) from the cluster
   hosts:
     - localhost
+  gather_facts: false
   become: true
   vars_prompt:
@@ -26,10 +34,11 @@
       default: 'no'
       private: no
 
-  tasks:
-    - include_vars: roles/ceph-defaults/defaults/main.yml
-    - include_vars: group_vars/all.yml
+  vars:
+    mon_group_name: mons
+    osd_group_name: osds
+
+  pre_tasks:
     - name: exit playbook, if user did not mean to shrink cluster
       fail:
         msg: "Exiting shrink-osd playbook, no osd(s) was/were removed..
@@ -40,73 +49,40 @@
     - name: exit playbook, if no osd(s) was/were given
       fail:
-        msg: "osd_ids must be declared
+        msg: "osd_to_kill must be declared
              Exiting shrink-osd playbook, no OSD()s was/were removed.
              On the command line when invoking the playbook, you can use
-             -e osd_ids=0,1,2,3 argument."
-      when: osd_ids is not defined
+             -e osd_to_kill=0,1,2,3 argument."
+      when: osd_to_kill is not defined
 
-    - name: test if ceph command exist
-      command: command -v ceph
-      changed_when: false
-      failed_when: false
-      register: ceph_command
-
-    - name: exit playbook, if ceph command does not exist
-      debug:
-        msg: "The ceph command is not available, please install it :("
-      run_once: true
-      when:
-        - ceph_command.rc != 0
-
-    - name: exit playbook, if cluster files do not exist
-      stat:
-        path: "{{ item }}"
-      register: ceph_conf_key
-      with_items:
-        - /etc/ceph/{{ cluster }}.conf
-        - /etc/ceph/{{ cluster }}.client.admin.keyring
-      failed_when: false
-
-    - fail:
-        msg: "Ceph's configuration file is not present in /etc/ceph"
-      with_items: "{{ ceph_conf_key.results }}"
-      when:
-        - item.stat.exists == false
+  roles:
+    - ceph-defaults
+
+  post_tasks:
 
     - name: exit playbook, if can not connect to the cluster
       command: timeout 5 ceph --cluster {{ cluster }} health
       register: ceph_health
       until: ceph_health.stdout.find("HEALTH") > -1
+      delegate_to: "{{ groups[mon_group_name][0] }}"
       retries: 5
       delay: 2
 
-    # NOTE (leseb): just in case, the complex filters mechanism below does not work anymore.
-    # This will be a quick and easy fix but will require using the shell module.
-    #  - name: find the host where the osd(s) is/are running on
-    #    shell: |
-    #      ceph --cluster {{ cluster }} osd find {{ item }} | grep -Po '(?<="ip": ")[^:]*'
-    #    with_items: "{{osd_ids.split(',')}}"
-    #    register: osd_hosts
-    #
-    - name: find the host where the osd(s) is/are running on
+    - name: find the host(s) where the osd(s) is/are running on
       command: ceph --cluster {{ cluster }} osd find {{ item }}
-      with_items: "{{ osd_ids.split(',') }}"
-      register: osd_hosts
+      with_items: "{{ osd_to_kill.split(',') }}"
+      delegate_to: "{{ groups[mon_group_name][0] }}"
+      register: find_osd_hosts
 
-    - set_fact: ip_item="{{(item.stdout | from_json).ip}}"
-      with_items: "{{ osd_hosts.results }}"
-      register: ip_result
-    - set_fact: ips="{{ ip_result.results | map(attribute='ansible_facts.ip_item') | list }}"
-    - set_fact: real_ips="{{ ips | regex_replace(':[0-9][0-9][0-9][0-9]\/[0-9]+', '') }}"
+    - set_fact:
+        osd_hosts: "{{ (item.stdout | from_json).crush_location.host }}"
+      with_items: "{{ find_osd_hosts.results }}"
 
     - name: check if ceph admin key exists on the osd nodes
       stat:
         path: "/etc/ceph/{{ cluster }}.client.admin.keyring"
       register: ceph_admin_key
-      with_items: "{{ real_ips }}"
+      with_items: "{{ osd_hosts }}"
       delegate_to: "{{ item }}"
       failed_when: false
@@ -121,14 +97,15 @@
       register: deactivate
       ignore_errors: yes
       with_together:
-        - "{{ osd_ids.split(',') }}"
-        - "{{ real_ips }}"
+        - "{{ osd_to_kill.split(',') }}"
+        - "{{ osd_hosts }}"
       delegate_to: "{{ item.1 }}"
 
     - name: set osd(s) out when ceph-disk deactivating fail
       command: ceph --cluster {{ cluster }} osd out osd.{{ item.0 }}
+      delegate_to: "{{ groups[mon_group_name][0] }}"
       with_together:
-        - "{{ osd_ids.split(',') }}"
+        - "{{ osd_to_kill.split(',') }}"
         - "{{ deactivate.results }}"
       when:
         - item.1.stderr|length > 0
@@ -138,30 +115,41 @@
       register: destroy
       ignore_errors: yes
       with_together:
-        - "{{ osd_ids.split(',') }}"
-        - "{{ real_ips }}"
+        - "{{ osd_to_kill.split(',') }}"
+        - "{{ osd_hosts }}"
       delegate_to: "{{ item.1 }}"
 
     - name: remove osd(s) from crush_map when ceph-disk destroy fail
       command: ceph --cluster {{ cluster }} osd crush remove osd.{{ item.0 }}
+      delegate_to: "{{ groups[mon_group_name][0] }}"
       with_together:
-        - "{{ osd_ids.split(',') }}"
+        - "{{ osd_to_kill.split(',') }}"
         - "{{ destroy.results }}"
       when:
         - item.1.stderr|length > 0
 
     - name: delete osd(s) auth key when ceph-disk destroy fail
       command: ceph --cluster {{ cluster }} auth del osd.{{ item.0 }}
+      delegate_to: "{{ groups[mon_group_name][0] }}"
       with_together:
-        - "{{ osd_ids.split(',') }}"
+        - "{{ osd_to_kill.split(',') }}"
         - "{{ destroy.results }}"
       when:
         - item.1.stderr|length > 0
 
     - name: deallocate osd(s) id when ceph-disk destroy fail
       command: ceph --cluster {{ cluster }} osd rm {{ item.0 }}
+      delegate_to: "{{ groups[mon_group_name][0] }}"
       with_together:
-        - "{{ osd_ids.split(',') }}"
+        - "{{ osd_to_kill.split(',') }}"
         - "{{ destroy.results }}"
       when:
         - item.1.stderr|length > 0
+
+    - name: show ceph health
+      command: ceph --cluster {{ cluster }} -s
+      delegate_to: "{{ groups[mon_group_name][0] }}"
+
+    - name: show ceph osd tree
+      command: ceph --cluster {{ cluster }} osd tree
+      delegate_to: "{{ groups[mon_group_name][0] }}"
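
The host lookup above replaces the old IP-scraping set_fact chain: "ceph osd find <id>" prints JSON, and its crush_location.host field names the node the OSD lives on, which the playbook then delegates the ceph-disk calls to. A rough standalone Python equivalent (not part of the commit), assuming a cluster named "ceph" and the same comma-separated osd_to_kill format the playbook expects:

import json
import subprocess

cluster = "ceph"          # assumed cluster name
osd_to_kill = "0,2,6"     # same format as the playbook's --extra-vars

osd_hosts = []
for osd_id in osd_to_kill.split(","):
    # "ceph osd find <id>" outputs JSON; crush_location.host is the node
    # hosting the OSD, as the set_fact task in the diff extracts.
    out = subprocess.check_output(
        ["ceph", "--cluster", cluster, "osd", "find", osd_id]
    )
    osd_hosts.append(json.loads(out)["crush_location"]["host"])

print(osd_hosts)  # e.g. ['ceph-osd0', ...]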


@@ -1,4 +1,5 @@
 ---
+copy_admin_key: true
 os_tuning_params:
   - { name: kernel.pid_max, value: 4194303 }
   - { name: fs.file-max, value: 26234859 }


@@ -1,19 +1,19 @@
 [mons]
-mon0 monitor_address=192.168.1.10
-mon1 monitor_interface=eth1
-mon2 monitor_address=192.168.1.12
+ceph-mon0 monitor_address=192.168.1.10
+ceph-mon1 monitor_interface=eth1
+ceph-mon2 monitor_address=192.168.1.12
 
 [osds]
-osd0
+ceph-osd0
 
 [mdss]
-mds0
+ceph-mds0
 
 [rgws]
-rgw0
+ceph-rgw0
 
 [clients]
-client0
+ceph-client0
 
 [mgrs]
-mgr0
+ceph-mgr0


@@ -72,3 +72,6 @@ vagrant_disable_synced_folder: true
 os_tuning_params:
   - { name: kernel.pid_max, value: 4194303 }
   - { name: fs.file-max, value: 26234859 }
+
+# VM prefix name, need to match the hostname
+label_prefix: ceph


@@ -11,13 +11,13 @@ class TestMons(object):
         assert host.socket("tcp://%s:6789" % node["address"]).is_listening
 
     def test_mon_service_is_running(self, node, host):
-        service_name = "ceph-mon@ceph-{hostname}".format(
+        service_name = "ceph-mon@{hostname}".format(
             hostname=node["vars"]["inventory_hostname"]
         )
         assert host.service(service_name).is_running
 
     def test_mon_service_is_enabled(self, node, host):
-        service_name = "ceph-mon@ceph-{hostname}".format(
+        service_name = "ceph-mon@{hostname}".format(
             hostname=node["vars"]["inventory_hostname"]
         )
         assert host.service(service_name).is_enabled
@@ -32,7 +32,7 @@ class TestMons(object):
         assert File(node["conf_path"]).contains("^mon initial members = .*$")
 
     def test_initial_members_line_has_correct_value(self, node, File):
-        mons = ",".join("ceph-%s" % host
+        mons = ",".join("%s" % host
                         for host in node["vars"]["groups"]["mons"])
         line = "mon initial members = {}".format(mons)
         assert File(node["conf_path"]).contains(line)
@@ -49,7 +49,7 @@ class TestOSDs(object):
 
     @pytest.mark.docker
     def test_all_docker_osds_are_up_and_in(self, node, host):
-        cmd = "sudo docker exec ceph-mon-ceph-{} ceph --cluster={} --connect-timeout 5 -s".format(
+        cmd = "sudo docker exec ceph-mon-{} ceph --cluster={} --connect-timeout 5 -s".format(
            node["vars"]["inventory_hostname"],
             node["cluster_name"]
         )
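
These test changes follow directly from the inventory and Vagrant renaming: the inventory hostname now carries the "ceph-" prefix itself, so the tests no longer prepend it. A small illustrative Python snippet, using the three monitor names from the test inventory above as an assumed input:

# Inventory hostnames already include the "ceph-" prefix (see the hosts file
# in this commit), so the systemd unit is ceph-mon@<inventory_hostname>.
inventory_hostnames = ["ceph-mon0", "ceph-mon1", "ceph-mon2"]

for hostname in inventory_hostnames:
    service_name = "ceph-mon@{hostname}".format(hostname=hostname)
    print(service_name)  # ceph-mon@ceph-mon0, ceph-mon@ceph-mon1, ...

# "mon initial members" is now just the inventory names joined verbatim.
print("mon initial members = {}".format(",".join(inventory_hostnames)))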

tox.ini

@@ -1,6 +1,6 @@
 [tox]
 envlist = {dev,jewel,luminous,rhcs}-{ansible2.2,ansible2.3}-{xenial_cluster,journal_collocation,centos7_cluster,dmcrypt_journal,dmcrypt_journal_collocation,docker_cluster,purge_cluster,purge_dmcrypt,docker_dedicated_journal,docker_dmcrypt_journal_collocation,update_dmcrypt,update_cluster,cluster,purge_docker_cluster,update_docker_cluster}
-          {dev,luminous}-{ansible2.2,ansible2.3}-{bluestore_journal_collocation,bluestore_cluster,bluestore_dmcrypt_journal,bluestore_dmcrypt_journal_collocation,bluestore_docker_cluster,bluestore_docker_dedicated_journal,bluestore_docker_dmcrypt_journal_collocation,lvm_osds,purge_lvm_osds}
+          {dev,luminous}-{ansible2.2,ansible2.3}-{bluestore_journal_collocation,bluestore_cluster,bluestore_dmcrypt_journal,bluestore_dmcrypt_journal_collocation,bluestore_docker_cluster,bluestore_docker_dedicated_journal,bluestore_docker_dmcrypt_journal_collocation,lvm_osds,purge_lvm_osds,shrink_mon,shrink_osd}
 
 skipsdist = True
@@ -81,6 +81,21 @@ commands=
     testinfra -n 4 --sudo -v --connection=ansible --ansible-inventory={changedir}/hosts {toxinidir}/tests/functional/tests
 
+[shrink-mon]
+commands=
+    cp {toxinidir}/infrastructure-playbooks/shrink-mon.yml {toxinidir}/shrink-mon.yml
+    ansible-playbook -vv -i {changedir}/hosts {toxinidir}/shrink-mon.yml --extra-vars "\
+        ireallymeanit=yes \
+        mon_to_kill=ceph-mon2 \
+    "
+
+[shrink-osd]
+commands=
+    cp {toxinidir}/infrastructure-playbooks/shrink-osd.yml {toxinidir}/shrink-osd.yml
+    ansible-playbook -vv -i {changedir}/hosts {toxinidir}/shrink-osd.yml --extra-vars "\
+        ireallymeanit=yes \
+        osd_to_kill=0 \
+    "
 [testenv]
 whitelist_externals =
     vagrant
@@ -128,6 +143,8 @@ changedir=
   dmcrypt_journal_collocation: {toxinidir}/tests/functional/centos/7/crypt-jrn-col
   # tests a 1 mon, 1 osd, 1 mds and 1 rgw centos7 cluster using non-collocated OSD scenario
   centos7_cluster: {toxinidir}/tests/functional/centos/7/cluster
+  shrink_mon: {toxinidir}/tests/functional/centos/7/cluster
+  shrink_osd: {toxinidir}/tests/functional/centos/7/cluster
   # an alias for centos7_cluster, this makes the name better suited for rhcs testing
   cluster: {toxinidir}/tests/functional/centos/7/cluster
   # tests a 1 mon, 1 osd, 1 mds and 1 rgw centos7 cluster using docker
@@ -183,5 +200,7 @@ commands=
   update_dmcrypt: {[update]commands}
   update_cluster: {[update]commands}
   update_docker_cluster: {[update]commands}
+  shrink_mon: {[shrink-mon]commands}
+  shrink_osd: {[shrink-osd]commands}
 
   vagrant destroy --force