ceph-ansible/roles/ceph-facts/tasks/facts.yml

294 lines
11 KiB
YAML
Raw Normal View History

---
- name: check if it is atomic host
stat:
path: /run/ostree-booted
register: stat_ostree
- name: set_fact is_atomic
set_fact:
is_atomic: "{{ stat_ostree.stat.exists }}"
- name: import_tasks container_binary.yml
import_tasks: container_binary.yml
# In case ansible_python_interpreter is set by the user,
# ansible will not discover python and discovered_interpreter_python
# will not be set
- name: set_fact discovered_interpreter_python
set_fact:
discovered_interpreter_python: "{{ ansible_python_interpreter }}"
when: ansible_python_interpreter is defined
# Set ceph_release to ceph_stable by default
- name: set_fact ceph_release ceph_stable_release
set_fact:
ceph_release: "{{ ceph_stable_release }}"
- name: set_fact monitor_name ansible_hostname
set_fact:
monitor_name: "{{ hostvars[item]['ansible_hostname'] }}"
delegate_to: "{{ item }}"
delegate_facts: true
with_items: "{{ groups.get(mon_group_name, []) }}"
run_once: true
when: groups.get(mon_group_name, []) | length > 0
- name: find a running monitor
when: groups.get(mon_group_name, []) | length > 0
block:
- name: set_fact container_exec_cmd
set_fact:
container_exec_cmd: "{{ container_binary }} exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] if not rolling_update else hostvars[mon_host | default(groups[mon_group_name][0])]['ansible_hostname'] }}"
when:
- containerized_deployment | bool
- name: find a running mon container
command: "{{ container_binary }} ps -q --filter name=ceph-mon-{{ hostvars[item]['ansible_hostname'] }}"
register: find_running_mon_container
failed_when: false
run_once: true
delegate_to: "{{ item }}"
with_items: "{{ groups.get(mon_group_name, []) }}"
when:
- containerized_deployment | bool
- name: check for a ceph mon socket
shell: stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-mon*.asok
changed_when: false
failed_when: false
check_mode: no
register: mon_socket_stat
run_once: true
delegate_to: "{{ item }}"
with_items: "{{ groups.get(mon_group_name, []) }}"
when:
- not containerized_deployment | bool
- name: check if the ceph mon socket is in-use
command: grep -q {{ item.stdout }} /proc/net/unix
changed_when: false
failed_when: false
check_mode: no
register: mon_socket
run_once: true
with_items: "{{ mon_socket_stat.results }}"
when:
- not containerized_deployment | bool
- item.rc == 0
- name: set_fact running_mon - non_container
set_fact:
running_mon: "{{ hostvars[item.item.item]['inventory_hostname'] }}"
with_items: "{{ mon_socket.results }}"
run_once: true
when:
- not containerized_deployment | bool
- item.rc is defined
- item.rc == 0
- name: set_fact running_mon - container
set_fact:
running_mon: "{{ item.item }}"
run_once: true
with_items: "{{ find_running_mon_container.results }}"
when:
- containerized_deployment | bool
- item.stdout_lines | default([]) | length > 0
- name: set_fact _container_exec_cmd
set_fact:
_container_exec_cmd: "{{ container_binary }} exec ceph-mon-{{ hostvars[groups[mon_group_name][0] if running_mon is undefined else running_mon]['ansible_hostname'] }}"
when:
- containerized_deployment | bool
# this task shouldn't run in a rolling_update situation
# because it blindly picks a mon, which may be down because
# of the rolling update
- name: is ceph running already?
command: "{{ timeout_command }} {{ _container_exec_cmd | default('') }} ceph --cluster {{ cluster }} -s -f json"
changed_when: false
failed_when: false
check_mode: no
register: ceph_current_status
run_once: true
delegate_to: "{{ groups[mon_group_name][0] if running_mon is undefined else running_mon }}"
when:
- not rolling_update | bool
# set this as a default when performing a rolling_update
# so the rest of the tasks here will succeed
- name: set_fact ceph_current_status rc 1
set_fact:
ceph_current_status:
rc: 1
when: rolling_update or groups.get(mon_group_name, []) | length == 0
- name: create a local fetch directory if it does not exist
file:
syntax: change local_action syntax Use a nicer syntax for `local_action` tasks. We used to have oneliner like this: ``` local_action: wait_for port=22 host={{ hostvars[inventory_hostname]['ansible_default_ipv4']['address'] }} state=started delay=10 timeout=500 }} ``` The usual syntax: ``` local_action: module: wait_for port: 22 host: "{{ hostvars[inventory_hostname]['ansible_default_ipv4']['address'] }}" state: started delay: 10 timeout: 500 ``` is nicer and kind of way to keep consistency regarding the whole playbook. This also fix a potential issue about missing quotation : ``` Traceback (most recent call last): File "/tmp/ansible_wQtWsi/ansible_module_command.py", line 213, in <module> main() File "/tmp/ansible_wQtWsi/ansible_module_command.py", line 185, in main rc, out, err = module.run_command(args, executable=executable, use_unsafe_shell=shell, encoding=None, data=stdin) File "/tmp/ansible_wQtWsi/ansible_modlib.zip/ansible/module_utils/basic.py", line 2710, in run_command File "/usr/lib64/python2.7/shlex.py", line 279, in split return list(lex) File "/usr/lib64/python2.7/shlex.py", line 269, in next token = self.get_token() File "/usr/lib64/python2.7/shlex.py", line 96, in get_token raw = self.read_token() File "/usr/lib64/python2.7/shlex.py", line 172, in read_token raise ValueError, "No closing quotation" ValueError: No closing quotation ``` writing `local_action: shell echo {{ fsid }} | tee {{ fetch_directory }}/ceph_cluster_uuid.conf` can cause trouble because it's complaining with missing quotes, this fix solves this issue. Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1510555 Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com>
2018-01-31 16:23:28 +08:00
path: "{{ fetch_directory }}"
state: directory
delegate_to: localhost
changed_when: false
become: false
when: cephx | bool or generate_fsid | bool
- name: get current fsid
command: "{{ timeout_command }} {{ container_exec_cmd }} ceph --admin-daemon /var/run/ceph/{{ cluster }}-mon.{{ hostvars[mon_host | default(groups[mon_group_name][0])]['ansible_hostname'] }}.asok config get fsid"
register: rolling_update_fsid
delegate_to: "{{ mon_host | default(groups[mon_group_name][0]) }}"
facts: add a retry on get current fsid task sometimes it can happen the following task fails: ``` TASK [ceph-facts : get current fsid] ******************************************* task path: /home/jenkins-build/build/workspace/ceph-ansible-prs-dev-centos-container-update/roles/ceph-facts/tasks/facts.yml:78 Wednesday 19 June 2019 18:12:49 +0000 (0:00:00.203) 0:02:39.995 ******** fatal: [mon2 -> mon1]: FAILED! => changed=true cmd: - timeout - --foreground - -s - KILL - 600s - docker - exec - ceph-mon-mon1 - ceph - --cluster - ceph - daemon - mon.mon1 - config - get - fsid delta: '0:00:00.239339' end: '2019-06-19 18:12:49.812099' msg: non-zero return code rc: 22 start: '2019-06-19 18:12:49.572760' stderr: 'admin_socket: exception getting command descriptions: [Errno 2] No such file or directory' stderr_lines: <omitted> stdout: '' stdout_lines: <omitted> ``` not sure exactly why since just before this task, mon1 seems to be well UP otherwise it wouldn't have passed the task `waiting for the containerized monitor to join the quorum`. As a quick fix/workaround, let's add a retry which allows us to get around this situation: ``` TASK [ceph-facts : get current fsid] ******************************************* task path: /home/jenkins-build/build/workspace/ceph-ansible-scenario/roles/ceph-facts/tasks/facts.yml:78 Thursday 20 June 2019 15:35:07 +0000 (0:00:00.201) 0:03:47.288 ********* FAILED - RETRYING: get current fsid (3 retries left). changed: [mon2 -> mon1] => changed=true attempts: 2 cmd: - timeout - --foreground - -s - KILL - 600s - docker - exec - ceph-mon-mon1 - ceph - --cluster - ceph - daemon - mon.mon1 - config - get - fsid delta: '0:00:00.290252' end: '2019-06-20 15:35:13.960188' rc: 0 start: '2019-06-20 15:35:13.669936' stderr: '' stderr_lines: <omitted> stdout: |- { "fsid": "153e159d-7ade-42a7-842c-4d04348b901e" } stdout_lines: <omitted> ``` Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com>
2019-06-20 20:45:07 +08:00
until: rolling_update_fsid is succeeded
when: rolling_update | bool
- name: set_fact fsid
set_fact:
fsid: "{{ (rolling_update_fsid.stdout | from_json).fsid }}"
when: rolling_update | bool
- name: set_fact ceph_current_status (convert to json)
set_fact:
ceph_current_status: "{{ ceph_current_status.stdout | from_json }}"
when:
- not rolling_update | bool
- ceph_current_status.rc == 0
- name: set_fact fsid from ceph_current_status
set_fact:
fsid: "{{ ceph_current_status.fsid }}"
when: ceph_current_status.fsid is defined
- name: fsid related tasks
when:
- generate_fsid | bool
- ceph_current_status.fsid is undefined
- not rolling_update | bool
block:
- name: generate cluster fsid
command: "{{ hostvars[groups[mon_group_name][0]]['discovered_interpreter_python'] }} -c 'import uuid; print(str(uuid.uuid4()))'"
register: cluster_uuid
delegate_to: "{{ groups[mon_group_name][0] }}"
run_once: true
- name: set_fact fsid
set_fact:
fsid: "{{ cluster_uuid.stdout }}"
- name: resolve device link(s)
command: readlink -f {{ item }}
changed_when: false
check_mode: no
with_items: "{{ devices }}"
register: devices_prepare_canonicalize
when:
- devices is defined
- inventory_hostname in groups.get(osd_group_name, [])
- not osd_auto_discovery | default(False) | bool
- name: set_fact build devices from resolved symlinks
set_fact:
devices: "{{ devices | default([]) + [ item.stdout ] }}"
with_items: "{{ devices_prepare_canonicalize.results }}"
when:
- devices is defined
- inventory_hostname in groups.get(osd_group_name, [])
- not osd_auto_discovery | default(False) | bool
- name: set_fact build final devices list
set_fact:
devices: "{{ devices | reject('search','/dev/disk') | list | unique }}"
when:
- devices is defined
- inventory_hostname in groups.get(osd_group_name, [])
- not osd_auto_discovery | default(False) | bool
- name: set_fact devices generate device list when osd_auto_discovery
set_fact:
devices: "{{ (devices | default([]) + [ item.key | regex_replace('^', '/dev/') ]) | unique }}"
with_dict: "{{ ansible_devices }}"
when:
- osd_auto_discovery | default(False) | bool
- inventory_hostname in groups.get(osd_group_name, [])
- ansible_devices is defined
- item.value.removable == "0"
- item.value.sectors != "0"
- item.value.partitions|count == 0
- item.value.holders|count == 0
- item.key is not match osd_auto_discovery_exclude
- name: set_fact rgw_hostname
set_fact:
rgw_hostname: "{% set _value = ansible_hostname -%}
{% for key in (ceph_current_status['servicemap']['services']['rgw']['daemons'] | list) -%}
{% if key == ansible_fqdn -%}
{% set _value = key -%}
{% endif -%}
{% endfor -%}
{{ _value }}"
when:
- inventory_hostname in groups.get(rgw_group_name, []) or inventory_hostname in groups.get(nfs_group_name, [])
- ceph_current_status['servicemap'] is defined
- ceph_current_status['servicemap']['services'] is defined
- ceph_current_status['servicemap']['services']['rgw'] is defined
- name: set_fact osd_pool_default_pg_num
set_fact:
osd_pool_default_pg_num: "{{ ceph_conf_overrides.get('global', {}).get('osd_pool_default_pg_num', ceph_osd_pool_default_pg_num) }}"
- name: set_fact osd_pool_default_size
set_fact:
osd_pool_default_size: "{{ ceph_conf_overrides.get('global', {}).get('osd_pool_default_size', ceph_osd_pool_default_size) }}"
- name: set_fact osd_pool_default_min_size
set_fact:
osd_pool_default_min_size: "{{ ceph_conf_overrides.get('global', {}).get('osd_pool_default_min_size', ceph_osd_pool_default_min_size) }}"
- name: check if the ceph conf exists
stat:
path: '/etc/ceph/{{ cluster }}.conf'
register: ceph_conf
- name: get default crush rule value from ceph configuration
command: grep 'osd pool default crush rule' /etc/ceph/{{ cluster }}.conf
register: crush_rule_variable
changed_when: false
check_mode: no
failed_when: false
when: ceph_conf.stat.exists
- name: set_fact osd_pool_default_crush_rule
set_fact:
osd_pool_default_crush_rule: "{{ crush_rule_variable.stdout.split(' = ')[1] if crush_rule_variable.get('rc', 1) | int == 0 else ceph_osd_pool_default_crush_rule }}"
- name: import_tasks set_monitor_address.yml
import_tasks: set_monitor_address.yml
when: groups.get(mon_group_name, []) | length > 0
- name: import_tasks set_radosgw_address.yml
import_tasks: set_radosgw_address.yml
when: inventory_hostname in groups.get(rgw_group_name, [])
- name: set_fact use_new_ceph_iscsi package or old ceph-iscsi-config/cli
set_fact:
use_new_ceph_iscsi: "{{ (gateway_ip_list == '0.0.0.0' and gateway_iqn | length == 0 and client_connections | length == 0 and rbd_devices | length == 0) | bool | ternary(true, false) }}"
when: iscsi_gw_group_name in group_names
- name: set_fact ceph_run_cmd
set_fact:
ceph_run_cmd: "{{ container_binary + ' run --rm --net=host -v /etc/ceph:/etc/ceph:z -v /var/lib/ceph/:/var/lib/ceph/:z -v /var/log/ceph/:/var/log/ceph/:z --entrypoint=ceph ' + ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment else 'ceph' }}"
delegate_to: "{{ item }}"
delegate_facts: True
run_once: True
with_items:
- "{{ groups[mon_group_name] if groups[mon_group_name] | default([]) | length > 0 else [] }}"
- "{{ groups[mds_group_name] if groups[mds_group_name] | default([]) | length > 0 else [] }}"
- "{{ groups[client_group_name] if groups[client_group_name] | default([]) | length > 0 else [] }}"
- name: set_fact ceph_admin_command
set_fact:
ceph_admin_command: "{{ hostvars[item]['ceph_run_cmd'] }} -n client.admin -k /etc/ceph/{{ cluster }}.client.admin.keyring"
delegate_to: "{{ item }}"
delegate_facts: True
run_once: True
with_items:
- "{{ groups[mon_group_name] if groups[mon_group_name] | default([]) | length > 0 else [] }}"
- "{{ groups[mds_group_name] if groups[mds_group_name] | default([]) | length > 0 else [] }}"
- "{{ groups[client_group_name] if groups[client_group_name] | default([]) | length > 0 else [] }}"