# 2015-03-27 01:51:11 +08:00
---
# This playbook was meant to upgrade a node from Ubuntu to RHEL.
# It performs a set of actions before rebooting the node.
# The node then reboots via PXE and gets its new operating system.
# This playbook contains plays for monitors, OSDs and rados gateways.
# Note that some of the checks are ugly,
# e.g. the repeated `when: migration_completed.stat.exists` conditions.
# They could be improved with includes; however, I wanted to keep a single file...
#
# Monitors: migrate one node at a time (serial: 1).
#
# For each monitor the play compacts and archives the monitor store, fetches
# the archive, reboots the node, restores the store, switches the monitor to
# sysvinit, starts it and waits until it is back in the quorum.
# A marker file (migration_completed) is touched at the end; every mutating
# task is skipped when that marker already exists, so the play can be re-run.
- hosts: mons
  serial: 1
  # `become` is the current spelling of the former `sudo` keyword.
  become: true

  vars:
    backup_dir: /tmp/

  tasks:
    - name: Check if the node has be migrated already
      stat:
        path: "/var/lib/ceph/mon/ceph-{{ ansible_facts['hostname'] }}/migration_completed"
      register: migration_completed
      failed_when: false

    - name: Check for failed run
      stat:
        path: "/var/lib/ceph/{{ ansible_facts['hostname'] }}.tar"
      register: mon_archive_leftover

    - name: Abort when an archive from a previous run is still there
      fail:
        msg: "Looks like an archive is already there, please remove it!"
      when:
        - not migration_completed.stat.exists
        - mon_archive_leftover.stat.exists

    - name: Compress the store as much as possible
      command: ceph tell mon.{{ ansible_facts['hostname'] }} compact
      when: not migration_completed.stat.exists

    - name: Check if sysvinit
      stat:
        path: "/var/lib/ceph/mon/ceph-{{ ansible_facts['hostname'] }}/sysvinit"
      register: monsysvinit
      changed_when: false

    - name: Check if upstart
      stat:
        path: "/var/lib/ceph/mon/ceph-{{ ansible_facts['hostname'] }}/upstart"
      register: monupstart
      changed_when: false

    # The two probes below only record a return code. They must not fail on
    # their own, otherwise the guarded `fail` tasks that follow them can never
    # run and a node using the other init system (or an already migrated node)
    # would abort the play.
    - name: Check if init does what it is supposed to do (Sysvinit)
      shell: >-
        ps faux|grep -sq [c]eph-mon && service ceph status mon >> /dev/null
      register: ceph_status_sysvinit
      changed_when: false
      failed_when: false

    - name: Abort when the sysvinit script disagrees with the running monitor
      fail:
        msg: "Something is terribly wrong here, sysvinit is configured, the service is started BUT the init script does not return 0, GO FIX YOUR SETUP!"
      when:
        - ceph_status_sysvinit.rc != 0
        - not migration_completed.stat.exists
        - monsysvinit.stat.exists

    - name: Check if init does what it is supposed to do (upstart)
      shell: >-
        ps faux|grep -sq [c]eph-mon && status ceph-mon-all >> /dev/null
      register: ceph_status_upstart
      changed_when: false
      failed_when: false

    - name: Abort when the upstart job disagrees with the running monitor
      fail:
        msg: "Something is terribly wrong here, upstart is configured, the service is started BUT the init script does not return 0, GO FIX YOUR SETUP!"
      when:
        - ceph_status_upstart.rc != 0
        - not migration_completed.stat.exists
        - monupstart.stat.exists

    - name: Restart the Monitor after compaction (Upstart)
      service:
        name: ceph-mon
        state: restarted
        arguments: "id={{ ansible_facts['hostname'] }}"
      when:
        - monupstart.stat.exists
        - not migration_completed.stat.exists

    - name: Restart the Monitor after compaction (Sysvinit)
      service:
        name: ceph
        state: restarted
        arguments: mon
      when:
        - monsysvinit.stat.exists
        - not migration_completed.stat.exists

    - name: Wait for the monitor to be up again
      local_action:
        module: wait_for
        host: "{{ ansible_ssh_host | default(inventory_hostname) }}"
        port: 6789
        timeout: 10
      when: not migration_completed.stat.exists

    - name: Stop the monitor (Upstart)
      service:
        name: ceph-mon
        state: stopped
        arguments: "id={{ ansible_facts['hostname'] }}"
      when:
        - monupstart.stat.exists
        - not migration_completed.stat.exists

    - name: Stop the monitor (Sysvinit)
      service:
        name: ceph
        state: stopped
        arguments: mon
      when:
        - monsysvinit.stat.exists
        - not migration_completed.stat.exists

    - name: Wait for the monitor to be down
      local_action:
        module: wait_for
        host: "{{ ansible_ssh_host | default(inventory_hostname) }}"
        port: 6789
        timeout: 10
        state: stopped
      when: not migration_completed.stat.exists

    # NOTE(review): `fetch` below stores the archive on the Ansible control
    # node, while this directory is created on groups.backup[0] -- presumably
    # both are the same machine; confirm against the inventory.
    - name: Create a backup directory
      file:
        path: "{{ backup_dir }}/monitors-backups"
        state: directory
        owner: root
        group: root
        mode: "0644"
      delegate_to: "{{ groups.backup[0] }}"
      when: not migration_completed.stat.exists

    # NOTE (leseb): should we convert upstart to sysvinit here already?
    # The archive is written inside the directory being archived, so it is
    # excluded explicitly to keep tar from packing a partial copy of itself.
    - name: Archive monitor stores
      shell: >-
        tar -cpvzf - --one-file-system
        --exclude=./{{ ansible_facts['hostname'] }}.tar
        . /etc/ceph/* | cat > {{ ansible_facts['hostname'] }}.tar
      args:
        chdir: /var/lib/ceph/
        creates: "/var/lib/ceph/{{ ansible_facts['hostname'] }}.tar"
      when: not migration_completed.stat.exists

    - name: Scp the Monitor store
      fetch:
        src: "/var/lib/ceph/{{ ansible_facts['hostname'] }}.tar"
        dest: "{{ backup_dir }}/monitors-backups/{{ ansible_facts['hostname'] }}.tar"
        flat: true
      when: not migration_completed.stat.exists

    - name: Reboot the server
      command: reboot
      when: not migration_completed.stat.exists

    # `host` is required here: without it wait_for probes 127.0.0.1, i.e. the
    # SSH port of the control node instead of the rebooting monitor.
    - name: Wait for the server to come up
      local_action:
        module: wait_for
        host: "{{ ansible_ssh_host | default(inventory_hostname) }}"
        port: 22
        delay: 10
        timeout: 3600
      when: not migration_completed.stat.exists

    - name: Wait a bit more to be sure that the server is ready
      pause:
        seconds: 20
      when: not migration_completed.stat.exists

    # The init markers are probed again because the node was reinstalled.
    - name: Check if sysvinit
      stat:
        path: "/var/lib/ceph/mon/ceph-{{ ansible_facts['hostname'] }}/sysvinit"
      register: monsysvinit
      changed_when: false

    - name: Check if upstart
      stat:
        path: "/var/lib/ceph/mon/ceph-{{ ansible_facts['hostname'] }}/upstart"
      register: monupstart
      changed_when: false

    - name: Make sure the monitor is stopped (Upstart)
      service:
        name: ceph-mon
        state: stopped
        arguments: "id={{ ansible_facts['hostname'] }}"
      when:
        - monupstart.stat.exists
        - not migration_completed.stat.exists

    - name: Make sure the monitor is stopped (Sysvinit)
      service:
        name: ceph
        state: stopped
        arguments: mon
      when:
        - monsysvinit.stat.exists
        - not migration_completed.stat.exists

    # NOTE (leseb): 'creates' was added in Ansible 1.6
    # `creates` is given as an absolute path: tar strips the leading "/" of
    # /etc/ceph/*, so the files land under /var/lib/ceph/etc/ceph/.
    - name: Copy and unarchive the monitor store
      unarchive:
        src: "{{ backup_dir }}/monitors-backups/{{ ansible_facts['hostname'] }}.tar"
        dest: /var/lib/ceph/
        remote_src: false
        mode: "0600"
        creates: /var/lib/ceph/etc/ceph/ceph.conf
      when: not migration_completed.stat.exists

    - name: Copy keys and configs
      shell: cp etc/ceph/* /etc/ceph/
      args:
        chdir: /var/lib/ceph/
      when: not migration_completed.stat.exists

    - name: Configure RHEL7 for sysvinit
      shell: >-
        find -L /var/lib/ceph/mon/ -mindepth 1 -maxdepth 1 -regextype posix-egrep -regex '.*/[A-Za-z0-9]+-[A-Za-z0-9._-]+' -exec touch {}/sysvinit \; -exec rm {}/upstart \;
      when: not migration_completed.stat.exists

    # NOTE (leseb): at this point the upstart and sysvinit checks are not necessary
    # so we directly call sysvinit
    - name: Start the monitor
      service:
        name: ceph
        state: started
        arguments: mon
      when: not migration_completed.stat.exists

    - name: Wait for the Monitor to be up again
      local_action:
        module: wait_for
        host: "{{ ansible_ssh_host | default(inventory_hostname) }}"
        port: 6789
        timeout: 10
      when: not migration_completed.stat.exists

    - name: Waiting for the monitor to join the quorum...
      shell: >-
        ceph -s | grep monmap | sed 's/.*quorum//' | egrep -q {{ ansible_facts['hostname'] }}
      register: result
      until: result.rc == 0
      retries: 5
      delay: 10
      delegate_to: "{{ groups.backup[0] }}"
      when: not migration_completed.stat.exists

    # Marker read by the first task of this play on the next run.
    - name: Done moving to the next monitor
      file:
        path: "/var/lib/ceph/mon/ceph-{{ ansible_facts['hostname'] }}/migration_completed"
        state: touch
        owner: root
        group: root
        mode: "0600"
      when: not migration_completed.stat.exists
# OSDs: migrate one node at a time (serial: 1).
#
# For each OSD node the play sets the noout flag, archives the ceph
# configuration, fetches the archive, stops the OSDs, marks them for
# sysvinit, reboots the node, restores the configuration, starts the OSDs and
# waits for all placement groups to be active+clean before unsetting noout.
# A marker file (/var/lib/ceph/migration_completed) makes the play re-runnable.
- hosts: osds
  serial: 1
  # `become` is the current spelling of the former `sudo` keyword.
  become: true

  vars:
    backup_dir: /tmp/

  tasks:
    - name: Check if the node has be migrated already
      stat:
        path: /var/lib/ceph/migration_completed
      register: migration_completed
      failed_when: false

    - name: Check for failed run
      stat:
        path: "/var/lib/ceph/{{ ansible_facts['hostname'] }}.tar"
      register: osd_archive_leftover

    - name: Abort when an archive from a previous run is still there
      fail:
        msg: "Looks like an archive is already there, please remove it!"
      when:
        - not migration_completed.stat.exists
        - osd_archive_leftover.stat.exists

    # The init-system probes run first so that the sanity checks below can be
    # guarded with results registered in THIS play (rc == 0 means at least one
    # OSD directory carries the marker file).
    - name: Check if sysvinit
      shell: stat /var/lib/ceph/osd/ceph-*/sysvinit
      register: osdsysvinit
      failed_when: false
      changed_when: false

    - name: Check if upstart
      shell: stat /var/lib/ceph/osd/ceph-*/upstart
      register: osdupstart
      failed_when: false
      changed_when: false

    # The two probes below only record a return code. They must not fail on
    # their own, otherwise the guarded `fail` tasks that follow them can never
    # run and a node using the other init system would abort the play.
    - name: Check if init does what it is supposed to do (Sysvinit)
      shell: >-
        ps faux|grep -sq [c]eph-osd && service ceph status osd >> /dev/null
      register: ceph_status_sysvinit
      changed_when: false
      failed_when: false

    - name: Abort when the sysvinit script disagrees with the running OSDs
      fail:
        msg: "Something is terribly wrong here, sysvinit is configured, the services are started BUT the init script does not return 0, GO FIX YOUR SETUP!"
      when:
        - ceph_status_sysvinit.rc != 0
        - not migration_completed.stat.exists
        - osdsysvinit.rc == 0

    - name: Check if init does what it is supposed to do (upstart)
      shell: >-
        ps faux|grep -sq [c]eph-osd && initctl list|egrep -sq "ceph-osd \(ceph/.\) start/running, process [0-9][0-9][0-9][0-9]"
      register: ceph_status_upstart
      changed_when: false
      failed_when: false

    - name: Abort when the upstart jobs disagree with the running OSDs
      fail:
        msg: "Something is terribly wrong here, upstart is configured, the services are started BUT the init script does not return 0, GO FIX YOUR SETUP!"
      when:
        - ceph_status_upstart.rc != 0
        - not migration_completed.stat.exists
        - osdupstart.rc == 0

    # mon_group_name is not defined in this file; it must come from the
    # inventory / group_vars.
    - name: Set the noout flag
      command: ceph osd set noout
      delegate_to: "{{ groups[mon_group_name][0] }}"
      when: not migration_completed.stat.exists

    # The archive is written inside the directory being archived, so it is
    # excluded explicitly to keep tar from packing a partial copy of itself.
    - name: Archive ceph configs
      shell: >-
        tar -cpvzf - --one-file-system
        --exclude=./{{ ansible_facts['hostname'] }}.tar
        . /etc/ceph/ceph.conf | cat > {{ ansible_facts['hostname'] }}.tar
      args:
        chdir: /var/lib/ceph/
        creates: "/var/lib/ceph/{{ ansible_facts['hostname'] }}.tar"
      when: not migration_completed.stat.exists

    # NOTE(review): `fetch` below stores the archive on the Ansible control
    # node, while this directory is created on groups.backup[0] -- presumably
    # both are the same machine; confirm against the inventory.
    - name: Create backup directory
      file:
        path: "{{ backup_dir }}/osds-backups"
        state: directory
        owner: root
        group: root
        mode: "0644"
      delegate_to: "{{ groups.backup[0] }}"
      when: not migration_completed.stat.exists

    - name: Scp OSDs dirs and configs
      fetch:
        src: "/var/lib/ceph/{{ ansible_facts['hostname'] }}.tar"
        dest: "{{ backup_dir }}/osds-backups/"
        flat: true
      when: not migration_completed.stat.exists

    - name: Collect OSD ports
      shell: >-
        netstat -tlpn | awk -F ":" '/ceph-osd/ { sub (" .*", "", $2); print $2 }' | uniq
      register: osd_ports
      changed_when: false
      when: not migration_completed.stat.exists

    - name: Gracefully stop the OSDs (Upstart)
      service:
        name: ceph-osd-all
        state: stopped
      when:
        - osdupstart.rc == 0
        - not migration_completed.stat.exists

    # The sysvinit script takes the daemon type as argument; this play handles
    # OSDs, so the argument is "osd".
    - name: Gracefully stop the OSDs (Sysvinit)
      service:
        name: ceph
        state: stopped
        arguments: osd
      when:
        - osdsysvinit.rc == 0
        - not migration_completed.stat.exists

    # default([]) keeps the loop valid when "Collect OSD ports" was skipped and
    # therefore registered no stdout_lines.
    - name: Wait for the OSDs to be down
      local_action:
        module: wait_for
        host: "{{ ansible_ssh_host | default(inventory_hostname) }}"
        port: "{{ item }}"
        timeout: 10
        state: stopped
      with_items: "{{ osd_ports.stdout_lines | default([]) }}"
      when: not migration_completed.stat.exists

    - name: Configure RHEL with sysvinit
      shell: >-
        find -L /var/lib/ceph/osd/ -mindepth 1 -maxdepth 1 -regextype posix-egrep -regex '.*/[A-Za-z0-9]+-[A-Za-z0-9._-]+' -exec touch {}/sysvinit \; -exec rm {}/upstart \;
      when: not migration_completed.stat.exists

    - name: Reboot the server
      command: reboot
      when: not migration_completed.stat.exists

    # `host` is required here: without it wait_for probes 127.0.0.1, i.e. the
    # SSH port of the control node instead of the rebooting OSD node.
    - name: Wait for the server to come up
      local_action:
        module: wait_for
        host: "{{ ansible_ssh_host | default(inventory_hostname) }}"
        port: 22
        delay: 10
        timeout: 3600
      when: not migration_completed.stat.exists

    - name: Wait a bit to be sure that the server is ready for scp
      pause:
        seconds: 20
      when: not migration_completed.stat.exists

    # NOTE (leseb): 'creates' was added in Ansible 1.6
    # `creates` is given as an absolute path: tar strips the leading "/" of
    # /etc/ceph/ceph.conf, so the file lands under /var/lib/ceph/etc/ceph/.
    - name: Copy and unarchive the OSD configs
      unarchive:
        src: "{{ backup_dir }}/osds-backups/{{ ansible_facts['hostname'] }}.tar"
        dest: /var/lib/ceph/
        remote_src: false
        mode: "0600"
        creates: /var/lib/ceph/etc/ceph/ceph.conf
      when: not migration_completed.stat.exists

    - name: Copy keys and configs
      shell: cp etc/ceph/* /etc/ceph/
      args:
        chdir: /var/lib/ceph/
      when: not migration_completed.stat.exists

    # NOTE (leseb): at this point the upstart and sysvinit checks are not necessary
    # so we directly call sysvinit
    - name: Start all the OSDs
      service:
        name: ceph
        state: started
        arguments: osd
      when: not migration_completed.stat.exists

    # NOTE (leseb): this is tricky unless this is set into the ceph.conf
    # listened ports can be predicted, thus they will change after each restart
    # - name: Wait for the OSDs to be up again
    #   local_action: >
    #     wait_for
    #     host={{ ansible_ssh_host | default(inventory_hostname) }}
    #     port={{ item }}
    #     timeout=30
    #   with_items:
    #     - "{{ osd_ports.stdout_lines }}"

    # Succeeds once the number of active+clean PGs equals the total PG count.
    - name: Waiting for clean PGs...
      shell: >-
        test "[""$(ceph -s -f json | python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')""]" = "$(ceph -s -f json | python -c 'import sys, json; print([ i["count"] for i in json.load(sys.stdin)["pgmap"]["pgs_by_state"] if i["state_name"] == "active+clean"])')"
      register: result
      until: result.rc == 0
      retries: 10
      delay: 10
      delegate_to: "{{ groups.backup[0] }}"
      when: not migration_completed.stat.exists

    # Marker read by the first task of this play on the next run.
    - name: Done moving to the next OSD
      file:
        path: /var/lib/ceph/migration_completed
        state: touch
        owner: root
        group: root
        mode: "0600"
      when: not migration_completed.stat.exists

    - name: Unset the noout flag
      command: ceph osd unset noout
      delegate_to: "{{ groups[mon_group_name][0] }}"
      when: not migration_completed.stat.exists
# Rados gateways: migrate one node at a time (serial: 1).
#
# For each gateway the play archives the configuration, fetches the archive,
# stops radosgw, reboots the node, restores the configuration and starts
# radosgw again. A marker file (/var/lib/ceph/radosgw/migration_completed)
# makes the play re-runnable.
- hosts: rgws
  serial: 1
  # `become` is the current spelling of the former `sudo` keyword.
  become: true

  vars:
    backup_dir: /tmp/

  tasks:
    - name: Check if the node has be migrated already
      stat:
        path: /var/lib/ceph/radosgw/migration_completed
      register: migration_completed
      failed_when: false

    - name: Check for failed run
      stat:
        path: "/var/lib/ceph/{{ ansible_facts['hostname'] }}.tar"
      register: rgw_archive_leftover

    - name: Abort when an archive from a previous run is still there
      fail:
        msg: "Looks like an archive is already there, please remove it!"
      when:
        - not migration_completed.stat.exists
        - rgw_archive_leftover.stat.exists

    # The archive is written inside the directory being archived, so it is
    # excluded explicitly to keep tar from packing a partial copy of itself.
    - name: Archive rados gateway configs
      shell: >-
        tar -cpvzf - --one-file-system
        --exclude=./{{ ansible_facts['hostname'] }}.tar
        . /etc/ceph/* | cat > {{ ansible_facts['hostname'] }}.tar
      args:
        chdir: /var/lib/ceph/
        creates: "/var/lib/ceph/{{ ansible_facts['hostname'] }}.tar"
      when: not migration_completed.stat.exists

    # NOTE(review): `fetch` below stores the archive on the Ansible control
    # node, while this directory is created on groups.backup[0] -- presumably
    # both are the same machine; confirm against the inventory.
    - name: Create backup directory
      file:
        path: "{{ backup_dir }}/rgws-backups"
        state: directory
        owner: root
        group: root
        mode: "0644"
      delegate_to: "{{ groups.backup[0] }}"
      when: not migration_completed.stat.exists

    - name: Scp RGWs dirs and configs
      fetch:
        src: "/var/lib/ceph/{{ ansible_facts['hostname'] }}.tar"
        dest: "{{ backup_dir }}/rgws-backups/"
        flat: true
      when: not migration_completed.stat.exists

    - name: Gracefully stop the rados gateway
      service:
        name: radosgw
        state: stopped
      when: not migration_completed.stat.exists

    # `path` is checked on the machine that runs the module, so this task runs
    # on the gateway itself rather than through local_action.
    - name: Wait for radosgw to be down
      wait_for:
        path: /tmp/radosgw.sock
        state: absent
        timeout: 30
      when: not migration_completed.stat.exists

    - name: Reboot the server
      command: reboot
      when: not migration_completed.stat.exists

    # `host` is required here: without it wait_for probes 127.0.0.1, i.e. the
    # SSH port of the control node instead of the rebooting gateway.
    - name: Wait for the server to come up
      local_action:
        module: wait_for
        host: "{{ ansible_ssh_host | default(inventory_hostname) }}"
        port: 22
        delay: 10
        timeout: 3600
      when: not migration_completed.stat.exists

    - name: Wait a bit to be sure that the server is ready for scp
      pause:
        seconds: 20
      when: not migration_completed.stat.exists

    # NOTE (leseb): 'creates' was added in Ansible 1.6
    # `creates` is given as an absolute path: tar strips the leading "/" of
    # /etc/ceph/*, so the files land under /var/lib/ceph/etc/ceph/.
    - name: Copy and unarchive the RGW configs
      unarchive:
        src: "{{ backup_dir }}/rgws-backups/{{ ansible_facts['hostname'] }}.tar"
        dest: /var/lib/ceph/
        remote_src: false
        mode: "0600"
        creates: /var/lib/ceph/etc/ceph/ceph.conf
      when: not migration_completed.stat.exists

    - name: Copy keys and configs
      shell: cp etc/ceph/* /etc/ceph/
      args:
        chdir: /var/lib/ceph/
      when: not migration_completed.stat.exists

    - name: Start rados gateway
      service:
        name: radosgw
        state: started
      when: not migration_completed.stat.exists

    # Runs on the gateway for the same reason as "Wait for radosgw to be down".
    - name: Wait for radosgw to be up again
      wait_for:
        path: /tmp/radosgw.sock
        state: present
        timeout: 30
      when: not migration_completed.stat.exists

    # Marker read by the first task of this play on the next run.
    - name: Done moving to the next rados gateway
      file:
        path: /var/lib/ceph/radosgw/migration_completed
        state: touch
        owner: root
        group: root
        mode: "0600"
      when: not migration_completed.stat.exists