From a9b337ba660da641f36c79a92e0aace217175ff0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Han?=
Date: Tue, 27 Nov 2018 10:45:05 +0100
Subject: [PATCH] handler: show unit logs on error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This will tremendously help debugging daemons that fail on restart by
showing the systemd unit logs.

Signed-off-by: Sébastien Han
---
 roles/ceph-handler/templates/restart_mds_daemon.sh.j2        | 3 ++-
 roles/ceph-handler/templates/restart_mgr_daemon.sh.j2        | 3 ++-
 roles/ceph-handler/templates/restart_mon_daemon.sh.j2        | 3 ++-
 roles/ceph-handler/templates/restart_nfs_daemon.sh.j2        | 3 ++-
 roles/ceph-handler/templates/restart_osd_daemon.sh.j2        | 3 ++-
 roles/ceph-handler/templates/restart_rbd_mirror_daemon.sh.j2 | 3 ++-
 roles/ceph-handler/templates/restart_rgw_daemon.sh.j2        | 3 ++-
 7 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/roles/ceph-handler/templates/restart_mds_daemon.sh.j2 b/roles/ceph-handler/templates/restart_mds_daemon.sh.j2
index db817e6d8..2300386e0 100644
--- a/roles/ceph-handler/templates/restart_mds_daemon.sh.j2
+++ b/roles/ceph-handler/templates/restart_mds_daemon.sh.j2
@@ -21,5 +21,6 @@ while [ $RETRIES -ne 0 ]; do
   let RETRIES=RETRIES-1
 done
 # If we reach this point, it means the socket is not present.
-echo "Socket file ${SOCKET} could not be found, which means the Metadata Server is not running."
+echo "Socket file ${SOCKET} could not be found, which means the Metadata Server is not running. Showing ceph-mds unit logs now:"
+journalctl -u ceph-mds@${MDS_NAME}
 exit 1
diff --git a/roles/ceph-handler/templates/restart_mgr_daemon.sh.j2 b/roles/ceph-handler/templates/restart_mgr_daemon.sh.j2
index d9dc72801..a9bbc9f96 100644
--- a/roles/ceph-handler/templates/restart_mgr_daemon.sh.j2
+++ b/roles/ceph-handler/templates/restart_mgr_daemon.sh.j2
@@ -22,5 +22,6 @@ while [ $RETRIES -ne 0 ]; do
   let RETRIES=RETRIES-1
 done
 # If we reach this point, it means the socket is not present.
-echo "Socket file ${SOCKET} could not be found, which means ceph manager is not running."
+echo "Socket file ${SOCKET} could not be found, which means ceph manager is not running. Showing ceph-mgr unit logs now:"
+journalctl -u ceph-mgr@${MGR_NAME}
 exit 1
diff --git a/roles/ceph-handler/templates/restart_mon_daemon.sh.j2 b/roles/ceph-handler/templates/restart_mon_daemon.sh.j2
index 748b07374..0e8318c46 100644
--- a/roles/ceph-handler/templates/restart_mon_daemon.sh.j2
+++ b/roles/ceph-handler/templates/restart_mon_daemon.sh.j2
@@ -38,5 +38,6 @@ while [ $COUNT -ne 0 ]; do
   let COUNT=COUNT-1
 done
 # If we reach this point, it means the socket is not present.
-echo "Socket file ${SOCKET} could not be found, which means the monitor is not running."
+echo "Socket file ${SOCKET} could not be found, which means the monitor is not running. Showing ceph-mon unit logs now:"
+journalctl -u ceph-mon@{{ ansible_hostname }}
 exit 1
diff --git a/roles/ceph-handler/templates/restart_nfs_daemon.sh.j2 b/roles/ceph-handler/templates/restart_nfs_daemon.sh.j2
index 628b05fe4..6567ed3a9 100644
--- a/roles/ceph-handler/templates/restart_nfs_daemon.sh.j2
+++ b/roles/ceph-handler/templates/restart_nfs_daemon.sh.j2
@@ -18,7 +18,8 @@ while [ $RETRIES -ne 0 ]; do
   let RETRIES=RETRIES-1
 done
 # If we reach this point, it means the pid is not present.
-echo "PID file ${PID} could not be found, which means Ganesha is not running."
+echo "PID file ${PID} could not be found, which means Ganesha is not running. Showing $NFS_NAME unit logs now:"
+journalctl -u $NFS_NAME
 exit 1
 {% else %}
 systemctl restart nfs-ganesha
diff --git a/roles/ceph-handler/templates/restart_osd_daemon.sh.j2 b/roles/ceph-handler/templates/restart_osd_daemon.sh.j2
index 09de06d5c..7559271c5 100644
--- a/roles/ceph-handler/templates/restart_osd_daemon.sh.j2
+++ b/roles/ceph-handler/templates/restart_osd_daemon.sh.j2
@@ -87,6 +87,7 @@ for unit in $(systemctl list-units | grep -E "loaded * active" | grep -oE "ceph-
     let COUNT=COUNT-1
   done
   # If we reach this point, it means the socket is not present.
-  echo "Socket file ${SOCKET} could not be found, which means the osd daemon is not running."
+  echo "Socket file ${SOCKET} could not be found, which means the osd daemon is not running. Showing ceph-osd unit logs now:"
+  journalctl -u "${unit}"
   exit 1
 done
diff --git a/roles/ceph-handler/templates/restart_rbd_mirror_daemon.sh.j2 b/roles/ceph-handler/templates/restart_rbd_mirror_daemon.sh.j2
index 52113b660..bb6b58da9 100644
--- a/roles/ceph-handler/templates/restart_rbd_mirror_daemon.sh.j2
+++ b/roles/ceph-handler/templates/restart_rbd_mirror_daemon.sh.j2
@@ -21,5 +21,6 @@ while [ $RETRIES -ne 0 ]; do
   let RETRIES=RETRIES-1
 done
 # If we reach this point, it means the socket is not present.
-echo "Socket file ${SOCKET} could not be found, which means rbd mirror is not running."
+echo "Socket file ${SOCKET} could not be found, which means rbd mirror is not running. Showing ceph-rbd-mirror unit logs now:"
+journalctl -u ceph-rbd-mirror@rbd-mirror.${RBD_MIRROR_NAME}
 exit 1
diff --git a/roles/ceph-handler/templates/restart_rgw_daemon.sh.j2 b/roles/ceph-handler/templates/restart_rgw_daemon.sh.j2
index cfb43d119..976b6cc3b 100644
--- a/roles/ceph-handler/templates/restart_rgw_daemon.sh.j2
+++ b/roles/ceph-handler/templates/restart_rgw_daemon.sh.j2
@@ -46,5 +46,6 @@ while [ $COUNT -ne 0 ]; do
   sleep $DELAY
   let COUNT=COUNT-1
 done
-echo "Socket file ${SOCKET} could not be found, which means Rados Gateway is not running."
+echo "Socket file ${SOCKET} could not be found, which means Rados Gateway is not running. Showing ceph-rgw unit logs now:"
+journalctl -u ceph-radosgw@rgw.${RGW_NAME}
 exit 1