From a8e2bc087f69a4098cb75086c0a8d5b504e3426a Mon Sep 17 00:00:00 2001
From: Dimitri Savineau <dsavinea@redhat.com>
Date: Mon, 26 Oct 2020 11:23:01 -0400
Subject: [PATCH] osds: use pg stat command instead of ceph status

The ceph status command returns a lot of information stored in variables
and/or facts which could consume resources for nothing.
When checking the pgs state, we're using the pgmap structure in the ceph
status output.
To optimize this, we could use the ceph pg stat command which contains
the same needed information.
This command returns less information (only about pgs) and is slightly
faster than the ceph status command.

$ ceph status -f json | wc -c
2000
$ ceph pg stat -f json | wc -c
240
$ time ceph status -f json > /dev/null

real	0m0.529s
user	0m0.503s
sys	0m0.024s
$ time ceph pg stat -f json > /dev/null

real	0m0.426s
user	0m0.409s
sys	0m0.016s

The data returned by the ceph status is even bigger when using the
nautilus release.

$ ceph status -f json | wc -c
35005
$ ceph pg stat -f json | wc -c
240

Signed-off-by: Dimitri Savineau <dsavinea@redhat.com>
(cherry picked from commit ee505885908ac2ae15bf201a638359faaf78d251)
---
 infrastructure-playbooks/cephadm-adopt.yml             |  6 +++---
 infrastructure-playbooks/rolling_update.yml            | 10 +++++-----
 ...non-containerized-to-containerized-ceph-daemons.yml |  6 +++---
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/infrastructure-playbooks/cephadm-adopt.yml b/infrastructure-playbooks/cephadm-adopt.yml
index 14ced297c..fc5be11a2 100644
--- a/infrastructure-playbooks/cephadm-adopt.yml
+++ b/infrastructure-playbooks/cephadm-adopt.yml
@@ -479,13 +479,13 @@
       when: not containerized_deployment | bool
 
     - name: waiting for clean pgs...
-      command: "{{ cephadm_cmd }} shell --fsid {{ fsid }} -- ceph --cluster {{ cluster }} -s --format json"
+      command: "{{ cephadm_cmd }} shell --fsid {{ fsid }} -- ceph --cluster {{ cluster }} pg stat --format json"
       changed_when: false
       register: ceph_health_post
       until: >
-        (((ceph_health_post.stdout | from_json).pgmap.pgs_by_state | length) > 0)
+        (((ceph_health_post.stdout | from_json).pg_summary.num_pg_by_state | length) > 0)
         and
-        (((ceph_health_post.stdout | from_json).pgmap.pgs_by_state | selectattr('state_name', 'search', '^active\\+clean') | map(attribute='count') | list | sum) == (ceph_health_post.stdout | from_json).pgmap.num_pgs)
+        (((ceph_health_post.stdout | from_json).pg_summary.num_pg_by_state | selectattr('name', 'search', '^active\\+clean') | map(attribute='num') | list | sum) == (ceph_health_post.stdout | from_json).pg_summary.num_pgs)
       delegate_to: "{{ groups[mon_group_name][0] }}"
       retries: "{{ health_osd_check_retries }}"
       delay: "{{ health_osd_check_delay }}"
diff --git a/infrastructure-playbooks/rolling_update.yml b/infrastructure-playbooks/rolling_update.yml
index 6a2f1fbc3..4c46b6369 100644
--- a/infrastructure-playbooks/rolling_update.yml
+++ b/infrastructure-playbooks/rolling_update.yml
@@ -415,21 +415,21 @@
         - not containerized_deployment | bool
 
     - name: get num_pgs - non container
-      command: "{{ container_exec_cmd_update_osd|default('') }} ceph --cluster {{ cluster }} -s --format json"
+      command: "{{ container_exec_cmd_update_osd|default('') }} ceph --cluster {{ cluster }} pg stat --format json"
       register: ceph_pgs
       delegate_to: "{{ groups[mon_group_name][0] }}"
 
     - name: waiting for clean pgs...
-      command: "{{ container_exec_cmd_update_osd|default('') }} ceph --cluster {{ cluster }} -s --format json"
+      command: "{{ container_exec_cmd_update_osd|default('') }} ceph --cluster {{ cluster }} pg stat --format json"
       register: ceph_health_post
       until: >
-        (((ceph_health_post.stdout | from_json).pgmap.pgs_by_state | length) > 0)
+        (((ceph_health_post.stdout | from_json).pg_summary.num_pg_by_state | length) > 0)
         and
-        (((ceph_health_post.stdout | from_json).pgmap.pgs_by_state | selectattr('state_name', 'search', '^active\\+clean') | map(attribute='count') | list | sum) == (ceph_pgs.stdout | from_json).pgmap.num_pgs)
+        (((ceph_health_post.stdout | from_json).pg_summary.num_pg_by_state | selectattr('name', 'search', '^active\\+clean') | map(attribute='num') | list | sum) == (ceph_pgs.stdout | from_json).pg_summary.num_pgs)
       delegate_to: "{{ groups[mon_group_name][0] }}"
       retries: "{{ health_osd_check_retries }}"
       delay: "{{ health_osd_check_delay }}"
-      when: (ceph_pgs.stdout | from_json).pgmap.num_pgs != 0
+      when: (ceph_pgs.stdout | from_json).pg_summary.num_pgs != 0
 
 
 - name: complete osd upgrade
diff --git a/infrastructure-playbooks/switch-from-non-containerized-to-containerized-ceph-daemons.yml b/infrastructure-playbooks/switch-from-non-containerized-to-containerized-ceph-daemons.yml
index 4f4e6d531..eab634bd9 100644
--- a/infrastructure-playbooks/switch-from-non-containerized-to-containerized-ceph-daemons.yml
+++ b/infrastructure-playbooks/switch-from-non-containerized-to-containerized-ceph-daemons.yml
@@ -320,12 +320,12 @@
   post_tasks:
     - name: container - waiting for clean pgs...
       command: >
-        {{ container_binary }} exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }} ceph --cluster {{ cluster }} -s --format json
+        {{ container_binary }} exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }} ceph --cluster {{ cluster }} pg stat --format json
       register: ceph_health_post
       until: >
-        (((ceph_health_post.stdout | from_json).pgmap.pgs_by_state | length) > 0)
+        (((ceph_health_post.stdout | from_json).pg_summary.num_pg_by_state | length) > 0)
         and
-        (((ceph_health_post.stdout | from_json).pgmap.pgs_by_state | selectattr('state_name', 'search', '^active\\+clean') | map(attribute='count') | list | sum) == (ceph_health_post.stdout | from_json).pgmap.num_pgs)
+        (((ceph_health_post.stdout | from_json).pg_summary.num_pg_by_state | selectattr('name', 'search', '^active\\+clean') | map(attribute='num') | list | sum) == (ceph_health_post.stdout | from_json).pg_summary.num_pgs)
       delegate_to: "{{ groups[mon_group_name][0] }}"
       retries: "{{ health_osd_check_retries }}"
       delay: "{{ health_osd_check_delay }}"