osds: use pg stat command instead of ceph status

The ceph status command returns a lot of information, which ends up stored in variables and/or facts and consumes resources for nothing.
When checking the pgs state, we only use the pgmap structure from the ceph status output.
To optimize this, we can use the ceph pg stat command, which contains the same needed information.
This command returns less information (only about pgs) and is slightly faster than the ceph status command.

$ ceph status -f json | wc -c
2000
$ ceph pg stat -f json | wc -c
240
$ time ceph status -f json > /dev/null

real 0m0.529s
user 0m0.503s
sys 0m0.024s
$ time ceph pg stat -f json > /dev/null

real 0m0.426s
user 0m0.409s
sys 0m0.016s

The data returned by the ceph status command is even bigger when using the nautilus release.

$ ceph status -f json | wc -c
35005
$ ceph pg stat -f json | wc -c
240

Signed-off-by: Dimitri Savineau <dsavinea@redhat.com>
(cherry picked from commit ee50588590)
2020-10-26 11:23:01 -04:00 · 2020-10-26 11:23:01 -04:00 · a8e2bc087f
parent b4c1f325a8
commit a8e2bc087f
3 changed files with 11 additions and 11 deletions
--- a/infrastructure-playbooks/cephadm-adopt.yml
+++ b/infrastructure-playbooks/cephadm-adopt.yml
@@ -479,13 +479,13 @@
      when: not containerized_deployment | bool

    - name: waiting for clean pgs...
-      command: "{{ cephadm_cmd }} shell --fsid {{ fsid }} -- ceph --cluster {{ cluster }} -s --format json"
+      command: "{{ cephadm_cmd }} shell --fsid {{ fsid }} -- ceph --cluster {{ cluster }} pg stat --format json"
      changed_when: false
      register: ceph_health_post
      until: >
-        (((ceph_health_post.stdout | from_json).pgmap.pgs_by_state | length) > 0)
+        (((ceph_health_post.stdout | from_json).pg_summary.num_pg_by_state | length) > 0)
        and
-        (((ceph_health_post.stdout | from_json).pgmap.pgs_by_state | selectattr('state_name', 'search', '^active\\+clean') | map(attribute='count') | list | sum) == (ceph_health_post.stdout | from_json).pgmap.num_pgs)
+        (((ceph_health_post.stdout | from_json).pg_summary.num_pg_by_state | selectattr('name', 'search', '^active\\+clean') | map(attribute='num') | list | sum) == (ceph_health_post.stdout | from_json).pg_summary.num_pgs)
      delegate_to: "{{ groups[mon_group_name][0] }}"
      retries: "{{ health_osd_check_retries }}"
      delay: "{{ health_osd_check_delay }}"
--- a/infrastructure-playbooks/rolling_update.yml
+++ b/infrastructure-playbooks/rolling_update.yml
@@ -415,21 +415,21 @@
        - not containerized_deployment | bool

    - name: get num_pgs - non container
-      command: "{{ container_exec_cmd_update_osd|default('') }} ceph --cluster {{ cluster }} -s --format json"
+      command: "{{ container_exec_cmd_update_osd|default('') }} ceph --cluster {{ cluster }} pg stat --format json"
      register: ceph_pgs
      delegate_to: "{{ groups[mon_group_name][0] }}"

    - name: waiting for clean pgs...
-      command: "{{ container_exec_cmd_update_osd|default('') }} ceph --cluster {{ cluster }} -s --format json"
+      command: "{{ container_exec_cmd_update_osd|default('') }} ceph --cluster {{ cluster }} pg stat --format json"
      register: ceph_health_post
      until: >
-        (((ceph_health_post.stdout | from_json).pgmap.pgs_by_state | length) > 0)
+        (((ceph_health_post.stdout | from_json).pg_summary.num_pg_by_state | length) > 0)
        and
-        (((ceph_health_post.stdout | from_json).pgmap.pgs_by_state | selectattr('state_name', 'search', '^active\\+clean') | map(attribute='count') | list | sum) == (ceph_pgs.stdout | from_json).pgmap.num_pgs)
+        (((ceph_health_post.stdout | from_json).pg_summary.num_pg_by_state | selectattr('name', 'search', '^active\\+clean') | map(attribute='num') | list | sum) == (ceph_pgs.stdout | from_json).pg_summary.num_pgs)
      delegate_to: "{{ groups[mon_group_name][0] }}"
      retries: "{{ health_osd_check_retries }}"
      delay: "{{ health_osd_check_delay }}"
-      when: (ceph_pgs.stdout | from_json).pgmap.num_pgs != 0
+      when: (ceph_pgs.stdout | from_json).pg_summary.num_pgs != 0


 - name: complete osd upgrade
--- a/infrastructure-playbooks/switch-from-non-containerized-to-containerized-ceph-daemons.yml
+++ b/infrastructure-playbooks/switch-from-non-containerized-to-containerized-ceph-daemons.yml
@@ -320,12 +320,12 @@
  post_tasks:
    - name: container - waiting for clean pgs...
      command: >
-        {{ container_binary }} exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }} ceph --cluster {{ cluster }} -s --format json
+        {{ container_binary }} exec ceph-mon-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }} ceph --cluster {{ cluster }} pg stat --format json
      register: ceph_health_post
      until: >
-        (((ceph_health_post.stdout | from_json).pgmap.pgs_by_state | length) > 0)
+        (((ceph_health_post.stdout | from_json).pg_summary.num_pg_by_state | length) > 0)
        and
-        (((ceph_health_post.stdout | from_json).pgmap.pgs_by_state | selectattr('state_name', 'search', '^active\\+clean') | map(attribute='count') | list | sum) == (ceph_health_post.stdout | from_json).pgmap.num_pgs)
+        (((ceph_health_post.stdout | from_json).pg_summary.num_pg_by_state | selectattr('name', 'search', '^active\\+clean') | map(attribute='num') | list | sum) == (ceph_health_post.stdout | from_json).pg_summary.num_pgs)
      delegate_to: "{{ groups[mon_group_name][0] }}"
      retries: "{{ health_osd_check_retries }}"
      delay: "{{ health_osd_check_delay }}"