diff --git a/example/config.yml b/example/config.yml
index 7cf8925..e52d39b 100644
--- a/example/config.yml
+++ b/example/config.yml
@@ -180,6 +180,10 @@
 dashboard_install: "yes"
 dashboardVer: "__dashboard__"
 dashboardMetricsScraperVer: "__dash_metrics__"
+# prometheus auto install
+prom_install: "no"
+prom_namespace: "monitor"
+prom_chart_ver: "__prom_chart__"
 
 ############################
 # role:harbor
diff --git a/ezctl b/ezctl
index ec0882b..8c9a565 100755
--- a/ezctl
+++ b/ezctl
@@ -124,6 +124,7 @@ function new() {
   dashboardVer=$(grep 'dashboardVer=' ezdown|cut -d'=' -f2)
   dashboardMetricsScraperVer=$(grep 'dashboardMetricsScraperVer=' ezdown|cut -d'=' -f2)
   metricsVer=$(grep 'metricsVer=' ezdown|cut -d'=' -f2)
+  promChartVer=$(grep 'promChartVer=' ezdown|cut -d'=' -f2)
 
   sed -i -e "s/__flannel__/$flannelVer/g" \
       -e "s/__calico__/$calicoVer/g" \
@@ -133,6 +134,7 @@ function new() {
       -e "s/__coredns__/$corednsVer/g" \
       -e "s/__dashboard__/$dashboardVer/g" \
       -e "s/__dash_metrics__/$dashboardMetricsScraperVer/g" \
+      -e "s/__prom_chart__/$promChartVer/g" \
       -e "s/__metrics__/$metricsVer/g" "clusters/$1/config.yml"
diff --git a/ezdown b/ezdown
index 47fc402..bfd0ab2 100755
--- a/ezdown
+++ b/ezdown
@@ -22,14 +22,15 @@ SYS_PKG_VER=0.3.3
 # images needed by k8s cluster
 calicoVer=v3.15.3
 flannelVer=v0.13.0-amd64
-export ciliumVer=v1.4.1
-export kubeRouterVer=v0.3.1
-export kubeOvnVer=v1.5.3
 corednsVer=1.7.1
 dashboardVer=v2.1.0
 dashboardMetricsScraperVer=v1.0.6
 metricsVer=v0.3.6
 pauseVer=3.2
+export ciliumVer=v1.4.1
+export kubeRouterVer=v0.3.1
+export kubeOvnVer=v1.5.3
+export promChartVer=12.10.6
 
 function usage() {
   echo -e "\033[33mUsage:\033[0m ezdown [options] [args]"
diff --git a/roles/cluster-addon/files/kube-prometheus-stack-12.10.6.tgz b/roles/cluster-addon/files/kube-prometheus-stack-12.10.6.tgz
new file mode 100644
index 0000000..c4ec0b4
Binary files /dev/null and b/roles/cluster-addon/files/kube-prometheus-stack-12.10.6.tgz differ
diff --git a/roles/cluster-addon/tasks/main.yml b/roles/cluster-addon/tasks/main.yml
index 7cbddb7..f99a1a0 100644
--- a/roles/cluster-addon/tasks/main.yml
+++ b/roles/cluster-addon/tasks/main.yml
@@ -106,6 +106,9 @@
 #- import_tasks: ingress.yml
 #  when: '"ingress-controller" not in pod_info.stdout and ingress_install == "yes"'
 
+- import_tasks: prometheus.yml
+  when: '"kube-prometheus-operator" not in pod_info.stdout and prom_install == "yes"'
+
 #- block:
 #    - block:
 #        - name: try to push the offline metallb images (failures can be ignored)
diff --git a/roles/cluster-addon/tasks/prometheus.yml b/roles/cluster-addon/tasks/prometheus.yml
new file mode 100644
index 0000000..314f9c0
--- /dev/null
+++ b/roles/cluster-addon/tasks/prometheus.yml
@@ -0,0 +1,47 @@
+- block:
+    - name: check whether namespace {{ prom_namespace }} already exists
+      shell: "{{ base_dir }}/bin/kubectl get ns"
+      register: ns_info
+
+    - name: create namespace {{ prom_namespace }}
+      shell: "{{ base_dir }}/bin/kubectl create ns {{ prom_namespace }}"
+      when: "prom_namespace not in ns_info.stdout"
+
+    - name: get etcd-client-cert info
+      shell: "{{ base_dir }}/bin/kubectl get secrets -n {{ prom_namespace }}"
+      register: secrets_info
+
+    - name: create the etcd-client certificate signing request
+      template: src=prometheus/etcd-client-csr.json.j2 dest={{ cluster_dir }}/ssl/etcd-client-csr.json
+      when: '"etcd-client-cert" not in secrets_info.stdout'
+
+    - name: create the etcd-client certificate and private key
+      shell: "cd {{ cluster_dir }}/ssl && {{ base_dir }}/bin/cfssl gencert \
+            -ca=ca.pem \
+            -ca-key=ca-key.pem \
+            -config=ca-config.json \
+            -profile=kubernetes etcd-client-csr.json|{{ base_dir }}/bin/cfssljson -bare etcd-client"
+      when: '"etcd-client-cert" not in secrets_info.stdout'
+
+    - name: create the etcd-client-cert secret
+      shell: "cd {{ cluster_dir }}/ssl && \
+            {{ base_dir }}/bin/kubectl create secret generic -n {{ prom_namespace }} etcd-client-cert \
+            --from-file=etcd-ca=ca.pem \
+            --from-file=etcd-client=etcd-client.pem \
+            --from-file=etcd-client-key=etcd-client-key.pem"
+      when: '"etcd-client-cert" not in secrets_info.stdout'
+
+    # determine the kubernetes version
+    - name: register variable K8S_VER
+      shell: "{{ base_dir }}/bin/kube-apiserver --version|cut -d' ' -f2|cut -d'v' -f2"
+      register: K8S_VER
+
+    - name: render the custom settings for the prom chart
+      template: src=prometheus/values.yaml.j2 dest={{ cluster_dir }}/yml/prom-values.yaml
+
+    - name: install kube-prometheus-stack {{ prom_chart_ver }} with helm
+      shell: "{{ base_dir }}/bin/helm install -n {{ prom_namespace }} prometheus \
+            -f {{ cluster_dir }}/yml/prom-values.yaml \
+            {{ base_dir }}/roles/cluster-addon/files/kube-prometheus-stack-{{ prom_chart_ver }}.tgz"
+  run_once: true
+  connection: local
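Reviewer note: the task file above runs entirely on the deploy node (connection: local with run_once: true), so kubectl, cfssl and helm only need to exist under {{ base_dir }}/bin there. As a rough sketch, the certificate steps it automates boil down to the manual commands below; the base_dir of /etc/kubeasz and the cluster name "mycluster" are placeholder assumptions, adjust to your layout:

    # sketch only: what the role automates, assuming base_dir=/etc/kubeasz
    cd /etc/kubeasz/clusters/mycluster/ssl
    /etc/kubeasz/bin/cfssl gencert -ca=ca.pem -ca-key=ca-key.pem \
        -config=ca-config.json -profile=kubernetes etcd-client-csr.json \
        | /etc/kubeasz/bin/cfssljson -bare etcd-client
    /etc/kubeasz/bin/kubectl create secret generic -n monitor etcd-client-cert \
        --from-file=etcd-ca=ca.pem \
        --from-file=etcd-client=etcd-client.pem \
        --from-file=etcd-client-key=etcd-client-key.pem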
'"etcd-client-cert" not in secrets_info.stdout' + + - name: 创建 etcd-client-cert + shell: "cd {{ cluster_dir }}/ssl && \ + {{ base_dir }}/bin/kubectl create secret generic -n {{ prom_namespace }} etcd-client-cert \ + --from-file=etcd-ca=ca.pem \ + --from-file=etcd-client=etcd-client.pem \ + --from-file=etcd-client-key=etcd-client-key.pem" + when: '"etcd-client-cert" not in secrets_info.stdout' + + # 判断 kubernetes 版本 + - name: 注册变量 K8S_VER + shell: "{{ base_dir }}/bin/kube-apiserver --version|cut -d' ' -f2|cut -d'v' -f2" + register: K8S_VER + + - name: 创建 prom chart 个性化设置 + template: src=prometheus/values.yaml.j2 dest={{ cluster_dir }}/yml/prom-values.yaml + + - name: helm 创建 kube-prometheus-stack {{ prom_chart_ver }} + shell: "{{ base_dir }}/bin/helm install -n {{ prom_namespace }} prometheus \ + -f {{ cluster_dir }}/yml/prom-values.yaml \ + {{ base_dir }}/roles/cluster-addon/files/kube-prometheus-stack-{{ prom_chart_ver }}.tgz" + run_once: true + connection: local diff --git a/roles/cluster-addon/templates/prometheus/etcd-client-csr.json.j2 b/roles/cluster-addon/templates/prometheus/etcd-client-csr.json.j2 new file mode 100644 index 0000000..b896c4b --- /dev/null +++ b/roles/cluster-addon/templates/prometheus/etcd-client-csr.json.j2 @@ -0,0 +1,17 @@ +{ + "CN": "etcd-client", + "hosts": [], + "key": { + "algo": "rsa", + "size": 2048 + }, + "names": [ + { + "C": "CN", + "ST": "HangZhou", + "L": "XS", + "O": "k8s", + "OU": "System" + } + ] +} diff --git a/roles/cluster-addon/templates/prometheus/values.yaml.j2 b/roles/cluster-addon/templates/prometheus/values.yaml.j2 new file mode 100644 index 0000000..1d891e0 --- /dev/null +++ b/roles/cluster-addon/templates/prometheus/values.yaml.j2 @@ -0,0 +1,191 @@ +## Provide a k8s version to auto dashboard import script example: kubeTargetVersionOverride: 1.16.6 +kubeTargetVersionOverride: "{{ K8S_VER.stdout }}" + +## Configuration for alertmanager +alertmanager: + enabled: true + config: + global: + resolve_timeout: 5m + route: + group_by: ['job'] + group_wait: 30s + group_interval: 5m + repeat_interval: 12h + receiver: 'null' + routes: + - match: + alertname: Watchdog + receiver: 'null' + receivers: + - name: 'null' + + ## Configuration for Alertmanager service + service: + nodePort: 39902 + type: NodePort + + alertmanagerSpec: + image: + repository: quay.io/prometheus/alertmanager + tag: v0.21.0 + + replicas: 1 + retention: 120h + + nodeSelector: {} + +## Using default values from https://github.com/grafana/helm-charts/blob/main/charts/grafana/values.yaml +grafana: + enabled: true + defaultDashboardsEnabled: true + adminPassword: Admin1234! + + service: + nodePort: 39903 + type: NodePort + +## Component scraping the kube api server +kubeApiServer: + enabled: true + +## Component scraping the kubelet and kubelet-hosted cAdvisor +kubelet: + enabled: true + namespace: kube-system + +## Component scraping the kube controller manager +kubeControllerManager: + enabled: true + endpoints: +{% for h in groups['kube-master'] %} + - {{ h }} +{% endfor %} + +## Component scraping coreDns. Use either this or kubeDns +coreDns: + enabled: true + +## Component scraping etcd +kubeEtcd: + enabled: true + endpoints: +{% for h in groups['etcd'] %} + - {{ h }} +{% endfor %} + + ## Configure secure access to the etcd cluster by loading a secret into prometheus and + ## specifying security configuration below. 
diff --git a/roles/cluster-addon/templates/prometheus/values.yaml.j2 b/roles/cluster-addon/templates/prometheus/values.yaml.j2
new file mode 100644
index 0000000..1d891e0
--- /dev/null
+++ b/roles/cluster-addon/templates/prometheus/values.yaml.j2
@@ -0,0 +1,191 @@
+## Provide a k8s version to the auto dashboard import script, e.g. kubeTargetVersionOverride: 1.16.6
+kubeTargetVersionOverride: "{{ K8S_VER.stdout }}"
+
+## Configuration for alertmanager
+alertmanager:
+  enabled: true
+  config:
+    global:
+      resolve_timeout: 5m
+    route:
+      group_by: ['job']
+      group_wait: 30s
+      group_interval: 5m
+      repeat_interval: 12h
+      receiver: 'null'
+      routes:
+      - match:
+          alertname: Watchdog
+        receiver: 'null'
+    receivers:
+    - name: 'null'
+
+  ## Configuration for the Alertmanager service
+  service:
+    nodePort: 39902
+    type: NodePort
+
+  alertmanagerSpec:
+    image:
+      repository: quay.io/prometheus/alertmanager
+      tag: v0.21.0
+
+    replicas: 1
+    retention: 120h
+
+    nodeSelector: {}
+
+## Using default values from https://github.com/grafana/helm-charts/blob/main/charts/grafana/values.yaml
+grafana:
+  enabled: true
+  defaultDashboardsEnabled: true
+  adminPassword: Admin1234!
+
+  service:
+    nodePort: 39903
+    type: NodePort
+
+## Component scraping the kube api server
+kubeApiServer:
+  enabled: true
+
+## Component scraping the kubelet and kubelet-hosted cAdvisor
+kubelet:
+  enabled: true
+  namespace: kube-system
+
+## Component scraping the kube controller manager
+kubeControllerManager:
+  enabled: true
+  endpoints:
+{% for h in groups['kube-master'] %}
+  - {{ h }}
+{% endfor %}
+
+## Component scraping coreDns. Use either this or kubeDns
+coreDns:
+  enabled: true
+
+## Component scraping etcd
+kubeEtcd:
+  enabled: true
+  endpoints:
+{% for h in groups['etcd'] %}
+  - {{ h }}
+{% endfor %}
+
+  ## Configure secure access to the etcd cluster by loading a secret into prometheus and
+  ## specifying the security configuration below. For example, with a secret named etcd-client-cert
+  serviceMonitor:
+    scheme: https
+    insecureSkipVerify: true
+    serverName: localhost
+    caFile: /etc/prometheus/secrets/etcd-client-cert/etcd-ca
+    certFile: /etc/prometheus/secrets/etcd-client-cert/etcd-client
+    keyFile: /etc/prometheus/secrets/etcd-client-cert/etcd-client-key
+
+## Component scraping kube scheduler
+kubeScheduler:
+  enabled: true
+  endpoints:
+{% for h in groups['kube-master'] %}
+  - {{ h }}
+{% endfor %}
+
+## Component scraping kube proxy
+kubeProxy:
+  enabled: true
+  endpoints:
+{% for h in groups['kube-master'] %}
+  - {{ h }}
+{% endfor %}
+{% for h in groups['kube-node'] %}
+{% if h not in groups['kube-master'] %}
+  - {{ h }}
+{% endif %}
+{% endfor %}
+
+## Manages Prometheus and Alertmanager components
+prometheusOperator:
+  enabled: true
+
+  ## Namespaces to scope the interaction of the Prometheus Operator and the apiserver (allow list).
+  ## This is mutually exclusive with denyNamespaces. Setting this to an empty object will disable the configuration
+  namespaces: {}
+    # releaseNamespace: true
+    # additional:
+    # - kube-system
+
+  ## Namespaces not to scope the interaction of the Prometheus Operator (deny list).
+  denyNamespaces: []
+
+  ## Filter namespaces to look for prometheus-operator custom resources
+  ##
+  alertmanagerInstanceNamespaces: []
+  prometheusInstanceNamespaces: []
+  thanosRulerInstanceNamespaces: []
+
+  service:
+    nodePort: 39899
+    nodePortTls: 39900
+    type: NodePort
+
+  nodeSelector: {}
+
+  ## Prometheus-operator image
+  image:
+    repository: quay.io/prometheus-operator/prometheus-operator
+    tag: v0.44.0
+
+  ## Configmap-reload image to use for reloading configmaps
+  configmapReloadImage:
+    repository: docker.io/jimmidyson/configmap-reload
+    tag: v0.4.0
+
+  ## Prometheus-config-reloader image to use for config and rule reloading
+  prometheusConfigReloaderImage:
+    repository: quay.io/prometheus-operator/prometheus-config-reloader
+    tag: v0.44.0
+
+## Deploy a Prometheus instance
+prometheus:
+  enabled: true
+
+  ## Configuration for the Prometheus service
+  service:
+    nodePort: 39901
+    type: NodePort
+
+  ## Settings affecting prometheusSpec
+  ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#prometheusspec
+  prometheusSpec:
+    ## APIServerConfig
+    ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#apiserverconfig
+    apiserverConfig: {}
+
+    image:
+      repository: quay.io/prometheus/prometheus
+      tag: v2.22.1
+
+    replicas: 1
+
+    secrets:
+      - etcd-client-cert
+
+    storageSpec: {}
+    ## Using PersistentVolumeClaim
+    ##
+    # volumeClaimTemplate:
+    #   spec:
+    #     storageClassName: gluster
+    #     accessModes: ["ReadWriteOnce"]
+    #     resources:
+    #       requests:
+    #         storage: 50Gi
+    #     selector: {}
+
+    ## Using tmpfs volume
+    ##
+    # emptyDir:
+    #   medium: Memory
diff --git a/roles/deploy/tasks/main.yml b/roles/deploy/tasks/main.yml
index df047a8..0d1623f 100644
--- a/roles/deploy/tasks/main.yml
+++ b/roles/deploy/tasks/main.yml
@@ -3,6 +3,7 @@
   with_items:
   - "{{ cluster_dir }}/ssl"
  - "{{ cluster_dir }}/backup"
+  - "{{ cluster_dir }}/yml"
   - "~/.kube"
 
 - name: set permissions on the local bin directory
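With all of the above applied, enabling the addon end to end is expected to look roughly like this; "mycluster" is a placeholder cluster name, and the exact ezctl setup step that triggers cluster-addon may differ between versions:

    ./ezdown -D                   # fetch binaries/images; the chart tarball already ships in the repo
    ./ezctl new mycluster         # renders __prom_chart__ into clusters/mycluster/config.yml
    # edit clusters/mycluster/config.yml and set prom_install: "yes"
    ./ezctl setup mycluster all   # cluster-addon runs last and installs the chart

Grafana, Prometheus and Alertmanager are then reachable on the NodePorts set in values.yaml.j2 (39903, 39901 and 39902 respectively); the default Grafana admin password Admin1234! should be changed for anything beyond a test cluster.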