Deploy Prometheus and Grafana on Kubernetes

29 Dec 2019

Tested On

OS: Ubuntu 18.04
Kubernetes Version: v1.15.3
Docker Version: 18.09.8
Prometheus Version: 2.12.0

Prometheus Deployment

# Create the `monitoring` namespace that the manifests below reference.
kubectl create namespace monitoring

prometheus-cluster-role.yaml:

---
# ClusterRole granting read-only access to the objects Prometheus discovers
# and scrapes (nodes, services, endpoints, pods, ingresses) plus the
# non-resource /metrics URL.
# rbac.authorization.k8s.io/v1 replaces the deprecated v1beta1 API group
# version and is available on the Kubernetes release listed under "Tested On".
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
  - apiGroups: [""]
    resources:
      - nodes
      - nodes/proxy
      - services
      - endpoints
      - pods
    verbs: ["get", "list", "watch"]
  - apiGroups:
      - extensions
    resources:
      - ingresses
    verbs: ["get", "list", "watch"]
  - nonResourceURLs: ["/metrics"]
    verbs: ["get"]
---
# Binds the ClusterRole above to the `default` ServiceAccount of the
# `monitoring` namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: default
    namespace: monitoring

prometheus-config-map.yaml:

---
# ConfigMap holding the two files mounted into the Prometheus container at
# /etc/prometheus/: the alerting rules and the main Prometheus configuration.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-server-conf
  namespace: monitoring
  labels:
    name: prometheus-server-conf
data:
  # Alerting rules; loaded through `rule_files` in prometheus.yml below.
  prometheus.rules: |-
    groups:
    - name: devopscube demo alert
      rules:
      - alert: High Pod Memory
        expr: sum(container_memory_usage_bytes) > 1
        for: 1m
        labels:
          severity: slack
        annotations:
          summary: High Memory Usage
  # Main Prometheus configuration. `scrape_configs` must stay inside this
  # block scalar (same indentation as `global`, `rule_files`, `alerting`),
  # otherwise it is not part of the prometheus.yml file content.
  prometheus.yml: |-
    global:
      scrape_interval: 5s
      evaluation_interval: 5s
    rule_files:
      - /etc/prometheus/prometheus.rules
    alerting:
      alertmanagers:
      - scheme: http
        static_configs:
        - targets:
          - "alertmanager.monitoring.svc:9093"
    scrape_configs:
      - job_name: 'kubernetes-apiservers'
        kubernetes_sd_configs:
        - role: endpoints
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
        - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
          action: keep
          regex: default;kubernetes;https
      - job_name: 'kubernetes-nodes'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
        - role: node
        relabel_configs:
        - action: labelmap
          regex: __meta_kubernetes_node_label_(.+)
        - target_label: __address__
          replacement: kubernetes.default.svc:443
        - source_labels: [__meta_kubernetes_node_name]
          regex: (.+)
          target_label: __metrics_path__
          replacement: /api/v1/nodes/${1}/proxy/metrics
      - job_name: 'kubernetes-pods'
        kubernetes_sd_configs:
        - role: pod
        relabel_configs:
        - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
          action: keep
          regex: true
        - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
          action: replace
          target_label: __metrics_path__
          regex: (.+)
        - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
          action: replace
          regex: ([^:]+)(?::\d+)?;(\d+)
          replacement: $1:$2
          target_label: __address__
        - action: labelmap
          regex: __meta_kubernetes_pod_label_(.+)
        - source_labels: [__meta_kubernetes_namespace]
          action: replace
          target_label: kubernetes_namespace
        - source_labels: [__meta_kubernetes_pod_name]
          action: replace
          target_label: kubernetes_pod_name
      - job_name: 'kubernetes-cadvisor'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
        - role: node
        relabel_configs:
        - action: labelmap
          regex: __meta_kubernetes_node_label_(.+)
        - target_label: __address__
          replacement: kubernetes.default.svc:443
        - source_labels: [__meta_kubernetes_node_name]
          regex: (.+)
          target_label: __metrics_path__
          replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
      - job_name: 'kubernetes-service-endpoints'
        kubernetes_sd_configs:
        - role: endpoints
        relabel_configs:
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
          action: keep
          regex: true
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
          action: replace
          target_label: __scheme__
          regex: (https?)
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
          action: replace
          target_label: __metrics_path__
          regex: (.+)
        - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
          action: replace
          target_label: __address__
          regex: ([^:]+)(?::\d+)?;(\d+)
          replacement: $1:$2
        - action: labelmap
          regex: __meta_kubernetes_service_label_(.+)
        - source_labels: [__meta_kubernetes_namespace]
          action: replace
          target_label: kubernetes_namespace
        - source_labels: [__meta_kubernetes_service_name]
          action: replace
          target_label: kubernetes_name

prometheus-deployment.yaml:

---
# Single-replica Prometheus server. Configuration comes from the
# prometheus-server-conf ConfigMap; time-series data lives in an emptyDir
# volume, so it is lost whenever the pod is rescheduled.
# apps/v1 replaces the deprecated extensions/v1beta1 Deployment API and
# requires an explicit spec.selector matching the pod template labels.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-deployment
  namespace: monitoring
  labels:
    app: prometheus-server
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus-server
  template:
    metadata:
      labels:
        app: prometheus-server
    spec:
      containers:
        - name: prometheus
          # Pinned to the Prometheus version listed under "Tested On" so the
          # deployment does not silently follow the `latest` tag.
          image: prom/prometheus:v2.12.0
          args:
            - "--config.file=/etc/prometheus/prometheus.yml"
            - "--storage.tsdb.path=/prometheus/"
          ports:
            - containerPort: 9090
          volumeMounts:
            - name: prometheus-config-volume
              mountPath: /etc/prometheus/
            - name: prometheus-storage-volume
              mountPath: /prometheus/
      volumes:
        - name: prometheus-config-volume
          configMap:
            # 420 is the decimal form of file mode 0644.
            defaultMode: 420
            name: prometheus-server-conf
        - name: prometheus-storage-volume
          emptyDir: {}

prometheus-service.yaml:

---
# NodePort Service exposing the Prometheus pods: Service port 8080 forwards
# to container port 9090.
apiVersion: v1
kind: Service
metadata:
  name: prometheus-service
  namespace: monitoring
  annotations:
    # Read by the 'kubernetes-service-endpoints' scrape job in
    # prometheus-config-map.yaml. That job targets the endpoint (pod) IP and
    # replaces the port with prometheus.io/port, so the annotation has to
    # name the port the container listens on (9090), not the Service port,
    # and the path has to be the metrics endpoint rather than the UI root.
    prometheus.io/scrape: 'true'
    prometheus.io/path: /metrics
    prometheus.io/port: '9090'
spec:
  type: NodePort
  # Declared once; the original listed `selector` twice, which is a
  # duplicate mapping key.
  selector:
    app: prometheus-server
  ports:
    - port: 8080
      targetPort: 9090

prometheus-ingress-service.yml:

---
# Ingress routing https://prom.example.com/ to prometheus-service:8080,
# with TLS served from the wildcard.example.com.crt Secret.
# networking.k8s.io/v1beta1 is kept because networking.k8s.io/v1 Ingress is
# not available on the Kubernetes release listed under "Tested On"; newer
# clusters need the v1 schema (backend.service.name / port.number, pathType).
apiVersion: networking.k8s.io/v1beta1
kind: Ingress
metadata:
  name: prometheus-ingress
  namespace: monitoring
spec:
  tls:
    - hosts:
        - prom.example.com
      secretName: wildcard.example.com.crt
  rules:
    - host: prom.example.com
      http:
        paths:
          - path: /
            backend:
              serviceName: prometheus-service
              servicePort: 8080
# Apply the Prometheus manifest files listed above.
kubectl apply -f prometheus-cluster-role.yaml
kubectl apply -f prometheus-config-map.yaml
kubectl apply -f prometheus-deployment.yaml
kubectl apply -f prometheus-service.yaml
kubectl apply -f prometheus-ingress-service.yml

Grafana Deployment

grafana-configmap.yaml:

---
# ConfigMap carrying Grafana's main configuration file (grafana.ini).
apiVersion: v1
kind: ConfigMap
metadata:
  name: cluster-monitoring-grafana-ini
  namespace: monitoring
  labels:
    app.kubernetes.io/name: cluster-monitoring
    app.kubernetes.io/component: grafana
data:
  # Grafana's main configuration file. To learn more about the configuration
  # options available to you, consult
  # https://grafana.com/docs/installation/configuration
  # NOTE(review): [grafana_net] url normally points at the grafana.com
  # service, not at this Grafana instance - confirm the value is intended.
  grafana.ini: |
    [analytics]
    check_for_updates = true
    [grafana_net]
    url = https://grafana.example.com
    [log]
    mode = console
    [paths]
    data = /var/lib/grafana/data
    logs = /var/log/grafana
    plugins = /var/lib/grafana/plugins
---
# ConfigMap carrying the datasource provisioning file; it registers the
# Prometheus Service (prometheus-service.monitoring:8080) as the default
# datasource.
apiVersion: v1
kind: ConfigMap
metadata:
  name: cluster-monitoring-grafana-datasources
  namespace: monitoring
  labels:
    app.kubernetes.io/name: cluster-monitoring
data:
  # A file that specifies data sources for Grafana to use to populate
  # dashboards. To learn more about configuring this, consult
  # https://grafana.com/docs/administration/provisioning/#datasources
  datasources.yaml: |
    apiVersion: 1
    datasources:
    - access: proxy
      isDefault: true
      name: prometheus
      type: prometheus
      url: http://prometheus-service.monitoring:8080
      version: 1

grafana-pv-data.yml:

---
# Local PersistentVolume backing Grafana's data directory. The data lives at
# /var/lib/k8s/volumes/grafana/data on node k8s-02, and nodeAffinity
# restricts use of the volume to that node.
# PersistentVolumes are cluster-scoped, so no metadata.namespace is set.
# The `name: grafana-data` label is what the StatefulSet's
# volumeClaimTemplates selector matches on.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: grafana-data
  labels:
    name: grafana-data
spec:
  capacity:
    storage: 200Gi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: local-storage
  local:
    path: /var/lib/k8s/volumes/grafana/data
  nodeAffinity:
    required:
      nodeSelectorTerms:
        - matchExpressions:
            - key: kubernetes.io/hostname
              operator: In
              values:
                - k8s-02

grafana-secret.yaml:

---
# Secret providing the Grafana admin credentials; the StatefulSet reads the
# two keys below into GF_SECURITY_ADMIN_USER / GF_SECURITY_ADMIN_PASSWORD.
apiVersion: v1
kind: Secret
metadata:
  name: cluster-monitoring-grafana
  namespace: monitoring
  labels:
    app.kubernetes.io/name: cluster-monitoring
    app.kubernetes.io/component: grafana
type: Opaque
data:
  # Values under `data` must be base64-encoded.
  # By default, admin-user is set to admin (YWRtaW4= is base64 for "admin").
  admin-user: YWRtaW4=
  # PLACEHOLDER - replace before applying. The literal below is not valid
  # base64, so the manifest is rejected as-is. Generate a real value with:
  #   echo -n '<your-password>' | base64
  admin-password: "base64encodedpassword"

grafana-serviceaccount.yaml:

---
# ServiceAccount the Grafana StatefulSet pods run as
# (referenced by serviceAccountName: grafana).
apiVersion: v1
kind: ServiceAccount
metadata:
  name: grafana
  namespace: monitoring

grafana-service.yaml:

---
# Service in front of the Grafana pods (selected by k8s-app: grafana).
apiVersion: v1
kind: Service
metadata:
  name: grafana-service
  namespace: monitoring
  labels:
    k8s-app: grafana
    app.kubernetes.io/name: cluster-monitoring
    app.kubernetes.io/component: grafana
spec:
  selector:
    k8s-app: grafana
  ports:
    # Routes port 80 to port 3000 of the Grafana StatefulSet Pods
    - name: http
      port: 80
      protocol: TCP
      targetPort: 3000

grafana-statefulset.yaml:

---
# Single-replica Grafana StatefulSet. Configuration and datasources come
# from the two Grafana ConfigMaps, admin credentials from the
# cluster-monitoring-grafana Secret, and persistent data from a
# volumeClaimTemplate that selects the grafana-data PersistentVolume.
# apps/v1 replaces the deprecated apps/v1beta2 StatefulSet API.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: cluster-monitoring-grafana
  namespace: monitoring
  labels:
    k8s-app: grafana
    app.kubernetes.io/name: cluster-monitoring
    app.kubernetes.io/component: grafana
spec:
  # NOTE(review): no Service named cluster-monitoring-grafana is defined in
  # this guide (grafana-service.yaml creates `grafana-service`) - confirm
  # which governing Service name is intended.
  serviceName: cluster-monitoring-grafana
  replicas: 1
  # The selector and the pod template carry the same three labels as the
  # StatefulSet itself (written out instead of using YAML anchors/aliases).
  selector:
    matchLabels:
      k8s-app: grafana
      app.kubernetes.io/name: cluster-monitoring
      app.kubernetes.io/component: grafana
  template:
    metadata:
      labels:
        k8s-app: grafana
        app.kubernetes.io/name: cluster-monitoring
        app.kubernetes.io/component: grafana
    spec:
      serviceAccountName: grafana
      # Configure an init container that will chmod 777 Grafana's data
      # directory and volume before the main Grafana container starts up.
      # To learn more about init containers, consult
      # https://kubernetes.io/docs/concepts/workloads/pods/init-containers/
      # from the official Kubernetes docs.
      initContainers:
        - name: "init-chmod-data"
          image: debian:9
          imagePullPolicy: "IfNotPresent"
          command: ["chmod", "777", "/var/lib/grafana"]
          volumeMounts:
            - name: grafana-data
              mountPath: "/var/lib/grafana"
      containers:
        - name: grafana
          # The main Grafana container, which uses the grafana/grafana:6.2.5
          # image from https://hub.docker.com/r/grafana/grafana
          image: grafana/grafana:6.2.5
          imagePullPolicy: Always
          # Mount in all the previously defined ConfigMaps as volumeMounts
          # as well as the Grafana data volume
          volumeMounts:
            - name: config
              mountPath: "/etc/grafana/"
            - name: datasources
              mountPath: "/etc/grafana/provisioning/datasources/"
            - name: grafana-data
              mountPath: "/var/lib/grafana"
          ports:
            - name: service
              containerPort: 80
              protocol: TCP
            - name: grafana
              containerPort: 3000
              protocol: TCP
          # Set the GF_SECURITY_ADMIN_USER and GF_SECURITY_ADMIN_PASSWORD
          # environment variables using the Secret defined in
          # grafana-secret.yaml
          env:
            - name: GF_SECURITY_ADMIN_USER
              valueFrom:
                secretKeyRef:
                  name: cluster-monitoring-grafana
                  key: admin-user
            - name: GF_SECURITY_ADMIN_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: cluster-monitoring-grafana
                  key: admin-password
          # Define a liveness and readiness probe that will hit /api/health
          # using port 3000. To learn more about Liveness and Readiness
          # Probes, consult
          # https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/
          # from the official Kubernetes docs.
          livenessProbe:
            httpGet:
              path: /api/health
              port: 3000
            initialDelaySeconds: 60
            timeoutSeconds: 30
            failureThreshold: 10
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /api/health
              port: 3000
            initialDelaySeconds: 60
            timeoutSeconds: 30
            failureThreshold: 10
            periodSeconds: 10
          # Define resource limits and requests of 50m of CPU and 100Mi of
          # memory.
          resources:
            limits:
              cpu: 50m
              memory: 100Mi
            requests:
              cpu: 50m
              memory: 100Mi
      # Define configMap volumes for the above ConfigMap files. The data
      # volume is not listed here; it comes from volumeClaimTemplates below.
      volumes:
        - name: config
          configMap:
            name: cluster-monitoring-grafana-ini
        - name: datasources
          configMap:
            name: cluster-monitoring-grafana-datasources
  # Claim for Grafana's 200Gi data volume, mounted at /var/lib/grafana.
  # The selector picks the PersistentVolume labelled name=grafana-data.
  volumeClaimTemplates:
    - metadata:
        name: grafana-data
      spec:
        accessModes: ["ReadWriteOnce"]
        storageClassName: "local-storage"
        resources:
          requests:
            storage: 200Gi
        selector:
          matchExpressions:
            - key: name
              operator: In
              values:
                - grafana-data

grafana-ingress-service.yml:

---
# Ingress routing https://grafana.example.com/ to grafana-service:80,
# with TLS served from the wildcard.example.com.crt Secret.
# networking.k8s.io/v1beta1 is kept because networking.k8s.io/v1 Ingress is
# not available on the Kubernetes release listed under "Tested On"; newer
# clusters need the v1 schema (backend.service.name / port.number, pathType).
apiVersion: networking.k8s.io/v1beta1
kind: Ingress
metadata:
  name: grafana-ingress
  namespace: monitoring
spec:
  tls:
    - hosts:
        - grafana.example.com
      secretName: wildcard.example.com.crt
  rules:
    - host: grafana.example.com
      http:
        paths:
          - path: /
            backend:
              serviceName: grafana-service
              servicePort: 80
# Apply the Grafana manifest files listed above.
kubectl apply -f grafana-configmap.yaml
kubectl apply -f grafana-pv-data.yml
kubectl apply -f grafana-secret.yaml
kubectl apply -f grafana-serviceaccount.yaml
kubectl apply -f grafana-service.yaml
kubectl apply -f grafana-statefulset.yaml
kubectl apply -f grafana-ingress-service.yml

Grafana dashboards:

https://grafana.com/grafana/dashboards/10000

https://grafana.com/grafana/dashboards/315