一、环境
主机名  | IP地址  | 系统  | 说明  | 
k8s  | 192.168.11.65  | Ubuntu 20.04  | k8s版本:v1.23.10 单机版本  | 
servicemonitor
prometheusrules
1、准备环境
略
二、监控redis
1、安装redis
helm repo add bitnami https://charts.bitnami.com/bitnami
helm repo update
helm search repo bitnami/redis 
helm pull bitnami/redis --version 17.9.2
tar xf redis-17.9.2.tgz
编辑配置文件
 vim redis/values.yaml
修改配置如下:设置密码为123456,设置从节点数量,关闭持久化存储
global:
  redis:
    password: "123456"
#关闭持久化存储
persistence:
  enabled: false
#设置一个从节点
replica:
  replicaCount: 1
通过sed修改
sed -i '/redis:/{n;s#password: ""#password: "123456"#}' redis/values.yaml 
sed -i '/  persistence:/{n;n;n;s#enabled: true#enabled: false#}' redis/values.yaml 
sed -i '/^replica:/{n;n;n;s#replicaCount: 3#replicaCount: 1#}' redis/values.yaml
检查
egrep -A3 "password: |  persistence:|^replica:" redis/values.yaml
安装
helm install -n monitoring --create-namespace redis redis
检测redis登陆
kubectl exec -it redis-master-0 -n monitoring -- redis-cli -a 123456 
127.0.0.1:6379> info
2、创建Deployment部署redis-exporter(三选一)
安装redis-exporter
cat > redis-exporter.yaml <<"EOF"
---  
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: redis-exporter
  name: redis-exporter-master
  namespace: monitoring
spec:
  replicas: 1
  selector:
    matchLabels:
      app: redis-exporter
  template:
    metadata:
      labels:
        app: redis-exporter
    spec:
      containers:
      - name: redis-exporter
        image: oliver006/redis_exporter:latest
        env:
        - name: TZ
          value: "Asia/Shanghai"
        - name: REDIS_ADDR
          #地址和密码根据实际填写
          value: "redis://redis-master:6379"
        - name: REDIS_PASSWORD
          value: "123456"
        resources:
          requests:
            cpu: 100m
            memory: 100Mi
        ports:
        - name: metrics
          containerPort: 9121
          protocol: TCP
---
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: redis-exporter
  name: redis-exporter-replicas
  namespace: monitoring
spec:
  replicas: 1
  selector:
    matchLabels:
      app: redis-exporter
  template:
    metadata:
      labels:
        app: redis-exporter
    spec:
      containers:
      - name: redis-exporter
        image: oliver006/redis_exporter:latest
        env:
        - name: TZ
          value: "Asia/Shanghai"
        - name: REDIS_ADDR
          #地址和密码根据实际填写
          value: "redis://redis-replicas:6379"
        - name: REDIS_PASSWORD
          value: "123456"
        resources:
          requests:
            cpu: 100m
            memory: 100Mi
        ports:
        - name: metrics
          containerPort: 9121
          protocol: TCP
---
apiVersion: v1
kind: Service
metadata:
  labels:
    app: redis-exporter
  name: redis-exporter
  namespace: monitoring
spec:
  ports:
  - name: http-metirc
    protocol: TCP
    port: 9121
    targetPort: metrics
  selector:
    app: redis-exporter
EOF
创建
kubectl create -f redis-exporter.yaml
检查
kubectl get -f redis-exporter.yaml
Prometheus添加配置
添加ServiceMonitor资源对象,使prometheus去收集redis_exporter提供的监控样本数据
使用cat创建redis-sm.yaml文件
cat >redis-sm.yaml<<"EOF"
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: redis-exporter
  namespace: monitoring
  labels:
    app: redis-exporter
    release: prometheus
spec:
  #jobLabel: redis-exporter
  endpoints:
  #http-metirc为redis-exporter的Service端口的name,一定要一致
  - port: http-metirc
    interval: 30s
    scheme: http
  selector:
    matchLabels:
      app: redis-exporter
  namespaceSelector:
    matchNames:
    - monitoring
EOF
参数解释:
endpoints:用于配置需要收集 metrics 的 Endpoints 的端口和其他参数(注意:endpoints(小写)是 ServiceMonitor CRD 中的一个字段,而 Endpoints(大写)是 Kubernetes 资源类型)
selector:通过label匹配的方式获取serviceMonitor访问到的后端程序,通常都是exporter对应的service
创建
kubectl create -f redis-sm.yaml
检查
 kubectl get -f redis-sm.yaml
http://192.168.11.65:9090/targets
注意事项

添加告警规则(触发器)
从这里下载告警文件
https://awesome-prometheus-alerts.grep.to/rules.html#host-and-hardware
wget https://raw.githubusercontent.com/samber/awesome-prometheus-alerts/master/dist/rules/redis/oliver006-redis-exporter.yml
创建PrometheusRule资源对象
cat >> redis-exporter-rules.yml <<"EOF"
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    release: prometheus
  name: redis-exporter-rules
  namespace: monitoring
spec:
  groups:
  - name: Redis
    rules:
      - alert: RedisDown
        expr: 'redis_up == 0'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Redis down (instance {{ $labels.instance }})
          description: "Redis instance is down
  VALUE = {{ $value }}
  LABELS = {{ $labels }}"
      
      - alert: RedisMissingMaster
        expr: '(count(redis_instance_info{role="master"}) or vector(0)) < 1'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Redis missing master (instance {{ $labels.instance }})
          description: "Redis cluster has no node marked as master.
  VALUE = {{ $value }}
  LABELS = {{ $labels }}"
      
      - alert: RedisTooManyMasters
        expr: 'count(redis_instance_info{role="master"}) > 1'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Redis too many masters (instance {{ $labels.instance }})
          description: "Redis cluster has too many nodes marked as master.
  VALUE = {{ $value }}
  LABELS = {{ $labels }}"
      
      - alert: RedisDisconnectedSlaves
        expr: 'count without (instance, job) (redis_connected_slaves) - sum without (instance, job) (redis_connected_slaves) - 1 > 1'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Redis disconnected slaves (instance {{ $labels.instance }})
          description: "Redis not replicating for all slaves. Consider reviewing the redis replication status.
  VALUE = {{ $value }}
  LABELS = {{ $labels }}"
      
      - alert: RedisReplicationBroken
        expr: 'delta(redis_connected_slaves[1m]) < 0'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Redis replication broken (instance {{ $labels.instance }})
          description: "Redis instance lost a slave
  VALUE = {{ $value }}
  LABELS = {{ $labels }}"
      
      - alert: RedisClusterFlapping
        expr: 'changes(redis_connected_slaves[1m]) > 1'
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: Redis cluster flapping (instance {{ $labels.instance }})
          description: "Changes have been detected in Redis replica connection. This can occur when replica nodes lose connection to the master and reconnect (a.k.a flapping).
  VALUE = {{ $value }}
  LABELS = {{ $labels }}"
      
      - alert: RedisMissingBackup
        expr: 'time() - redis_rdb_last_save_timestamp_seconds > 60 * 60 * 24'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Redis missing backup (instance {{ $labels.instance }})
          description: "Redis has not been backuped for 24 hours
  VALUE = {{ $value }}
  LABELS = {{ $labels }}"
      
      - alert: RedisOutOfSystemMemory
        expr: 'redis_memory_used_bytes / redis_total_system_memory_bytes * 100 > 90'
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Redis out of system memory (instance {{ $labels.instance }})
          description: "Redis is running out of system memory (> 90%)
  VALUE = {{ $value }}
  LABELS = {{ $labels }}"
      
      - alert: RedisOutOfConfiguredMaxmemory
        expr: 'redis_memory_used_bytes / redis_memory_max_bytes * 100 > 90'
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Redis out of configured maxmemory (instance {{ $labels.instance }})
          description: "Redis is running out of configured maxmemory (> 90%)
  VALUE = {{ $value }}
  LABELS = {{ $labels }}"
      
      - alert: RedisTooManyConnections
        expr: 'redis_connected_clients > 100'
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Redis too many connections (instance {{ $labels.instance }})
          description: "Redis instance has too many connections
  VALUE = {{ $value }}
  LABELS = {{ $labels }}"
      
      - alert: RedisNotEnoughConnections
        expr: 'redis_connected_clients < 5'
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Redis not enough connections (instance {{ $labels.instance }})
          description: "Redis instance should have more connections (> 5)
  VALUE = {{ $value }}
  LABELS = {{ $labels }}"
      
      - alert: RedisRejectedConnections
        expr: 'increase(redis_rejected_connections_total[1m]) > 0'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Redis rejected connections (instance {{ $labels.instance }})
          description: "Some connections to Redis has been rejected
  VALUE = {{ $value }}
  LABELS = {{ $labels }}"
EOF
创建
kubectl create -f redis-exporter-rules.yml
检查
 kubectl get -f redis-exporter-rules.yml
http://192.168.11.65:9090/alerts?search=
3、修改配置安装redis-exporter(三选一)
- 
修改配置,安装redis_exporter并监控,添加告警规则
 
bitnami/redis
修改配置
vim redis/values.yaml
metrics:
  enabled: true
  serviceMonitor:
    enabled: true
    additionalLabels:
      release: prometheus
  prometheusRule:
    enabled: false
    additionalLabels:
      release: prometheus
更新配置
helm upgrade -n monitoring --create-namespace redis redis
检查pod
 kubectl get pod -n monitoring
创建PrometheusRule文件
参考上面
检查ServiceMonitor
kubectl get servicemonitors redis -n monitoring -oyaml
检查PrometheusRule
kubectl get prometheusrules redis -n monitoring -oyaml
web检查
http://192.168.11.65:9090/targets?search=

http://192.168.11.65:9090/alerts?search= 检查

4、prometheus-redis-exporter(三选一)
安装prometheus-redis-exporter
- 
通过prometheus-community仓库安装prometheus-redis-exporter
 
helm search repo prometheus-community|grep redis
prometheus-community/prometheus-redis-exporter          5.3.1           v1.44.0         Prometheus exporter for Redis metrics 
helm fetch prometheus-community/prometheus-redis-exporter
tar xf prometheus-redis-exporter-5.3.1.tgz
通过wget下载
wget https://github.com/prometheus-community/helm-charts/releases/download/prometheus-redis-exporter-5.3.1/prometheus-redis-exporter-5.3.1.tgz
tar xf prometheus-redis-exporter-5.3.1.tgz
grep -A 2 'image:' prometheus-redis-exporter/*
#不用替换
修改配置
vim prometheus-redis-exporter/values.yaml
开启serviceMonitor和prometheusRule,添加labels: release: prometheus
从 5.0.0 开始,redis exporter helm chart 支持多个目标。
通过启用serviceMonitor.multipleTarget和设置目标serviceMonitor.targets,可以抓取多个 redis 实例。
redisAddress: redis://redis-master:6379
serviceMonitor:
  enabled: true
auth:
  enabled: true
  secret:
    name: "redis"
    key: "redis-password"
  #redisPassword: "123456"
  #如果要使用redisPassword: "123456"这个参数,vim prometheus-redis-exporter/templates/deployment.yaml 
  #把value: {{ .Values.auth.redisPassword }}修改为value: "{{ .Values.auth.redisPassword }}",加双引号
helm安装
helm install  -n monitoring --create-namespace prometheus-redis-exporter prometheus-redis-exporter
检查metrics
root@k8s:~# kubectl get svc  -n monitoring
NAME                                      TYPE        CLUSTER-IP      EXTERNAL-IP   PORT(S)                      
prometheus-redis-exporter                 ClusterIP   10.233.48.200   <none>        9121/TCP                     11m
#通过curl检查
curl 10.233.48.200:9121/metrics
检查servicemonitors
kubectl get servicemonitors prometheus-redis-exporter -n monitoring -oyaml
检查prometheusrules
kubectl get prometheusrules prometheus-redis-exporter -n monitoring -oyaml
5、问题
http://192.168.11.65:9090/targets?search= 检查没有redis
解决:
检查prometheus-redis-exporter的ServiceMonitor配置的labels是否有release: prometheus
kubectl get ServiceMonitor prometheus-redis-exporter -n monitoring -oyaml
  labels:
    release: prometheus #是否有这行
    
#如果没有通过命令增加
kubectl label servicemonitors prometheus-redis-exporter release=prometheus -n monitoring
完成后检查
6、grafana添加dashboard
id:11835
id:17507
集群方式监控,id:14615
图形展示成功。图形问题是因为最大内存没有设置

总:
二进制:安装redis-exporter 修改prometheus配置(增加metrics地址,重启prometheus),增加告警规则,dashboard
K8S: 安装redis-exporter 添加Servicemonitor, 增加prometheusrules dashboard
            
          
            
          
          
评论区