ryaneorth/k8s-scheduled-volume-snapshotter

notification

Closed this issue · 4 comments

Any suggestions on how to get notified when snapshots are not being taken for any reason, without using Prometheus or Grafana alerts? We found yesterday that the snapshotter pod was stuck in the Pending state and had not created any snapshots for the last 12 days...

Hi @fragolinux - I do not know of a great way to do this without leveraging an external monitoring tool like Prometheus/Grafana. For my own usage of this repository, I have a Grafana notification that alerts me if no snapshotter pod has run within a certain time threshold.

Good to know. Could you please share the dashboard JSON? Thanks!

You may need to modify the frequency of the checks based on how often you expect the scheduled volume snapshot pods to run, but I've found this to be adequate for my use case:

{
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": {
          "type": "grafana",
          "uid": "-- Grafana --"
        },
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "target": {
          "limit": 100,
          "matchAny": false,
          "tags": [],
          "type": "dashboard"
        },
        "type": "dashboard"
      }
    ]
  },
  "editable": true,
  "fiscalYearStartMonth": 0,
  "graphTooltip": 0,
  "links": [],
  "liveNow": false,
  "panels": [
    {
      "alert": {
        "alertRuleTags": {},
        "conditions": [
          {
            "evaluator": {
              "params": [
                1
              ],
              "type": "lt"
            },
            "operator": {
              "type": "and"
            },
            "query": {
              "params": [
                "A",
                "30m",
                "now"
              ]
            },
            "reducer": {
              "params": [],
              "type": "max"
            },
            "type": "query"
          }
        ],
        "executionErrorState": "keep_state",
        "for": "1m",
        "frequency": "30m",
        "handler": 1,
        "message": "There have not been any successful scheduled volume snapshot pods in the past 30 minutes",
        "name": "Successful Scheduled Volume Snapshot Pods alert",
        "noDataState": "no_data",
        "notifications": [
          {
            "uid": "<redacted>"
          }
        ]
      },
      "datasource": {
        "type": "prometheus",
        "uid": "<redacted>"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "auto",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 11,
        "w": 24,
        "x": 0,
        "y": 0
      },
      "id": 2,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom"
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "<redacted>"
          },
          "expr": "sum(kube_pod_status_phase{phase=\"Succeeded\", namespace=\"scheduled-volume-snapshotter\"})",
          "refId": "A"
        }
      ],
      "thresholds": [
        {
          "colorMode": "critical",
          "op": "lt",
          "value": 1,
          "visible": true
        }
      ],
      "title": "Successful Scheduled Volume Snapshot Pods",
      "type": "timeseries"
    }
  ],
  "schemaVersion": 36,
  "style": "dark",
  "tags": [],
  "templating": {
    "list": []
  },
  "time": {
    "from": "now-6h",
    "to": "now"
  },
  "timepicker": {},
  "timezone": "",
  "title": "Scheduled Volume Snapshotter Health",
  "version": 0,
  "weekStart": ""
}

Thank you!