Compare commits

..

1 Commits

Author SHA1 Message Date
Ian Fijolek
60cfac948b Breaking: Rename minitor_check_milliseconds and minitor_monitor_up_count
To conform with Prometheus metric name best practices, these have been
renamed as follows:

  * `minitor_check_milliseconds` to `minitor_check_seconds`
  * `minitor_monitor_up_count` to `minitor_monitor_up`
2022-12-19 15:45:23 -08:00
9 changed files with 36 additions and 83 deletions
+1
View File
@@ -9,6 +9,7 @@ linters:
- gomnd - gomnd
- goprintffuncname - goprintffuncname
- misspell - misspell
- promlinter
- tagliatelle - tagliatelle
- tenv - tenv
- testpackage - testpackage
-3
View File
@@ -55,9 +55,6 @@ The global configurations are:
|key|value| |key|value|
|---|---| |---|---|
|`check_interval`|Maximum frequency to run checks for each monitor as duration, eg. 1m2s.| |`check_interval`|Maximum frequency to run checks for each monitor as duration, eg. 1m2s.|
|`default_alert_after`|A default value used as an `alert_after` value for a monitor if not specified or 0.|
|`default_alert_down`|Default down alerts to be used by a monitor in case none are provided.|
|`default_alert_up`|Default up alerts to be used by a monitor in case none are provided.|
|`monitors`|List of all monitors. Detailed description below| |`monitors`|List of all monitors. Detailed description below|
|`alerts`|List of all alerts. Detailed description below| |`alerts`|List of all alerts. Detailed description below|
-23
View File
@@ -14,10 +14,6 @@ var errInvalidConfig = errors.New("Invalid configuration")
// Config type contains all provided user configuration // Config type contains all provided user configuration
type Config struct { type Config struct {
CheckInterval SecondsOrDuration `yaml:"check_interval"` CheckInterval SecondsOrDuration `yaml:"check_interval"`
DefaultAlertAfter int16 `yaml:"default_alert_after"`
DefaultAlertEvery *int16 `yaml:"default_alert_every"`
DefaultAlertDown []string `yaml:"default_alert_down"`
DefaultAlertUp []string `yaml:"default_alert_up"`
Monitors []*Monitor Monitors []*Monitor
Alerts map[string]*Alert Alerts map[string]*Alert
} }
@@ -139,27 +135,8 @@ func (config Config) IsValid() (isValid bool) {
// Init performs extra initialization on top of loading the config from file // Init performs extra initialization on top of loading the config from file
func (config *Config) Init() (err error) { func (config *Config) Init() (err error) {
for _, monitor := range config.Monitors {
if monitor.AlertAfter == 0 && config.DefaultAlertAfter > 0 {
monitor.AlertAfter = config.DefaultAlertAfter
}
if monitor.AlertEvery == nil && config.DefaultAlertEvery != nil {
monitor.AlertEvery = config.DefaultAlertEvery
}
if len(monitor.AlertDown) == 0 && len(config.DefaultAlertDown) > 0 {
monitor.AlertDown = config.DefaultAlertDown
}
if len(monitor.AlertUp) == 0 && len(config.DefaultAlertUp) > 0 {
monitor.AlertUp = config.DefaultAlertUp
}
}
for name, alert := range config.Alerts { for name, alert := range config.Alerts {
alert.Name = name alert.Name = name
if err = alert.BuildTemplates(); err != nil { if err = alert.BuildTemplates(); err != nil {
return return
} }
-1
View File
@@ -14,7 +14,6 @@ func TestLoadConfig(t *testing.T) {
pyCompat bool pyCompat bool
}{ }{
{"./test/valid-config.yml", false, "Valid config file", false}, {"./test/valid-config.yml", false, "Valid config file", false},
{"./test/valid-config-default-values.yml", false, "Valid config file with default values", false},
{"./test/valid-default-log-alert.yml", false, "Valid config file with default log alert PyCompat", true}, {"./test/valid-default-log-alert.yml", false, "Valid config file with default log alert PyCompat", true},
{"./test/valid-default-log-alert.yml", true, "Invalid config file no log alert", false}, {"./test/valid-default-log-alert.yml", true, "Invalid config file no log alert", false},
{"./test/does-not-exist", true, "Invalid config path", false}, {"./test/does-not-exist", true, "Invalid config path", false},
+1 -1
View File
@@ -76,7 +76,7 @@ func checkMonitors(config *Config) error {
// Track status metrics // Track status metrics
Metrics.SetMonitorStatus(monitor.Name, monitor.IsUp()) Metrics.SetMonitorStatus(monitor.Name, monitor.IsUp())
Metrics.CountCheck(monitor.Name, success, monitor.LastCheckMilliseconds(), hasAlert) Metrics.CountCheck(monitor.Name, success, monitor.LastCheckSeconds(), hasAlert)
if alertNotice != nil { if alertNotice != nil {
err := sendAlerts(config, monitor, alertNotice) err := sendAlerts(config, monitor, alertNotice)
+4 -4
View File
@@ -43,14 +43,14 @@ func NewMetrics() *MinitorMetrics {
), ),
checkTime: prometheus.NewGaugeVec( checkTime: prometheus.NewGaugeVec(
prometheus.GaugeOpts{ prometheus.GaugeOpts{
Name: "minitor_check_milliseconds", Name: "minitor_check_seconds",
Help: "Time in miliseconds that a check ran for", Help: "Time in miliseconds that a check ran for",
}, },
[]string{"monitor", "status"}, []string{"monitor", "status"},
), ),
monitorStatus: prometheus.NewGaugeVec( monitorStatus: prometheus.NewGaugeVec(
prometheus.GaugeOpts{ prometheus.GaugeOpts{
Name: "minitor_monitor_up_count", Name: "minitor_monitor_up",
Help: "Status of currently responsive monitors", Help: "Status of currently responsive monitors",
}, },
[]string{"monitor"}, []string{"monitor"},
@@ -77,7 +77,7 @@ func (metrics *MinitorMetrics) SetMonitorStatus(monitor string, isUp bool) {
} }
// CountCheck counts the result of a particular Monitor check // CountCheck counts the result of a particular Monitor check
func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, ms int64, isAlert bool) { func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, secs float64, isAlert bool) {
status := "failure" status := "failure"
if isSuccess { if isSuccess {
status = "success" status = "success"
@@ -94,7 +94,7 @@ func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, ms int
metrics.checkTime.With( metrics.checkTime.With(
prometheus.Labels{"monitor": monitor, "status": status}, prometheus.Labels{"monitor": monitor, "status": status},
).Set(float64(ms)) ).Set(secs)
} }
// CountAlert counts an alert // CountAlert counts an alert
+10 -10
View File
@@ -12,7 +12,7 @@ import (
type Monitor struct { //nolint:maligned type Monitor struct { //nolint:maligned
// Config values // Config values
AlertAfter int16 `yaml:"alert_after"` AlertAfter int16 `yaml:"alert_after"`
AlertEvery *int16 `yaml:"alert_every"` AlertEvery int16 `yaml:"alert_every"`
CheckInterval SecondsOrDuration `yaml:"check_interval"` CheckInterval SecondsOrDuration `yaml:"check_interval"`
Name string Name string
AlertDown []string `yaml:"alert_down"` AlertDown []string `yaml:"alert_down"`
@@ -91,9 +91,9 @@ func (monitor Monitor) IsUp() bool {
return monitor.alertCount == 0 return monitor.alertCount == 0
} }
// LastCheckMilliseconds gives number of miliseconds the last check ran for // LastCheckSeconds gives number of seconds the last check ran for
func (monitor Monitor) LastCheckMilliseconds() int64 { func (monitor Monitor) LastCheckSeconds() float64 {
return monitor.lastCheckDuration.Milliseconds() return monitor.lastCheckDuration.Seconds()
} }
func (monitor *Monitor) success() (notice *AlertNotice) { func (monitor *Monitor) success() (notice *AlertNotice) {
@@ -129,14 +129,14 @@ func (monitor *Monitor) failure() (notice *AlertNotice) {
// Use alert cadence to determine if we should alert // Use alert cadence to determine if we should alert
switch { switch {
case monitor.AlertEvery == nil, *monitor.AlertEvery == 0: case monitor.AlertEvery > 0:
// Handle alerting on first failure only // Handle integer number of failures before alerting
if failureCount == 0 { if failureCount%monitor.AlertEvery == 0 {
notice = monitor.createAlertNotice(false) notice = monitor.createAlertNotice(false)
} }
case *monitor.AlertEvery > 0: case monitor.AlertEvery == 0:
// Handle integer number of failures before alerting // Handle alerting on first failure only
if failureCount%*monitor.AlertEvery == 0 { if failureCount == 0 {
notice = monitor.createAlertNotice(false) notice = monitor.createAlertNotice(false)
} }
default: default:
+17 -26
View File
@@ -141,19 +141,17 @@ func TestMonitorSuccess(t *testing.T) {
// TestMonitorFailureAlertAfter tests that alerts will not trigger until // TestMonitorFailureAlertAfter tests that alerts will not trigger until
// hitting the threshold provided by AlertAfter // hitting the threshold provided by AlertAfter
func TestMonitorFailureAlertAfter(t *testing.T) { func TestMonitorFailureAlertAfter(t *testing.T) {
var alertEvery int16 = 1
cases := []struct { cases := []struct {
monitor Monitor monitor Monitor
expectNotice bool expectNotice bool
name string name string
}{ }{
{Monitor{AlertAfter: 1}, true, "Empty"}, // Defaults to true because and AlertEvery default to 0 {Monitor{AlertAfter: 1}, true, "Empty"}, // Defaults to true because and AlertEvery default to 0
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEvery}, true, "Alert after 1: first failure"}, {Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 1}, true, "Alert after 1: first failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: &alertEvery}, true, "Alert after 1: second failure"}, {Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 1}, true, "Alert after 1: second failure"},
{Monitor{failureCount: 0, AlertAfter: 20, AlertEvery: &alertEvery}, false, "Alert after 20: first failure"}, {Monitor{failureCount: 0, AlertAfter: 20, AlertEvery: 1}, false, "Alert after 20: first failure"},
{Monitor{failureCount: 19, AlertAfter: 20, AlertEvery: &alertEvery}, true, "Alert after 20: 20th failure"}, {Monitor{failureCount: 19, AlertAfter: 20, AlertEvery: 1}, true, "Alert after 20: 20th failure"},
{Monitor{failureCount: 20, AlertAfter: 20, AlertEvery: &alertEvery}, true, "Alert after 20: 21st failure"}, {Monitor{failureCount: 20, AlertAfter: 20, AlertEvery: 1}, true, "Alert after 20: 21st failure"},
} }
for _, c := range cases { for _, c := range cases {
@@ -174,11 +172,6 @@ func TestMonitorFailureAlertAfter(t *testing.T) {
// TestMonitorFailureAlertEvery tests that alerts will trigger // TestMonitorFailureAlertEvery tests that alerts will trigger
// on the expected intervals // on the expected intervals
func TestMonitorFailureAlertEvery(t *testing.T) { func TestMonitorFailureAlertEvery(t *testing.T) {
var alertEvery0, alertEvery1, alertEvery2 int16
alertEvery0 = 0
alertEvery1 = 1
alertEvery2 = 2
cases := []struct { cases := []struct {
monitor Monitor monitor Monitor
expectNotice bool expectNotice bool
@@ -193,20 +186,20 @@ func TestMonitorFailureAlertEvery(t *testing.T) {
For usability, this should be consistent. Consistent with what though? minitor-py? Or itself? Dun dun duuuunnnnn! For usability, this should be consistent. Consistent with what though? minitor-py? Or itself? Dun dun duuuunnnnn!
*/ */
{Monitor{AlertAfter: 1}, true, "Empty"}, // Defaults to true because AlertAfter and AlertEvery default to nil {Monitor{AlertAfter: 1}, true, "Empty"}, // Defaults to true because AlertAfter and AlertEvery default to 0
// Alert first time only, after 1 // Alert first time only, after 1
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEvery0}, true, "Alert first time only after 1: first failure"}, {Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 0}, true, "Alert first time only after 1: first failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: &alertEvery0}, false, "Alert first time only after 1: second failure"}, {Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 0}, false, "Alert first time only after 1: second failure"},
{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: &alertEvery0}, false, "Alert first time only after 1: third failure"}, {Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: 0}, false, "Alert first time only after 1: third failure"},
// Alert every time, after 1 // Alert every time, after 1
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEvery1}, true, "Alert every time after 1: first failure"}, {Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: first failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: &alertEvery1}, true, "Alert every time after 1: second failure"}, {Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: second failure"},
{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: &alertEvery1}, true, "Alert every time after 1: third failure"}, {Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: third failure"},
// Alert every other time, after 1 // Alert every other time, after 1
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEvery2}, true, "Alert every other time after 1: first failure"}, {Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 2}, true, "Alert every other time after 1: first failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: &alertEvery2}, false, "Alert every other time after 1: second failure"}, {Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 2}, false, "Alert every other time after 1: second failure"},
{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: &alertEvery2}, true, "Alert every other time after 1: third failure"}, {Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: 2}, true, "Alert every other time after 1: third failure"},
{Monitor{failureCount: 3, AlertAfter: 1, AlertEvery: &alertEvery2}, false, "Alert every other time after 1: fourth failure"}, {Monitor{failureCount: 3, AlertAfter: 1, AlertEvery: 2}, false, "Alert every other time after 1: fourth failure"},
} }
for _, c := range cases { for _, c := range cases {
@@ -227,8 +220,6 @@ func TestMonitorFailureAlertEvery(t *testing.T) {
// TestMonitorFailureExponential tests that alerts will trigger // TestMonitorFailureExponential tests that alerts will trigger
// with an exponential backoff after repeated failures // with an exponential backoff after repeated failures
func TestMonitorFailureExponential(t *testing.T) { func TestMonitorFailureExponential(t *testing.T) {
var alertEveryExp int16 = -1
cases := []struct { cases := []struct {
expectNotice bool expectNotice bool
name string name string
@@ -245,7 +236,7 @@ func TestMonitorFailureExponential(t *testing.T) {
// Unlike previous tests, this one requires a static Monitor with repeated // Unlike previous tests, this one requires a static Monitor with repeated
// calls to the failure method // calls to the failure method
monitor := Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEveryExp} monitor := Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: -1}
for _, c := range cases { for _, c := range cases {
log.Printf("Testing case %s", c.name) log.Printf("Testing case %s", c.name)
-12
View File
@@ -1,12 +0,0 @@
---
check_interval: 1
default_alert_down: ["log_command"]
default_alert_after: 1
monitors:
- name: Command
command: ["echo", "$PATH"]
alerts:
log_command:
command: ["echo", "regular", '"command!!!"', "{{.MonitorName}}"]