Breaking: Rename minitor_check_milliseconds and minitor_monitor_up_count

To conform with Prometheus metric name best practices, these metrics have been renamed as follows:
* `minitor_check_milliseconds` to `minitor_check_seconds`
* `minitor_monitor_up_count` to `minitor_monitor_up`
2022-12-19 15:45:23 -08:00
9 changed files with 36 additions and 83 deletions
@@ -9,6 +9,7 @@ linters:
    - gomnd
    - goprintffuncname
    - misspell
    - promlinter
    - tagliatelle
    - tenv
    - testpackage
@@ -55,9 +55,6 @@ The global configurations are:
 |key|value|
 |---|---|
 |`check_interval`|Maximum frequency to run checks for each monitor as duration, eg. 1m2s.|
 |`default_alert_after`|A default value used as an `alert_after` value for a monitor if not specified or 0.|
 |`default_alert_down`|Default down alerts to be used by a monitor in case none are provided.|
 |`default_alert_up`|Default up alerts to be used by a monitor in case none are provided.|
 |`monitors`|List of all monitors. Detailed description below|
 |`alerts`|List of all alerts. Detailed description below|
@@ -14,10 +14,6 @@ var errInvalidConfig = errors.New("Invalid configuration")
 // Config contains all user-provided configuration. Fields are populated
 // from YAML using the keys given in the struct tags.
 type Config struct {
 	// CheckInterval is the global check interval; the README describes it as
 	// the maximum frequency at which checks are run for each monitor.
 	CheckInterval SecondsOrDuration `yaml:"check_interval"`
 	// DefaultAlertAfter is copied by Init into any monitor whose AlertAfter
 	// is 0, provided this value is greater than 0.
 	DefaultAlertAfter int16             `yaml:"default_alert_after"`
 	// DefaultAlertEvery is copied by Init into any monitor whose AlertEvery
 	// is nil, provided this value is non-nil.
 	DefaultAlertEvery *int16            `yaml:"default_alert_every"`
 	// DefaultAlertDown is used by Init as a monitor's AlertDown list when the
 	// monitor provides none and this list is non-empty.
 	DefaultAlertDown  []string          `yaml:"default_alert_down"`
 	// DefaultAlertUp is used by Init as a monitor's AlertUp list when the
 	// monitor provides none and this list is non-empty.
 	DefaultAlertUp    []string          `yaml:"default_alert_up"`
 	// Monitors is the list of all configured monitors.
 	Monitors      []*Monitor
 	// Alerts holds all configured alerts keyed by name; Init writes each map
 	// key into the corresponding Alert's Name field.
 	Alerts        map[string]*Alert
 }
@@ -139,27 +135,8 @@ func (config Config) IsValid() (isValid bool) {
 // Init performs extra initialization on top of loading the config from file
 func (config *Config) Init() (err error) {
 	for _, monitor := range config.Monitors {
 		if monitor.AlertAfter == 0 && config.DefaultAlertAfter > 0 {
 			monitor.AlertAfter = config.DefaultAlertAfter
 		}
 		if monitor.AlertEvery == nil && config.DefaultAlertEvery != nil {
 			monitor.AlertEvery = config.DefaultAlertEvery
 		}
 		if len(monitor.AlertDown) == 0 && len(config.DefaultAlertDown) > 0 {
 			monitor.AlertDown = config.DefaultAlertDown
 		}
 		if len(monitor.AlertUp) == 0 && len(config.DefaultAlertUp) > 0 {
 			monitor.AlertUp = config.DefaultAlertUp
 		}
 	}
 	for name, alert := range config.Alerts {
 		alert.Name = name
 		if err = alert.BuildTemplates(); err != nil {
 			return
 		}
@@ -14,7 +14,6 @@ func TestLoadConfig(t *testing.T) {
 		pyCompat   bool
 	}{
 		{"./test/valid-config.yml", false, "Valid config file", false},
 		{"./test/valid-config-default-values.yml", false, "Valid config file with default values", false},
 		{"./test/valid-default-log-alert.yml", false, "Valid config file with default log alert PyCompat", true},
 		{"./test/valid-default-log-alert.yml", true, "Invalid config file no log alert", false},
 		{"./test/does-not-exist", true, "Invalid config path", false},
@@ -76,7 +76,7 @@ func checkMonitors(config *Config) error {
 			// Track status metrics
 			Metrics.SetMonitorStatus(monitor.Name, monitor.IsUp())
-			Metrics.CountCheck(monitor.Name, success, monitor.LastCheckMilliseconds(), hasAlert)
+			Metrics.CountCheck(monitor.Name, success, monitor.LastCheckSeconds(), hasAlert)
 			if alertNotice != nil {
 				err := sendAlerts(config, monitor, alertNotice)
@@ -43,14 +43,14 @@ func NewMetrics() *MinitorMetrics {
 		),
 		checkTime: prometheus.NewGaugeVec(
 			prometheus.GaugeOpts{
-				Name: "minitor_check_milliseconds",
+				Name: "minitor_check_seconds",
 				Help: "Time in seconds that a check ran for",
 			},
 			[]string{"monitor", "status"},
 		),
 		monitorStatus: prometheus.NewGaugeVec(
 			prometheus.GaugeOpts{
-				Name: "minitor_monitor_up_count",
+				Name: "minitor_monitor_up",
 				Help: "Status of currently responsive monitors",
 			},
 			[]string{"monitor"},
@@ -77,7 +77,7 @@ func (metrics *MinitorMetrics) SetMonitorStatus(monitor string, isUp bool) {
 }
 // CountCheck counts the result of a particular Monitor check
-func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, ms int64, isAlert bool) {
+func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, secs float64, isAlert bool) {
 	status := "failure"
 	if isSuccess {
 		status = "success"
@@ -94,7 +94,7 @@ func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, ms int
 	metrics.checkTime.With(
 		prometheus.Labels{"monitor": monitor, "status": status},
-	).Set(float64(ms))
+	).Set(secs)
 }
 // CountAlert counts an alert
@@ -12,7 +12,7 @@ import (
 type Monitor struct { //nolint:maligned
 	// Config values
 	AlertAfter    int16             `yaml:"alert_after"`
-	AlertEvery    *int16            `yaml:"alert_every"`
+	AlertEvery    int16             `yaml:"alert_every"`
 	CheckInterval SecondsOrDuration `yaml:"check_interval"`
 	Name          string
 	AlertDown     []string `yaml:"alert_down"`
@@ -91,9 +91,9 @@ func (monitor Monitor) IsUp() bool {
 	return monitor.alertCount == 0
 }
-// LastCheckMilliseconds gives number of miliseconds the last check ran for
+// LastCheckSeconds gives number of seconds the last check ran for
-func (monitor Monitor) LastCheckMilliseconds() int64 {
+func (monitor Monitor) LastCheckSeconds() float64 {
-	return monitor.lastCheckDuration.Milliseconds()
+	return monitor.lastCheckDuration.Seconds()
 }
 func (monitor *Monitor) success() (notice *AlertNotice) {
@@ -129,14 +129,14 @@ func (monitor *Monitor) failure() (notice *AlertNotice) {
 	// Use alert cadence to determine if we should alert
 	switch {
-	case monitor.AlertEvery == nil, *monitor.AlertEvery == 0:
+	case monitor.AlertEvery > 0:
-		// Handle alerting on first failure only
+		// Handle integer number of failures before alerting
-		if failureCount == 0 {
+		if failureCount%monitor.AlertEvery == 0 {
 			notice = monitor.createAlertNotice(false)
 		}
-	case *monitor.AlertEvery > 0:
+	case monitor.AlertEvery == 0:
-		// Handle integer number of failures before alerting
+		// Handle alerting on first failure only
-		if failureCount%*monitor.AlertEvery == 0 {
+		if failureCount == 0 {
 			notice = monitor.createAlertNotice(false)
 		}
 	default:
@@ -141,19 +141,17 @@ func TestMonitorSuccess(t *testing.T) {
 // TestMonitorFailureAlertAfter tests that alerts will not trigger until
 // hitting the threshold provided by AlertAfter
 func TestMonitorFailureAlertAfter(t *testing.T) {
 	var alertEvery int16 = 1
 	cases := []struct {
 		monitor      Monitor
 		expectNotice bool
 		name         string
 	}{
 		{Monitor{AlertAfter: 1}, true, "Empty"}, // Defaults to true because AlertEvery defaults to 0
-		{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEvery}, true, "Alert after 1: first failure"},
+		{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 1}, true, "Alert after 1: first failure"},
-		{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: &alertEvery}, true, "Alert after 1: second failure"},
+		{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 1}, true, "Alert after 1: second failure"},
-		{Monitor{failureCount: 0, AlertAfter: 20, AlertEvery: &alertEvery}, false, "Alert after 20: first failure"},
+		{Monitor{failureCount: 0, AlertAfter: 20, AlertEvery: 1}, false, "Alert after 20: first failure"},
-		{Monitor{failureCount: 19, AlertAfter: 20, AlertEvery: &alertEvery}, true, "Alert after 20: 20th failure"},
+		{Monitor{failureCount: 19, AlertAfter: 20, AlertEvery: 1}, true, "Alert after 20: 20th failure"},
-		{Monitor{failureCount: 20, AlertAfter: 20, AlertEvery: &alertEvery}, true, "Alert after 20: 21st failure"},
+		{Monitor{failureCount: 20, AlertAfter: 20, AlertEvery: 1}, true, "Alert after 20: 21st failure"},
 	}
 	for _, c := range cases {
@@ -174,11 +172,6 @@ func TestMonitorFailureAlertAfter(t *testing.T) {
 // TestMonitorFailureAlertEvery tests that alerts will trigger
 // on the expected intervals
 func TestMonitorFailureAlertEvery(t *testing.T) {
 	var alertEvery0, alertEvery1, alertEvery2 int16
 	alertEvery0 = 0
 	alertEvery1 = 1
 	alertEvery2 = 2
 	cases := []struct {
 		monitor      Monitor
 		expectNotice bool
@@ -193,20 +186,20 @@ func TestMonitorFailureAlertEvery(t *testing.T) {
 			For usability, this should be consistent. Consistent with what though? minitor-py? Or itself? Dun dun duuuunnnnn!
 		*/
-		{Monitor{AlertAfter: 1}, true, "Empty"}, // Defaults to true because AlertAfter and AlertEvery default to nil
+		{Monitor{AlertAfter: 1}, true, "Empty"}, // Defaults to true because AlertAfter and AlertEvery default to 0
 		// Alert first time only, after 1
-		{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEvery0}, true, "Alert first time only after 1: first failure"},
+		{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 0}, true, "Alert first time only after 1: first failure"},
-		{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: &alertEvery0}, false, "Alert first time only after 1: second failure"},
+		{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 0}, false, "Alert first time only after 1: second failure"},
-		{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: &alertEvery0}, false, "Alert first time only after 1: third failure"},
+		{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: 0}, false, "Alert first time only after 1: third failure"},
 		// Alert every time, after 1
-		{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEvery1}, true, "Alert every time after 1: first failure"},
+		{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: first failure"},
-		{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: &alertEvery1}, true, "Alert every time after 1: second failure"},
+		{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: second failure"},
-		{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: &alertEvery1}, true, "Alert every time after 1: third failure"},
+		{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: third failure"},
 		// Alert every other time, after 1
-		{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEvery2}, true, "Alert every other time after 1: first failure"},
+		{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 2}, true, "Alert every other time after 1: first failure"},
-		{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: &alertEvery2}, false, "Alert every other time after 1: second failure"},
+		{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 2}, false, "Alert every other time after 1: second failure"},
-		{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: &alertEvery2}, true, "Alert every other time after 1: third failure"},
+		{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: 2}, true, "Alert every other time after 1: third failure"},
-		{Monitor{failureCount: 3, AlertAfter: 1, AlertEvery: &alertEvery2}, false, "Alert every other time after 1: fourth failure"},
+		{Monitor{failureCount: 3, AlertAfter: 1, AlertEvery: 2}, false, "Alert every other time after 1: fourth failure"},
 	}
 	for _, c := range cases {
@@ -227,8 +220,6 @@ func TestMonitorFailureAlertEvery(t *testing.T) {
 // TestMonitorFailureExponential tests that alerts will trigger
 // with an exponential backoff after repeated failures
 func TestMonitorFailureExponential(t *testing.T) {
 	var alertEveryExp int16 = -1
 	cases := []struct {
 		expectNotice bool
 		name         string
@@ -245,7 +236,7 @@ func TestMonitorFailureExponential(t *testing.T) {
 	// Unlike previous tests, this one requires a static Monitor with repeated
 	// calls to the failure method
-	monitor := Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEveryExp}
+	monitor := Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: -1}
 	for _, c := range cases {
 		log.Printf("Testing case %s", c.name)
@@ -1,12 +0,0 @@
 ---
 check_interval: 1
 default_alert_down: ["log_command"]
 default_alert_after: 1
 monitors:
  - name: Command
    command: ["echo", "$PATH"]
 alerts:
  log_command:
    command: ["echo", "regular", '"command!!!"', "{{.MonitorName}}"]