Add default values for AlertEvery

There is also a test error corrected in TestMonitorFailureAlertEvery where the same test conditions were repeated twice.
Allow setting of global defaults for some values
2022-12-19 15:49:32 -08:00 · 2022-12-19 15:49:32 -08:00 · 2022-12-19 15:34:47 -08:00 · 2022-06-07 21:39:18 -07:00 · 2022-04-04 20:12:01 -07:00 · 2022-01-24 16:08:18 -08:00
13 changed files with 118 additions and 86 deletions
@@ -4,7 +4,7 @@ name: test

 steps:
  - name: test
-    image: golang:1.15
+    image: golang:1.17
    environment:
      VERSION: ${DRONE_TAG:-${DRONE_COMMIT}}
    commands:
@@ -30,7 +30,7 @@ trigger:

 steps:
  - name: build all binaries
-    image: golang:1.15
+    image: golang:1.17
    environment:
      VERSION: ${DRONE_TAG:-${DRONE_COMMIT}}
    commands:
@@ -1,32 +1,22 @@
 ---
 linters:
  enable:
-    - asciicheck
-    - bodyclose
-    - dogsled
-    - dupl
+    - errname
+    - errorlint
    - exhaustive
-    - gochecknoinits
-    - gocognit
-    - gocritic
-    - gocyclo
-    - goerr113
    - gofumpt
    - goimports
    - gomnd
    - goprintffuncname
-    # - gosec
-    # - ifshort
-    - interfacer
-    - maligned
    - misspell
-    - nakedret
-    - nestif
-    - nlreturn
-    - noctx
-    - unparam
+    - tagliatelle
+    - tenv
+    - testpackage
+    - thelper
+    - tparallel
+    - unconvert
+    - wrapcheck
    - wsl
-    # - errorlint
  disable:
    - gochecknoglobals

@@ -34,15 +24,13 @@ linters-settings:
  gosec:
    excludes:
      - G204
-#   gomnd:
-#     settings:
-#       mnd:
-#         ignored-functions: math.*
+  tagliatelle:
+    case:
+      rules:
+        yaml: snake

 issues:
  exclude-rules:
    - path: _test\.go
      linters:
-        - errcheck
        - gosec
-        - maligned
@@ -1,7 +1,7 @@
 ---
 repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v3.4.0
+    rev: v4.4.0
    hooks:
      - id: check-added-large-files
      - id: check-yaml
@@ -10,13 +10,11 @@ repos:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-merge-conflict
-  - repo: git://github.com/dnephin/pre-commit-golang
-    rev: v0.4.0
+  - repo: https://github.com/golangci/golangci-lint
+    rev: v1.50.1
    hooks:
-      - id: go-fmt
-      - id: go-imports
      - id: golangci-lint
  - repo: https://github.com/hadolint/hadolint
-    rev: v2.4.0
+    rev: v2.12.1-beta
    hooks:
      - id: hadolint
@@ -1,15 +1,11 @@
 ARG REPO=library
-FROM multiarch/qemu-user-static:4.2.0-2 as qemu-user-static
-FROM ${REPO}/alpine:3.10
-
-# Copying all qemu files because amd64 doesn't exist and cannot condional copy
-COPY --from=qemu-user-static /usr/bin/qemu-* /usr/bin/
+FROM ${REPO}/alpine:3.12

 RUN mkdir /app
 WORKDIR /app/

 # Add common checking tools
-RUN apk --no-cache add bash=~5.0 curl=~7.66 jq=~1.6
+RUN apk --no-cache add bash=~5.0 curl=~7.79 jq=~1.6

 # Add minitor user for running as non-root
 RUN addgroup -S minitor && adduser -S minitor -G minitor
@@ -1,7 +1,5 @@
 ARG REPO=library
-FROM golang:1.12-alpine AS builder
-
-RUN apk add --no-cache git=~2
+FROM golang:1.17 AS builder

 RUN mkdir /app
 WORKDIR /app
@@ -16,7 +14,7 @@ ARG VERSION=dev
 ENV CGO_ENABLED=0 GOOS=linux GOARCH=${ARCH}
 RUN go build -ldflags "-X main.version=${VERSION}" -a -installsuffix nocgo -o minitor .

-FROM ${REPO}/alpine:3.10
+FROM ${REPO}/alpine:3.12
 RUN mkdir /app
 WORKDIR /app/

@@ -24,7 +22,7 @@ WORKDIR /app/
 COPY --from=builder /app/minitor .

 # Add common checking tools
-RUN apk --no-cache add bash=~5.0 curl=~7.66 jq=~1.6
+RUN apk --no-cache add bash=~5.0 curl=~7.79 jq=~1.6

 # Add minitor user for running as non-root
 RUN addgroup -S minitor && adduser -S minitor -G minitor
@@ -55,6 +55,9 @@ The global configurations are:
 |key|value|
 |---|---|
 |`check_interval`|Maximum frequency to run checks for each monitor as duration, eg. 1m2s.|
+|`default_alert_after`|A default value used as an `alert_after` value for a monitor if not specified or 0.|
+|`default_alert_down`|Default down alerts to be used by a monitor in case none are provided.|
+|`default_alert_up`|Default up alerts to be used by a monitor in case none are provided.|
 |`monitors`|List of all monitors. Detailed description below|
 |`alerts`|List of all alerts. Detailed description below|

@@ -13,9 +13,13 @@ var errInvalidConfig = errors.New("Invalid configuration")

 // Config type contains all provided user configuration
 type Config struct {
-	CheckInterval SecondsOrDuration `yaml:"check_interval"`
-	Monitors      []*Monitor
-	Alerts        map[string]*Alert
+	CheckInterval     SecondsOrDuration `yaml:"check_interval"`
+	DefaultAlertAfter int16             `yaml:"default_alert_after"`
+	DefaultAlertEvery *int16            `yaml:"default_alert_every"`
+	DefaultAlertDown  []string          `yaml:"default_alert_down"`
+	DefaultAlertUp    []string          `yaml:"default_alert_up"`
+	Monitors          []*Monitor
+	Alerts            map[string]*Alert
 }

 // CommandOrShell type wraps a string or list of strings
@@ -135,8 +139,27 @@ func (config Config) IsValid() (isValid bool) {

 // Init performs extra initialization on top of loading the config from file
 func (config *Config) Init() (err error) {
+	for _, monitor := range config.Monitors {
+		if monitor.AlertAfter == 0 && config.DefaultAlertAfter > 0 {
+			monitor.AlertAfter = config.DefaultAlertAfter
+		}
+
+		if monitor.AlertEvery == nil && config.DefaultAlertEvery != nil {
+			monitor.AlertEvery = config.DefaultAlertEvery
+		}
+
+		if len(monitor.AlertDown) == 0 && len(config.DefaultAlertDown) > 0 {
+			monitor.AlertDown = config.DefaultAlertDown
+		}
+
+		if len(monitor.AlertUp) == 0 && len(config.DefaultAlertUp) > 0 {
+			monitor.AlertUp = config.DefaultAlertUp
+		}
+	}
+
 	for name, alert := range config.Alerts {
 		alert.Name = name
+
 		if err = alert.BuildTemplates(); err != nil {
 			return
 		}
@@ -14,6 +14,7 @@ func TestLoadConfig(t *testing.T) {
 		pyCompat   bool
 	}{
 		{"./test/valid-config.yml", false, "Valid config file", false},
+		{"./test/valid-config-default-values.yml", false, "Valid config file with default values", false},
 		{"./test/valid-default-log-alert.yml", false, "Valid config file with default log alert PyCompat", true},
 		{"./test/valid-default-log-alert.yml", true, "Invalid config file no log alert", false},
 		{"./test/does-not-exist", true, "Invalid config path", false},
@@ -36,6 +36,8 @@ func sendAlerts(config *Config, monitor *Monitor, alertNotice *AlertNotice) erro
 			"Received alert, but no alert mechanisms exist. MonitorName=%s IsUp=%t",
 			alertNotice.MonitorName, alertNotice.IsUp,
 		)
+
+		return nil
 	}

 	for _, alertName := range alertNames {
@@ -66,10 +68,10 @@ func sendAlerts(config *Config, monitor *Monitor, alertNotice *AlertNotice) erro
 }

 func checkMonitors(config *Config) error {
+	// TODO: Run this in goroutines and capture exceptions
 	for _, monitor := range config.Monitors {
 		if monitor.ShouldCheck() {
 			success, alertNotice := monitor.Check()
-
 			hasAlert := alertNotice != nil

 			// Track status metrics
@@ -77,7 +79,11 @@ func checkMonitors(config *Config) error {
 			Metrics.CountCheck(monitor.Name, success, monitor.LastCheckMilliseconds(), hasAlert)

 			if alertNotice != nil {
-				return sendAlerts(config, monitor, alertNotice)
+				err := sendAlerts(config, monitor, alertNotice)
+				// If there was an error in sending an alert, exit early and bubble it up
+				if err != nil {
+					return err
+				}
 			}
 		}
 	}
@@ -92,7 +98,7 @@ func main() {
 	flag.BoolVar(&slog.DebugLevel, "debug", false, "Enables debug logs (default: false)")
 	flag.BoolVar(&ExportMetrics, "metrics", false, "Enables prometheus metrics exporting (default: false)")
 	flag.BoolVar(&PyCompat, "py-compat", false, "Enables support for legacy Python Minitor config. Will eventually be removed. (default: false)")
-	flag.IntVar(&MetricsPort, "metrics-port", 8080, "The port that Prometheus metrics should be exported on, if enabled. (default: 8080)")
+	flag.IntVar(&MetricsPort, "metrics-port", MetricsPort, "The port that Prometheus metrics should be exported on, if enabled. (default: 8080)")
 	flag.Parse()

 	// Print version if flag is provided
@@ -116,9 +122,7 @@ func main() {
 	// Start main loop
 	for {
 		err = checkMonitors(&config)
-		if err != nil {
-			panic(err)
-		}
+		slog.OnErrPanicf(err, "Error checking monitors")

 		time.Sleep(config.CheckInterval.Value())
 	}
@@ -16,7 +16,7 @@ func TestCheckMonitors(t *testing.T) {
 		{
 			config: Config{
 				Monitors: []*Monitor{
-					&Monitor{
+					{
 						Name:    "Success",
 						Command: CommandOrShell{Command: []string{"true"}},
 					},
@@ -28,7 +28,7 @@ func TestCheckMonitors(t *testing.T) {
 		{
 			config: Config{
 				Monitors: []*Monitor{
-					&Monitor{
+					{
 						Name:       "Failure",
 						Command:    CommandOrShell{Command: []string{"false"}},
 						AlertAfter: 1,
@@ -41,7 +41,7 @@ func TestCheckMonitors(t *testing.T) {
 		{
 			config: Config{
 				Monitors: []*Monitor{
-					&Monitor{
+					{
 						Name:       "Success",
 						Command:    CommandOrShell{Command: []string{"ls"}},
 						alertCount: 1,
@@ -54,7 +54,7 @@ func TestCheckMonitors(t *testing.T) {
 		{
 			config: Config{
 				Monitors: []*Monitor{
-					&Monitor{
+					{
 						Name:       "Failure",
 						Command:    CommandOrShell{Command: []string{"false"}},
 						AlertDown:  []string{"unknown"},
@@ -68,7 +68,7 @@ func TestCheckMonitors(t *testing.T) {
 		{
 			config: Config{
 				Monitors: []*Monitor{
-					&Monitor{
+					{
 						Name:       "Success",
 						Command:    CommandOrShell{Command: []string{"true"}},
 						AlertUp:    []string{"unknown"},
@@ -82,7 +82,7 @@ func TestCheckMonitors(t *testing.T) {
 		{
 			config: Config{
 				Monitors: []*Monitor{
-					&Monitor{
+					{
 						Name:       "Failure",
 						Command:    CommandOrShell{Command: []string{"false"}},
 						AlertDown:  []string{"good"},
@@ -90,7 +90,7 @@ func TestCheckMonitors(t *testing.T) {
 					},
 				},
 				Alerts: map[string]*Alert{
-					"good": &Alert{
+					"good": {
 						Command: CommandOrShell{Command: []string{"true"}},
 					},
 				},
@@ -101,7 +101,7 @@ func TestCheckMonitors(t *testing.T) {
 		{
 			config: Config{
 				Monitors: []*Monitor{
-					&Monitor{
+					{
 						Name:       "Failure",
 						Command:    CommandOrShell{Command: []string{"false"}},
 						AlertDown:  []string{"bad"},
@@ -109,7 +109,7 @@ func TestCheckMonitors(t *testing.T) {
 					},
 				},
 				Alerts: map[string]*Alert{
-					"bad": &Alert{
+					"bad": {
 						Name:    "bad",
 						Command: CommandOrShell{Command: []string{"false"}},
 					},
@@ -12,7 +12,7 @@ import (
 type Monitor struct { //nolint:maligned
 	// Config values
 	AlertAfter    int16             `yaml:"alert_after"`
-	AlertEvery    int16             `yaml:"alert_every"`
+	AlertEvery    *int16            `yaml:"alert_every"`
 	CheckInterval SecondsOrDuration `yaml:"check_interval"`
 	Name          string
 	AlertDown     []string `yaml:"alert_down"`
@@ -129,16 +129,16 @@ func (monitor *Monitor) failure() (notice *AlertNotice) {

 	// Use alert cadence to determine if we should alert
 	switch {
-	case monitor.AlertEvery > 0:
-		// Handle integer number of failures before alerting
-		if failureCount%monitor.AlertEvery == 0 {
-			notice = monitor.createAlertNotice(false)
-		}
-	case monitor.AlertEvery == 0:
+	case monitor.AlertEvery == nil, *monitor.AlertEvery == 0:
 		// Handle alerting on first failure only
 		if failureCount == 0 {
 			notice = monitor.createAlertNotice(false)
 		}
+	case *monitor.AlertEvery > 0:
+		// Handle integer number of failures before alerting
+		if failureCount%*monitor.AlertEvery == 0 {
+			notice = monitor.createAlertNotice(false)
+		}
 	default:
 		// Handle negative numbers indicating an exponential backoff
 		if failureCount >= int16(math.Pow(2, float64(monitor.alertCount))-1) { //nolint:gomnd
@@ -141,17 +141,19 @@ func TestMonitorSuccess(t *testing.T) {
 // TestMonitorFailureAlertAfter tests that alerts will not trigger until
 // hitting the threshold provided by AlertAfter
 func TestMonitorFailureAlertAfter(t *testing.T) {
+	var alertEvery int16 = 1
+
 	cases := []struct {
 		monitor      Monitor
 		expectNotice bool
 		name         string
 	}{
 		{Monitor{AlertAfter: 1}, true, "Empty"}, // Defaults to true because AlertEvery defaults to nil, which is handled like 0
-		{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 1}, true, "Alert after 1: first failure"},
-		{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 1}, true, "Alert after 1: second failure"},
-		{Monitor{failureCount: 0, AlertAfter: 20, AlertEvery: 1}, false, "Alert after 20: first failure"},
-		{Monitor{failureCount: 19, AlertAfter: 20, AlertEvery: 1}, true, "Alert after 20: 20th failure"},
-		{Monitor{failureCount: 20, AlertAfter: 20, AlertEvery: 1}, true, "Alert after 20: 21st failure"},
+		{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEvery}, true, "Alert after 1: first failure"},
+		{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: &alertEvery}, true, "Alert after 1: second failure"},
+		{Monitor{failureCount: 0, AlertAfter: 20, AlertEvery: &alertEvery}, false, "Alert after 20: first failure"},
+		{Monitor{failureCount: 19, AlertAfter: 20, AlertEvery: &alertEvery}, true, "Alert after 20: 20th failure"},
+		{Monitor{failureCount: 20, AlertAfter: 20, AlertEvery: &alertEvery}, true, "Alert after 20: 21st failure"},
 	}

 	for _, c := range cases {
@@ -172,6 +174,11 @@ func TestMonitorFailureAlertAfter(t *testing.T) {
 // TestMonitorFailureAlertEvery tests that alerts will trigger
 // on the expected intervals
 func TestMonitorFailureAlertEvery(t *testing.T) {
+	var alertEvery0, alertEvery1, alertEvery2 int16
+	alertEvery0 = 0
+	alertEvery1 = 1
+	alertEvery2 = 2
+
 	cases := []struct {
 		monitor      Monitor
 		expectNotice bool
@@ -186,20 +193,20 @@ func TestMonitorFailureAlertEvery(t *testing.T) {

 			For usability, this should be consistent. Consistent with what though? minitor-py? Or itself? Dun dun duuuunnnnn!
 		*/
-		{Monitor{AlertAfter: 1}, true, "Empty"}, // Defaults to true because AlertAfter and AlertEvery default to 0
+		{Monitor{AlertAfter: 1}, true, "Empty"}, // Defaults to true because AlertEvery defaults to nil
 		// Alert first time only, after 1
-		{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 0}, true, "Alert first time only after 1: first failure"},
-		{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 0}, false, "Alert first time only after 1: second failure"},
-		{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: 0}, false, "Alert first time only after 1: third failure"},
+		{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEvery0}, true, "Alert first time only after 1: first failure"},
+		{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: &alertEvery0}, false, "Alert first time only after 1: second failure"},
+		{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: &alertEvery0}, false, "Alert first time only after 1: third failure"},
 		// Alert every time, after 1
-		{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: first failure"},
-		{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: second failure"},
-		{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: third failure"},
+		{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEvery1}, true, "Alert every time after 1: first failure"},
+		{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: &alertEvery1}, true, "Alert every time after 1: second failure"},
+		{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: &alertEvery1}, true, "Alert every time after 1: third failure"},
 		// Alert every other time, after 1
-		{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 2}, true, "Alert every other time after 1: first failure"},
-		{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 2}, false, "Alert every other time after 1: second failure"},
-		{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: 2}, true, "Alert every other time after 1: third failure"},
-		{Monitor{failureCount: 3, AlertAfter: 1, AlertEvery: 2}, false, "Alert every other time after 1: fourth failure"},
+		{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEvery2}, true, "Alert every other time after 1: first failure"},
+		{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: &alertEvery2}, false, "Alert every other time after 1: second failure"},
+		{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: &alertEvery2}, true, "Alert every other time after 1: third failure"},
+		{Monitor{failureCount: 3, AlertAfter: 1, AlertEvery: &alertEvery2}, false, "Alert every other time after 1: fourth failure"},
 	}

 	for _, c := range cases {
@@ -220,6 +227,8 @@ func TestMonitorFailureAlertEvery(t *testing.T) {
 // TestMonitorFailureExponential tests that alerts will trigger
 // with an exponential backoff after repeated failures
 func TestMonitorFailureExponential(t *testing.T) {
+	var alertEveryExp int16 = -1
+
 	cases := []struct {
 		expectNotice bool
 		name         string
@@ -236,7 +245,7 @@ func TestMonitorFailureExponential(t *testing.T) {

 	// Unlike previous tests, this one requires a static Monitor with repeated
 	// calls to the failure method
-	monitor := Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: -1}
+	monitor := Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEveryExp}

 	for _, c := range cases {
 		log.Printf("Testing case %s", c.name)
@@ -0,0 +1,12 @@
+---
+check_interval: 1
+default_alert_down: ["log_command"]
+default_alert_after: 1
+
+monitors:
+  - name: Command
+    command: ["echo", "$PATH"]
+
+alerts:
+  log_command:
+    command: ["echo", "regular", '"command!!!"', "{{.MonitorName}}"]
Author	SHA1	Message	Date
Ian Fijolek	7d87c3d036	Add default values for AlertEvery There is also a test error corrected in TestMonitorFailureAlertEvery where the same test conditions were repeated twice.	2022-12-19 15:49:32 -08:00
Ian Fijolek	deec04bf0d	Allow setting of global defaults for some values This helps with reducing redundant config. Note: There is no default for `alert_every` because the zero value has a meaning and cannot be interpreted as an omission.	2022-12-19 15:49:32 -08:00
Ian Fijolek	958446050f	Update linters	2022-12-19 15:34:47 -08:00
Ian Fijolek	88e94642d9	Remove some hooks included in golangci-lint and upgrade existing	2022-06-07 21:39:18 -07:00
Ian Fijolek	bc83a51907	Switch pre-commit url for golang	2022-04-04 20:12:01 -07:00
Ian Fijolek	08b8932331	Update curl version	2022-01-24 16:08:18 -08:00
Ian Fijolek	9072d97bb8	Make linters happy	2022-01-24 10:39:53 -08:00
Ian Fijolek	cdd8a69669	Update go version	2021-12-01 14:47:58 -08:00
Ian Fijolek	3c14a02770	Continue checking all monitors after sending alert Previously this was mistakenly returning after sending an alert. Now all alerts will be sent unless there is an exception on one of them.	2021-09-02 10:20:04 -07:00
Ian Fijolek	328ea83c25	Some linting cleanup	2021-09-02 10:19:03 -07:00
Ian Fijolek	ce986e8d1d	Roll back to alpine:3.12 Looks like there is a clock issue with raspbian https://wiki.alpinelinux.org/wiki/Release_Notes_for_Alpine_3.13.0#time64_requirements	2021-05-12 19:06:41 -07:00
Ian Fijolek	31a4b484bf	Merge branch 'duration-intervals'	2021-05-12 18:32:12 -07:00
Ian Fijolek	444d060736	Remove qemu-user-static from Dockerfile and update alpine My build machine now has proper qemu support added, so this is not needed	2021-05-12 23:22:24 +00:00